Quellcode durchsuchen

优化界面,修复训练列名报错问题,优化训练进度条

lxylxy123321 vor 1 Woche
Ursprung
Commit
f797647427

+ 27 - 1
CLAUDE.md

@@ -19,4 +19,30 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ### 安全要求
 
-- 不要尝试去连接ssh远程,可以把命令给用户去执行
+- 不要尝试去连接ssh远程,可以把命令给用户去执行
+
+## 253 服务器上构建 trainer 容器
+
+在 253 服务器(192.168.91.253)上重建 `finetune-trainer` 容器的命令:
+
+```bash
+docker stop finetune-trainer && docker rename finetune-trainer finetune-trainer-old && docker run -d --name finetune-trainer --privileged --network host --shm-size 64m -e MACA_MPS_MODE=1 -v /root/Fine-tuning/backend:/root/Fine-tuning/backend 5334348e7a9b tail -f /dev/null
+```
+
+### 容器配置说明
+
+- **基础镜像**: `5334348e7a9b`(沐曦官方镜像的 image ID)
+- **特权模式**: `--privileged` 允许容器访问沐曦 GPU 设备
+- **网络模式**: `--network host` 使用宿主机网络
+- **共享内存**: `--shm-size 64m`
+- **MACA_MPS_MODE**: `1` 启用沐曦 MPS 模式
+- **代码目录**: 挂载 `/root/Fine-tuning/backend`(由 151 rsync 同步)
+- **Python 路径**: `/opt/conda/bin/python`(conda 环境)
+
+### 安装训练依赖
+
+容器创建后需要进入容器安装依赖:
+
+```bash
+docker exec -it finetune-trainer /opt/conda/bin/pip install peft trl accelerate bitsandbytes datasets
+```

+ 12 - 3
backend/app/core/job_queue.py

@@ -400,11 +400,20 @@ class JobQueue:
 
                         entry_type = entry.get("type")
                         if entry_type == "progress":
+                            step = entry.get("step", 0)
+                            total_steps = entry.get("total_steps", 0)
+                            # 计算进度:total_steps > 0 时按 step / total_steps 计算百分比;total_steps 为 0 时无法得知总步数,改用下方按 step 估算的兜底逻辑
+                            if total_steps > 0:
+                                progress = round(step / total_steps * 100, 1)
+                            else:
+                                # 无 total_steps 时,step 每增加 1 按 0.1% 估算(兜底)
+                                progress = round(step * 0.1, 1)
+                            progress = min(99.9, max(0, progress))  # 限制在 0-99.9%,completed 时才会到 100%
                             self.update_job(job_id,
-                                            current_step=entry.get("step", 0),
-                                            total_steps=entry.get("total_steps", 0),
+                                            current_step=step,
+                                            total_steps=total_steps,
                                             loss=entry.get("loss"),
-                                            progress=round(entry.get("step", 0) / max(entry.get("total_steps", 1), 1) * 100, 1))
+                                            progress=progress)
                             await self._notify_callbacks()
                             await send_progress(job_id, **{k: v for k, v in entry.items() if k != "type"})
 

+ 11 - 8
backend/app/engines/text_engine.py

@@ -331,15 +331,18 @@ class TextEngine(BaseEngine):
                 line = line.strip()
                 if line:
                     item = json.loads(line)
+                    # 兼容多种列名 → 统一映射为 prompt / completion
+                    if "prompt" not in item:
+                        item["prompt"] = item.get("question", item.get("query", item.get("text", item.get("input", ""))))
+                    if "completion" not in item:
+                        item["completion"] = item.get("answer", item.get("response", item.get("target", item.get("output", ""))))
                     # 确保 prompt 和 completion 是字符串
-                    if "prompt" in item:
-                        if isinstance(item["prompt"], (list, dict)):
-                            item["prompt"] = json.dumps(item["prompt"], ensure_ascii=False)
-                        item["prompt"] = str(item["prompt"])
-                    if "completion" in item:
-                        if isinstance(item["completion"], (list, dict)):
-                            item["completion"] = json.dumps(item["completion"], ensure_ascii=False)
-                        item["completion"] = str(item["completion"])
+                    if isinstance(item["prompt"], (list, dict)):
+                        item["prompt"] = json.dumps(item["prompt"], ensure_ascii=False)
+                    item["prompt"] = str(item["prompt"])
+                    if isinstance(item["completion"], (list, dict)):
+                        item["completion"] = json.dumps(item["completion"], ensure_ascii=False)
+                    item["completion"] = str(item["completion"])
                     data.append(item)
 
         hf_dataset = HFDataset.from_list(data)

+ 10 - 10
backend/app/preprocessors/__init__.py

@@ -43,16 +43,16 @@ def apply_sharegpt_template(item: dict) -> dict:
 
 def apply_raw_template(item: dict) -> dict:
     """Raw 模板: 直接读取 prompt/text 和 completion/output 字段。"""
-    prompt = item.get("prompt", item.get("text", item.get("input", "")))
-    completion = item.get("completion", item.get("output", item.get("target", "")))
+    prompt = item.get("prompt", item.get("text", item.get("input", item.get("question", item.get("query", "")))))
+    completion = item.get("completion", item.get("output", item.get("target", item.get("answer", item.get("response", "")))))
     return {"prompt": str(prompt), "completion": str(completion)}
 
 
 def apply_dpo_template(item: dict) -> dict:
     """DPO 模板: prompt + chosen + rejected。"""
     return {
-        "prompt": item.get("prompt", item.get("input", "")),
-        "chosen": item.get("chosen", item.get("positive", "")),
+        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
+        "chosen": item.get("chosen", item.get("positive", item.get("answer", ""))),
         "rejected": item.get("rejected", item.get("negative", "")),
     }
 
@@ -60,8 +60,8 @@ def apply_dpo_template(item: dict) -> dict:
 def apply_kto_template(item: dict) -> dict:
     """KTO 模板: prompt + completion + label。"""
     return {
-        "prompt": item.get("prompt", item.get("input", "")),
-        "completion": item.get("completion", item.get("output", "")),
+        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
+        "completion": item.get("completion", item.get("output", item.get("answer", item.get("response", "")))),
         "label": item.get("label", True),
     }
 
@@ -69,8 +69,8 @@ def apply_kto_template(item: dict) -> dict:
 def apply_orpo_template(item: dict) -> dict:
     """ORPO 模板: prompt + chosen + rejected (类似 DPO)。"""
     return {
-        "prompt": item.get("prompt", item.get("input", "")),
-        "chosen": item.get("chosen", item.get("positive", "")),
+        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
+        "chosen": item.get("chosen", item.get("positive", item.get("answer", ""))),
         "rejected": item.get("rejected", item.get("negative", "")),
     }
 
@@ -78,8 +78,8 @@ def apply_orpo_template(item: dict) -> dict:
 def apply_rm_template(item: dict) -> dict:
     """Reward Modeling 模板: prompt + chosen + rejected。"""
     return {
-        "prompt": item.get("prompt", item.get("input", "")),
-        "chosen": item.get("chosen", item.get("positive", "")),
+        "prompt": item.get("prompt", item.get("input", item.get("question", item.get("query", "")))),
+        "chosen": item.get("chosen", item.get("positive", item.get("answer", ""))),
         "rejected": item.get("rejected", item.get("negative", "")),
     }
 

+ 5 - 3
frontend/src/pages/Dashboard.tsx

@@ -1,4 +1,5 @@
 import { useState, useEffect, memo } from 'react'
+import { Link } from 'react-router-dom'
 import api from '../api/client'
 import { Cpu, Database, Train, Download, Upload, CloudUpload } from 'lucide-react'
 
@@ -106,15 +107,16 @@ export function Dashboard() {
             { label: '上传数据集', desc: '支持 JSONL / CSV / Parquet', icon: Upload, link: '/datasets', color: '#0ea5e9' },
             { label: '创建训练', desc: '配置超参数并启动微调', icon: CloudUpload, link: '/training', color: '#10b981' },
           ].map(item => (
-            <a
+            <Link
+              to={item.link}
               key={item.label}
-              href={item.link}
               style={{
                 display: 'flex', gap: 14, alignItems: 'center',
                 padding: '16px 20px', background: '#fff', borderRadius: 10,
                 border: '1px solid rgba(0,0,0,0.06)',
                 boxShadow: '0 1px 3px rgba(0,0,0,0.04)',
                 textDecoration: 'none', transition: 'all 0.2s ease',
+                cursor: 'pointer',
               }}
               onMouseEnter={e => {
                 e.currentTarget.style.transform = 'translateY(-2px)'
@@ -137,7 +139,7 @@ export function Dashboard() {
                 <div style={{ fontSize: 14, fontWeight: 600, color: '#134e4a' }}>{item.label}</div>
                 <div style={{ fontSize: 12, color: '#94a3b8', marginTop: 2 }}>{item.desc}</div>
               </div>
-            </a>
+            </Link>
           ))}
         </div>
       </div>

+ 5 - 1
frontend/src/pages/Datasets.tsx

@@ -1,4 +1,4 @@
-import { useState, useRef, memo } from 'react'
+import { useState, useEffect, useRef, memo } from 'react'
 import api, { DatasetInfo } from '../api/client'
 import { Database, Upload, Loader2 } from 'lucide-react'
 
@@ -56,6 +56,10 @@ export function Datasets() {
   const [previewData, setPreviewData] = useState<{ columns: string[]; rows: { row_index: number; data: Record<string, unknown> }[] } | null>(null)
   const inputRef = useRef<HTMLInputElement>(null)
 
+  useEffect(() => {
+    fetchDatasets()
+  }, [])
+
   // Download form
   const [dlDatasetId, setDlDatasetId] = useState('')
   const [dlUseModelscope, setDlUseModelscope] = useState(false)

+ 5 - 1
frontend/src/pages/Models.tsx

@@ -1,4 +1,4 @@
-import { useState, memo } from 'react'
+import { useState, useEffect, memo } from 'react'
 import api, { ModelInfo } from '../api/client'
 import { Cpu, CheckCircle, XCircle } from 'lucide-react'
 
@@ -69,6 +69,10 @@ export function Models() {
   const [testError, setTestError] = useState('')
   const [testing, setTesting] = useState(false)
 
+  useEffect(() => {
+    fetchModels()
+  }, [])
+
   const fetchModels = () => {
     setLoading(true)
     api.models.list()

+ 2 - 2
frontend/src/pages/Training.tsx

@@ -281,12 +281,12 @@ const JobRow = memo(function JobRow({ j, onCancel }: { j: TrainingJob; onCancel:
             width: 120, height: 8, background: '#f0f0f0', borderRadius: 4, overflow: 'hidden',
           }}>
             <div style={{
-              width: `${j.progress}%`, height: '100%', borderRadius: 4,
+              width: `${Math.min(100, Math.max(0, j.progress ?? 0))}%`, height: '100%', borderRadius: 4,
               background: `linear-gradient(90deg, ${statusColor(j.status)}, ${statusColor(j.status)}cc)`,
               transition: 'width 0.3s ease',
             }} />
           </div>
-          <span style={{ fontSize: 12, color: '#666', minWidth: 45, fontWeight: 500 }}>{j.progress.toFixed(1)}%</span>
+          <span style={{ fontSize: 12, color: '#666', minWidth: 45, fontWeight: 500 }}>{(j.progress ?? 0).toFixed(1)}%</span>
         </div>
       </td>
       <td style={{ padding: '12px 12px', fontSize: 13, fontFamily: 'monospace', fontWeight: 500 }}>{j.loss?.toFixed(4) ?? '-'}</td>

+ 18 - 267
result.txt

@@ -1,267 +1,18 @@
-2026-05-21T09:07:28.965573273Z app/core/
-2026-05-21T09:07:28.965582083Z app/services/__pycache__/
-2026-05-21T09:07:29.006816071Z 
-2026-05-21T09:07:29.006849199Z sent 2,764 bytes  received 45 bytes  160.51 bytes/sec
-2026-05-21T09:07:29.006859242Z total size is 251,292  speedup is 89.46
-2026-05-21T09:07:29.009059557Z => Sync done.
-2026-05-21T09:07:32.554614348Z INFO:     Started server process [1]
-2026-05-21T09:07:32.554721459Z INFO:     Waiting for application startup.
-2026-05-21T09:07:32.701656602Z 2026-05-21 09:07:32 | INFO     | peft-platform | JobQueue started with 2 workers
-2026-05-21T09:07:32.701760487Z INFO:     Application startup complete.
-2026-05-21T09:07:32.702216450Z INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
-2026-05-21T09:07:37.547354571Z INFO:     127.0.0.1:38274 - "GET /health HTTP/1.1" 200 OK
-2026-05-21T09:07:52.977770642Z INFO:     172.20.0.4:34488 - "GET /api/v1/models/ HTTP/1.0" 401 Unauthorized
-2026-05-21T09:07:52.979307264Z INFO:     172.20.0.4:34498 - "GET /api/v1/training/jobs HTTP/1.0" 401 Unauthorized
-2026-05-21T09:07:52.980619287Z INFO:     172.20.0.4:34506 - "GET /api/v1/datasets/ HTTP/1.0" 401 Unauthorized
-2026-05-21T09:08:07.746802896Z INFO:     127.0.0.1:59232 - "GET /health HTTP/1.1" 200 OK
-2026-05-21T09:08:18.534595819Z INFO:     172.20.0.4:52088 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:20.285032265Z INFO:     172.20.0.4:52104 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:27.236514367Z INFO:     172.20.0.4:53808 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:27.723199510Z INFO:     172.20.0.4:53822 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:27.886384722Z INFO:     172.20.0.4:53824 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:28.056337627Z INFO:     172.20.0.4:53840 - "GET /auth/sso/authorize?redirect=true HTTP/1.0" 307 Temporary Redirect
-2026-05-21T09:08:29.911054756Z 2026-05-21 09:08:29 | INFO     | app.api.auth | [SSO] exchange_code start, code=rBzQGQSO3R
-2026-05-21T09:08:29.911110656Z 2026-05-21 09:08:29 | INFO     | app.api.auth | [SSO] sso_base_url=http://192.168.92.61:8200
-2026-05-21T09:08:29.911125216Z 2026-05-21 09:08:29 | INFO     | app.api.auth | [SSO] client_id=hmDeOtXZVbeo2AZ-x58yPssZLg4Tcb1W
-2026-05-21T09:08:29.911134465Z 2026-05-21 09:08:29 | INFO     | app.api.auth | [SSO] redirect_uri=http://192.168.92.151:3000/auth/callback
-2026-05-21T09:08:30.081659122Z 2026-05-21 09:08:30 | INFO     | httpx | HTTP Request: POST http://192.168.92.61:8200/oauth/token "HTTP/1.1 200 OK"
-2026-05-21T09:08:30.082809643Z 2026-05-21 09:08:30 | INFO     | app.api.auth | [SSO] token response: {'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1MDAwMDAwMC0wMDAwLTQwMDAtODAwMC0wMDAwMDAwMDAwMDAiLCJjbGllbnRfaWQiOiJobURlT3RYWlZiZW8yQVoteDU4eVBzc1pMZzRUY2IxVyIsInNjb3BlIjoicHJvZmlsZSBlbWFpbCIsImV4cCI6MTc3OTM1NTcxMCwiaWF0IjoxNzc5MzU0NTEwfQ.joK4ppobgyG7FJ_7VygPCxaWIFJhVdggFWbj3zT5VQo', 'token_type': 'Bearer', 'expires_in': 1800, 'refresh_token': 'aiM69DQpcxp6brDmAPxwmOEcCMY8XiJVJRcr3uoenVw', 'scope': 'profile email'}
-2026-05-21T09:08:30.115741823Z 2026-05-21 09:08:30 | INFO     | httpx | HTTP Request: GET http://192.168.92.61:8200/oauth/userinfo "HTTP/1.1 200 OK"
-2026-05-21T09:08:30.117367101Z 2026-05-21 09:08:30 | INFO     | app.api.auth | [SSO] userinfo: {'sub': 'u0000000-0000-4000-8000-000000000000', 'roles': [{'name': '超级管理员', 'code': 'super_admin'}], 'username': 'super_admin', 'avatar_url': None, 'real_name': '超级管理员', 'company': None, 'department': None, 'position': '超级管理员', 'email': 'super_admin@lqai.com'}
-2026-05-21T09:08:30.171856426Z 2026-05-21 09:08:30 | ERROR    | app.api.auth | [SSO] exchange_code failed: Traceback (most recent call last):
-2026-05-21T09:08:30.171934024Z   File "asyncpg/protocol/prepared_stmt.pyx", line 175, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
-2026-05-21T09:08:30.171949981Z   File "asyncpg/protocol/codecs/base.pyx", line 251, in asyncpg.protocol.protocol.Codec.encode
-2026-05-21T09:08:30.171958069Z   File "asyncpg/protocol/codecs/base.pyx", line 153, in asyncpg.protocol.protocol.Codec.encode_scalar
-2026-05-21T09:08:30.171965491Z   File "asyncpg/pgproto/codecs/datetime.pyx", line 152, in asyncpg.pgproto.pgproto.timestamp_encode
-2026-05-21T09:08:30.171972748Z TypeError: can't subtract offset-naive and offset-aware datetimes
-2026-05-21T09:08:30.171980194Z 
-2026-05-21T09:08:30.171987156Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:30.171994262Z 
-2026-05-21T09:08:30.172001190Z Traceback (most recent call last):
-2026-05-21T09:08:30.172008409Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 550, in _prepare_and_execute
-2026-05-21T09:08:30.172015811Z     self._rows = deque(await prepared_stmt.fetch(*parameters))
-2026-05-21T09:08:30.172023291Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 177, in fetch
-2026-05-21T09:08:30.172030580Z     data = await self.__bind_execute(args, 0, timeout)
-2026-05-21T09:08:30.172037707Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 268, in __bind_execute
-2026-05-21T09:08:30.172044996Z     data, status, _ = await self.__do_execute(
-2026-05-21T09:08:30.172052114Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 257, in __do_execute
-2026-05-21T09:08:30.172059402Z     return await executor(protocol)
-2026-05-21T09:08:30.172096523Z   File "asyncpg/protocol/protocol.pyx", line 184, in bind_execute
-2026-05-21T09:08:30.172104302Z   File "asyncpg/protocol/prepared_stmt.pyx", line 204, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
-2026-05-21T09:08:30.172111764Z asyncpg.exceptions.DataError: invalid input for query argument $4: datetime.datetime(2026, 5, 22, 9, 8, 30,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:30.172119176Z 
-2026-05-21T09:08:30.172126123Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:30.172133188Z 
-2026-05-21T09:08:30.172140015Z Traceback (most recent call last):
-2026-05-21T09:08:30.172147207Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
-2026-05-21T09:08:30.172154494Z     self.dialect.do_execute(
-2026-05-21T09:08:30.172161534Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
-2026-05-21T09:08:30.172170175Z     cursor.execute(statement, parameters)
-2026-05-21T09:08:30.172177479Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
-2026-05-21T09:08:30.172184838Z     self._adapt_connection.await_(
-2026-05-21T09:08:30.172191868Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
-2026-05-21T09:08:30.172199170Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
-2026-05-21T09:08:30.172206502Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
-2026-05-21T09:08:30.172290143Z     value = await result
-2026-05-21T09:08:30.172319518Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
-2026-05-21T09:08:30.172328850Z     self._handle_exception(error)
-2026-05-21T09:08:30.172336367Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
-2026-05-21T09:08:30.172343780Z     self._adapt_connection._handle_exception(error)
-2026-05-21T09:08:30.172350877Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
-2026-05-21T09:08:30.172358354Z     raise translated_error from error
-2026-05-21T09:08:30.172365511Z sqlalchemy.dialects.postgresql.asyncpg.AsyncAdapt_asyncpg_dbapi.Error: <class 'asyncpg.exceptions.DataError'>: invalid input for query argument $4: datetime.datetime(2026, 5, 22, 9, 8, 30,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:30.172374256Z 
-2026-05-21T09:08:30.172381306Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:30.172388441Z 
-2026-05-21T09:08:30.172395359Z Traceback (most recent call last):
-2026-05-21T09:08:30.172418248Z   File "/app/app/api/auth.py", line 106, in exchange_code
-2026-05-21T09:08:30.172425986Z     await session.commit()
-2026-05-21T09:08:30.172433042Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/ext/asyncio/session.py", line 1000, in commit
-2026-05-21T09:08:30.172440381Z     await greenlet_spawn(self.sync_session.commit)
-2026-05-21T09:08:30.172447622Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 203, in greenlet_spawn
-2026-05-21T09:08:30.172454935Z     result = context.switch(value)
-2026-05-21T09:08:30.172461947Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2030, in commit
-2026-05-21T09:08:30.172469244Z     trans.commit(_to_root=True)
-2026-05-21T09:08:30.172476262Z   File "<string>", line 2, in commit
-2026-05-21T09:08:30.172483698Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/state_changes.py", line 137, in _go
-2026-05-21T09:08:30.172492877Z     ret_value = fn(self, *arg, **kw)
-2026-05-21T09:08:30.172500164Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 1311, in commit
-2026-05-21T09:08:30.172507488Z     self._prepare_impl()
-2026-05-21T09:08:30.172514559Z   File "<string>", line 2, in _prepare_impl
-2026-05-21T09:08:30.172521854Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/state_changes.py", line 137, in _go
-2026-05-21T09:08:30.172529380Z     ret_value = fn(self, *arg, **kw)
-2026-05-21T09:08:30.172536471Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 1286, in _prepare_impl
-2026-05-21T09:08:30.172543878Z     self.session.flush()
-2026-05-21T09:08:30.172550892Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4331, in flush
-2026-05-21T09:08:30.172558223Z     self._flush(objects)
-2026-05-21T09:08:30.172565985Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4466, in _flush
-2026-05-21T09:08:30.172573680Z     with util.safe_reraise():
-2026-05-21T09:08:30.172580738Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/langhelpers.py", line 121, in __exit__
-2026-05-21T09:08:30.172588112Z     raise exc_value.with_traceback(exc_tb)
-2026-05-21T09:08:30.172595197Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4427, in _flush
-2026-05-21T09:08:30.172602480Z     flush_context.execute()
-2026-05-21T09:08:30.172609602Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/unitofwork.py", line 466, in execute
-2026-05-21T09:08:30.172616905Z     rec.execute(self)
-2026-05-21T09:08:30.172612046Z INFO:     172.20.0.4:53848 - "POST /api/oauth/exchange-code HTTP/1.0" 500 Internal Server Error
-2026-05-21T09:08:30.172623980Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/unitofwork.py", line 642, in execute
-2026-05-21T09:08:30.172672981Z     util.preloaded.orm_persistence.save_obj(
-2026-05-21T09:08:30.172682393Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/persistence.py", line 93, in save_obj
-2026-05-21T09:08:30.172690027Z     _emit_insert_statements(
-2026-05-21T09:08:30.172697151Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/persistence.py", line 1048, in _emit_insert_statements
-2026-05-21T09:08:30.172704563Z     result = connection.execute(
-2026-05-21T09:08:30.172711589Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1419, in execute
-2026-05-21T09:08:30.172719184Z     return meth(
-2026-05-21T09:08:30.172726296Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/sql/elements.py", line 527, in _execute_on_connection
-2026-05-21T09:08:30.172733722Z     return connection._execute_clauseelement(
-2026-05-21T09:08:30.172740890Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1641, in _execute_clauseelement
-2026-05-21T09:08:30.172748406Z     ret = self._execute_context(
-2026-05-21T09:08:30.172756941Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1846, in _execute_context
-2026-05-21T09:08:30.172764551Z     return self._exec_single_context(
-2026-05-21T09:08:30.172771674Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1986, in _exec_single_context
-2026-05-21T09:08:30.172779284Z     self._handle_dbapi_exception(
-2026-05-21T09:08:30.172786495Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2363, in _handle_dbapi_exception
-2026-05-21T09:08:30.172793841Z     raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
-2026-05-21T09:08:30.172800972Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
-2026-05-21T09:08:30.172808480Z     self.dialect.do_execute(
-2026-05-21T09:08:30.172815599Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
-2026-05-21T09:08:30.172822903Z     cursor.execute(statement, parameters)
-2026-05-21T09:08:30.172830012Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
-2026-05-21T09:08:30.172837628Z     self._adapt_connection.await_(
-2026-05-21T09:08:30.172844663Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
-2026-05-21T09:08:30.172852008Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
-2026-05-21T09:08:30.172859257Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
-2026-05-21T09:08:30.172866777Z     value = await result
-2026-05-21T09:08:30.172873797Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
-2026-05-21T09:08:30.172890373Z     self._handle_exception(error)
-2026-05-21T09:08:30.172897578Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
-2026-05-21T09:08:30.172904967Z     self._adapt_connection._handle_exception(error)
-2026-05-21T09:08:30.172912002Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
-2026-05-21T09:08:30.172919438Z     raise translated_error from error
-2026-05-21T09:08:30.172926553Z sqlalchemy.exc.DBAPIError: (sqlalchemy.dialects.postgresql.asyncpg.Error) <class 'asyncpg.exceptions.DataError'>: invalid input for query argument $4: datetime.datetime(2026, 5, 22, 9, 8, 30,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:30.172935282Z [SQL: INSERT INTO refresh_tokens (id, user_id, token, expires_at, revoked, created_at) VALUES ($1::VARCHAR, $2::VARCHAR, $3::VARCHAR, $4::TIMESTAMP WITHOUT TIME ZONE, $5::INTEGER, $6::TIMESTAMP WITHOUT TIME ZONE)]
-2026-05-21T09:08:30.172942908Z [parameters: ('8a1e812a-0e5d-4438-b686-7e1324fe30ab', 'af827b1d-03b1-40dc-99b0-c8dca135a06e', 'rt_34bca8158df04cd190615d74426399f43823c751b93b4213', datetime.datetime(2026, 5, 22, 9, 8, 30, 158239, tzinfo=datetime.timezone.utc), 0, datetime.datetime(2026, 5, 21, 9, 8, 30, 160428))]
-2026-05-21T09:08:30.172950582Z (Background on this error at: https://sqlalche.me/e/20/dbapi)
-2026-05-21T09:08:30.172957717Z 
-2026-05-21T09:08:34.443177810Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] exchange_code start, code=EUM7S190Ev
-2026-05-21T09:08:34.443250071Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] sso_base_url=http://192.168.92.61:8200
-2026-05-21T09:08:34.443260598Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] client_id=hmDeOtXZVbeo2AZ-x58yPssZLg4Tcb1W
-2026-05-21T09:08:34.443268088Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] redirect_uri=http://192.168.92.151:3000/auth/callback
-2026-05-21T09:08:34.475327518Z 2026-05-21 09:08:34 | INFO     | httpx | HTTP Request: POST http://192.168.92.61:8200/oauth/token "HTTP/1.1 200 OK"
-2026-05-21T09:08:34.476725113Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] token response: {'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1MDAwMDAwMC0wMDAwLTQwMDAtODAwMC0wMDAwMDAwMDAwMDAiLCJjbGllbnRfaWQiOiJobURlT3RYWlZiZW8yQVoteDU4eVBzc1pMZzRUY2IxVyIsInNjb3BlIjoicHJvZmlsZSBlbWFpbCIsImV4cCI6MTc3OTM1NTcxNCwiaWF0IjoxNzc5MzU0NTE0fQ.hyettMa2ItZpoSBiDrfyeX8yYAbXVAnzytQzIZO7cuQ', 'token_type': 'Bearer', 'expires_in': 1800, 'refresh_token': 'SpHYi9WhxM_pQvqbbPBORHlJMuiyaV6M7lGk_8rl3Eo', 'scope': 'profile email'}
-2026-05-21T09:08:34.500651786Z 2026-05-21 09:08:34 | INFO     | httpx | HTTP Request: GET http://192.168.92.61:8200/oauth/userinfo "HTTP/1.1 200 OK"
-2026-05-21T09:08:34.502106505Z 2026-05-21 09:08:34 | INFO     | app.api.auth | [SSO] userinfo: {'sub': 'u0000000-0000-4000-8000-000000000000', 'roles': [{'name': '超级管理员', 'code': 'super_admin'}], 'username': 'super_admin', 'avatar_url': None, 'real_name': '超级管理员', 'company': None, 'department': None, 'position': '超级管理员', 'email': 'super_admin@lqai.com'}
-2026-05-21T09:08:34.512605325Z 2026-05-21 09:08:34 | ERROR    | app.api.auth | [SSO] exchange_code failed: Traceback (most recent call last):
-2026-05-21T09:08:34.512683648Z   File "asyncpg/protocol/prepared_stmt.pyx", line 175, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
-2026-05-21T09:08:34.512695372Z   File "asyncpg/protocol/codecs/base.pyx", line 251, in asyncpg.protocol.protocol.Codec.encode
-2026-05-21T09:08:34.512703146Z   File "asyncpg/protocol/codecs/base.pyx", line 153, in asyncpg.protocol.protocol.Codec.encode_scalar
-2026-05-21T09:08:34.512710486Z   File "asyncpg/pgproto/codecs/datetime.pyx", line 152, in asyncpg.pgproto.pgproto.timestamp_encode
-2026-05-21T09:08:34.512717860Z TypeError: can't subtract offset-naive and offset-aware datetimes
-2026-05-21T09:08:34.512725171Z 
-2026-05-21T09:08:34.512733578Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:34.512744793Z 
-2026-05-21T09:08:34.512754768Z Traceback (most recent call last):
-2026-05-21T09:08:34.512765349Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 550, in _prepare_and_execute
-2026-05-21T09:08:34.512776244Z     self._rows = deque(await prepared_stmt.fetch(*parameters))
-2026-05-21T09:08:34.512786333Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 177, in fetch
-2026-05-21T09:08:34.512786493Z INFO:     172.20.0.4:44472 - "POST /api/oauth/exchange-code HTTP/1.0" 500 Internal Server Error
-2026-05-21T09:08:34.512796400Z     data = await self.__bind_execute(args, 0, timeout)
-2026-05-21T09:08:34.512821308Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 268, in __bind_execute
-2026-05-21T09:08:34.512829564Z     data, status, _ = await self.__do_execute(
-2026-05-21T09:08:34.512837666Z   File "/usr/local/lib/python3.10/site-packages/asyncpg/prepared_stmt.py", line 257, in __do_execute
-2026-05-21T09:08:34.512848444Z     return await executor(protocol)
-2026-05-21T09:08:34.512857459Z   File "asyncpg/protocol/protocol.pyx", line 184, in bind_execute
-2026-05-21T09:08:34.512864800Z   File "asyncpg/protocol/prepared_stmt.pyx", line 204, in asyncpg.protocol.protocol.PreparedStatementState._encode_bind_msg
-2026-05-21T09:08:34.512872449Z asyncpg.exceptions.DataError: invalid input for query argument $1: datetime.datetime(2026, 5, 21, 9, 8, 34,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:34.512880167Z 
-2026-05-21T09:08:34.512887788Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:34.512894931Z 
-2026-05-21T09:08:34.512901829Z Traceback (most recent call last):
-2026-05-21T09:08:34.512910121Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
-2026-05-21T09:08:34.512920571Z     self.dialect.do_execute(
-2026-05-21T09:08:34.512930533Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
-2026-05-21T09:08:34.512946317Z     cursor.execute(statement, parameters)
-2026-05-21T09:08:34.512978113Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
-2026-05-21T09:08:34.512989921Z     self._adapt_connection.await_(
-2026-05-21T09:08:34.512999742Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
-2026-05-21T09:08:34.513007310Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
-2026-05-21T09:08:34.513014744Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
-2026-05-21T09:08:34.513030170Z     value = await result
-2026-05-21T09:08:34.513040062Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
-2026-05-21T09:08:34.513047793Z     self._handle_exception(error)
-2026-05-21T09:08:34.513054786Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
-2026-05-21T09:08:34.513062091Z     self._adapt_connection._handle_exception(error)
-2026-05-21T09:08:34.513069126Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
-2026-05-21T09:08:34.513076402Z     raise translated_error from error
-2026-05-21T09:08:34.513083629Z sqlalchemy.dialects.postgresql.asyncpg.AsyncAdapt_asyncpg_dbapi.Error: <class 'asyncpg.exceptions.DataError'>: invalid input for query argument $1: datetime.datetime(2026, 5, 21, 9, 8, 34,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:34.513091642Z 
-2026-05-21T09:08:34.513098652Z The above exception was the direct cause of the following exception:
-2026-05-21T09:08:34.513105678Z 
-2026-05-21T09:08:34.513112550Z Traceback (most recent call last):
-2026-05-21T09:08:34.513119577Z   File "/app/app/api/auth.py", line 90, in exchange_code
-2026-05-21T09:08:34.513126757Z     user = await _sync_user(sso_userinfo)
-2026-05-21T09:08:34.513133881Z   File "/app/app/api/auth.py", line 62, in _sync_user
-2026-05-21T09:08:34.513141013Z     await session.commit()
-2026-05-21T09:08:34.513147968Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/ext/asyncio/session.py", line 1000, in commit
-2026-05-21T09:08:34.513155428Z     await greenlet_spawn(self.sync_session.commit)
-2026-05-21T09:08:34.513162519Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 203, in greenlet_spawn
-2026-05-21T09:08:34.513169800Z     result = context.switch(value)
-2026-05-21T09:08:34.513176784Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 2030, in commit
-2026-05-21T09:08:34.513184031Z     trans.commit(_to_root=True)
-2026-05-21T09:08:34.513191139Z   File "<string>", line 2, in commit
-2026-05-21T09:08:34.513199636Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/state_changes.py", line 137, in _go
-2026-05-21T09:08:34.513219249Z     ret_value = fn(self, *arg, **kw)
-2026-05-21T09:08:34.513226873Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 1311, in commit
-2026-05-21T09:08:34.513234068Z     self._prepare_impl()
-2026-05-21T09:08:34.513241031Z   File "<string>", line 2, in _prepare_impl
-2026-05-21T09:08:34.513248178Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/state_changes.py", line 137, in _go
-2026-05-21T09:08:34.513255369Z     ret_value = fn(self, *arg, **kw)
-2026-05-21T09:08:34.513262394Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 1286, in _prepare_impl
-2026-05-21T09:08:34.513277440Z     self.session.flush()
-2026-05-21T09:08:34.513285532Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4331, in flush
-2026-05-21T09:08:34.513292983Z     self._flush(objects)
-2026-05-21T09:08:34.513300236Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4466, in _flush
-2026-05-21T09:08:34.513307792Z     with util.safe_reraise():
-2026-05-21T09:08:34.513314701Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/langhelpers.py", line 121, in __exit__
-2026-05-21T09:08:34.513321969Z     raise exc_value.with_traceback(exc_tb)
-2026-05-21T09:08:34.513328960Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/session.py", line 4427, in _flush
-2026-05-21T09:08:34.513336184Z     flush_context.execute()
-2026-05-21T09:08:34.513343115Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/unitofwork.py", line 466, in execute
-2026-05-21T09:08:34.513350525Z     rec.execute(self)
-2026-05-21T09:08:34.513357563Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/unitofwork.py", line 642, in execute
-2026-05-21T09:08:34.513364780Z     util.preloaded.orm_persistence.save_obj(
-2026-05-21T09:08:34.513371797Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/persistence.py", line 85, in save_obj
-2026-05-21T09:08:34.513378973Z     _emit_update_statements(
-2026-05-21T09:08:34.513391698Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/orm/persistence.py", line 912, in _emit_update_statements
-2026-05-21T09:08:34.513400600Z     c = connection.execute(
-2026-05-21T09:08:34.513407663Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1419, in execute
-2026-05-21T09:08:34.513414995Z     return meth(
-2026-05-21T09:08:34.513421930Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/sql/elements.py", line 527, in _execute_on_connection
-2026-05-21T09:08:34.513429380Z     return connection._execute_clauseelement(
-2026-05-21T09:08:34.513436501Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1641, in _execute_clauseelement
-2026-05-21T09:08:34.513455530Z     ret = self._execute_context(
-2026-05-21T09:08:34.513465756Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1846, in _execute_context
-2026-05-21T09:08:34.513473316Z     return self._exec_single_context(
-2026-05-21T09:08:34.513480323Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1986, in _exec_single_context
-2026-05-21T09:08:34.513487794Z     self._handle_dbapi_exception(
-2026-05-21T09:08:34.513494868Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2363, in _handle_dbapi_exception
-2026-05-21T09:08:34.513502253Z     raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
-2026-05-21T09:08:34.513509387Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1967, in _exec_single_context
-2026-05-21T09:08:34.513516656Z     self.dialect.do_execute(
-2026-05-21T09:08:34.513523654Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 952, in do_execute
-2026-05-21T09:08:34.513531049Z     cursor.execute(statement, parameters)
-2026-05-21T09:08:34.513538035Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 585, in execute
-2026-05-21T09:08:34.513545273Z     self._adapt_connection.await_(
-2026-05-21T09:08:34.513552291Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 132, in await_only
-2026-05-21T09:08:34.513559522Z     return current.parent.switch(awaitable)  # type: ignore[no-any-return,attr-defined] # noqa: E501
-2026-05-21T09:08:34.513566685Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 196, in greenlet_spawn
-2026-05-21T09:08:34.513574054Z     value = await result
-2026-05-21T09:08:34.513581065Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 563, in _prepare_and_execute
-2026-05-21T09:08:34.513588392Z     self._handle_exception(error)
-2026-05-21T09:08:34.513595446Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 513, in _handle_exception
-2026-05-21T09:08:34.513602792Z     self._adapt_connection._handle_exception(error)
-2026-05-21T09:08:34.513609841Z   File "/usr/local/lib/python3.10/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 797, in _handle_exception
-2026-05-21T09:08:34.513617181Z     raise translated_error from error
-2026-05-21T09:08:34.513624246Z sqlalchemy.exc.DBAPIError: (sqlalchemy.dialects.postgresql.asyncpg.Error) <class 'asyncpg.exceptions.DataError'>: invalid input for query argument $1: datetime.datetime(2026, 5, 21, 9, 8, 34,... (can't subtract offset-naive and offset-aware datetimes)
-2026-05-21T09:08:34.513633139Z [SQL: UPDATE users SET updated_at=$1::TIMESTAMP WITHOUT TIME ZONE WHERE users.id = $2::VARCHAR]
-2026-05-21T09:08:34.513640382Z [parameters: (datetime.datetime(2026, 5, 21, 9, 8, 34, 505088, tzinfo=datetime.timezone.utc), 'af827b1d-03b1-40dc-99b0-c8dca135a06e')]
-2026-05-21T09:08:34.513656827Z (Background on this error at: https://sqlalche.me/e/20/dbapi)
+(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "from transformers import AutoModelForCausalLM, AutoConfig; cfg = AutoConfig.from_pretrained('/root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B'); print('model_type:', cfg.model_type); print('architectures:', cfg.architectures)"
+model_type: qwen3_5
+architectures: ['Qwen3_5ForConditionalGeneration']
+(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "import torch; print('torch:', torch.__version__); print('cuda:', torch.cuda.is_available()); print('devices:', torch.cuda.device_count())"
+torch: 2.8.0+metax3.5.3.9
+cuda: True
+devices: 4
+(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/python -c "import torch; from transformers import AutoModelForCausalLM; m = AutoModelForCausalLM.from_pretrained('/root/Fine-tuning/backend/data/models/Qwen_Qwen3.5-0.8B', torch_dtype=torch.float16, device_map='auto'); print('Loaded OK')"
+[transformers] `torch_dtype` is deprecated! Use `dtype` instead!
+Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+torch.compile is not available in Python 3.10, using identity decorator instead
+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+Loading weights: 100%|██████████| 320/320 [00:06<00:00, 48.32it/s]
+Loaded OK