Browse Source

修复下载数据集问题:.json 文件内容实为 JSONL 时整体解析报 "Extra data" 错误,改为先按 JSON 整体解析,失败后回退为逐行解析

lxylxy123321 1 week ago
parent
commit
d09e6434bb
2 changed files with 27 additions and 11 deletions
  1. 17 6
      backend/app/services/dataset_service.py
  2. 10 5
      result.txt

+ 17 - 6
backend/app/services/dataset_service.py

@@ -158,7 +158,7 @@ def _download_modelscope_dataset(dataset_id: str) -> tuple[Path, Path, int]:
     content = target.read_text(encoding="utf-8")
 
     if target.suffix == ".jsonl" or not target.suffix:
-        # JSONL 或无后缀文件:尝试逐行解析
+        # JSONL 或无后缀文件:逐行解析
         records = []
         for line in content.splitlines():
             line = line.strip()
@@ -167,15 +167,26 @@ def _download_modelscope_dataset(dataset_id: str) -> tuple[Path, Path, int]:
             try:
                 records.append(json.loads(line))
             except json.JSONDecodeError:
-                # 如果逐行解析失败,尝试整体解析(可能是 JSON 数组)
                 records = json.loads(content)
                 if not isinstance(records, list):
                     records = [records]
                 break
-    else:
-        records = json.loads(content)
-        if not isinstance(records, list):
-            records = [records]
+    elif target.suffix == ".json":
+        # JSON 文件:先尝试 JSON 数组,失败再逐行解析(可能是 JSONL 格式)
+        try:
+            records = json.loads(content)
+            if not isinstance(records, list):
+                records = [records]
+        except json.JSONDecodeError:
+            records = []
+            for line in content.splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    records.append(json.loads(line))
+                except json.JSONDecodeError:
+                    continue
 
     with open(jsonl_path, "w", encoding="utf-8") as f:
         for item in records:

+ 10 - 5
result.txt

@@ -1,5 +1,10 @@
-finetune-backend  | 2026-05-20 02:49:41 | INFO     | peft-platform | Remote test result: code=0, stdout_len=1544, stderr_len=2373
-finetune-backend  | 2026-05-20 02:49:41 | INFO     | peft-platform | stdout (first 500): 1,7,16,128,128,64,1,1,None
+finetune-backend  | 2026-05-20 03:20:25 | ERROR    | peft-platform | Dataset download failed: Extra data: line 2 column 1 (char 71)
+finetune-backend  | INFO:     172.20.0.4:57044 - "POST /api/v1/datasets/download HTTP/1.0" 400 Bad Request
+finetune-backend  | INFO:     127.0.0.1:33414 - "GET /health HTTP/1.1" 200 OK
+finetune-backend  | INFO:     172.20.0.4:48556 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+finetune-backend  | 2026-05-20 03:22:27 | ERROR    | peft-platform | SSH command timeout after 5s: docker exec finetune-trainer rm -f /tmp/test_model_Qwen_Qwen3.5-0.8B.py
+finetune-backend  | 2026-05-20 03:22:27 | INFO     | peft-platform | Remote test result: code=0, stdout_len=1701, stderr_len=2440
+finetune-backend  | 2026-05-20 03:22:27 | INFO     | peft-platform | stdout (first 500): 1,7,16,128,128,64,1,1,None
 finetune-backend  | 1,7,16,128,128,64,1,1,None
 finetune-backend  | 1,7,16,128,128,64,1,1,None
 finetune-backend  | 1,7,16,128,128,64,1,1,None
@@ -18,10 +23,10 @@ finetune-backend  | 1,7,16,128,128,64,1,1,None
 finetune-backend  | 1,7,16,128,128,64,1,1,None
 finetune-backend  | 1,7,16,128,128,64,1,1,None
 finetune-backend  | {"generated_te
-finetune-backend  | 2026-05-20 02:49:41 | INFO     | peft-platform | stderr (first 500): Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+finetune-backend  | 2026-05-20 03:22:27 | INFO     | peft-platform | stderr (first 500): Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
 finetune-backend  | Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
 finetune-backend  | torch.compile is not available in Python 3.10, using identity decorator instead
 finetune-backend  | 
 finetune-backend  | Loading weights:   0%|          | 0/320 [00:00<?, ?it/s]
-finetune-backend  | Loading weights:   0%|          | 1/320 [00:02<11:37,  2.19s
-finetune-backend  | INFO:     172.20.0.4:51666 - "POST /api/v1/models/test HTTP/1.0" 200 OK
+finetune-backend  | Loading weights:   0%|          | 1/320 [00:02<11:11,  2.11s
+finetune-backend  | INFO:     172.20.0.4:48564 - "POST /api/v1/models/test HTTP/1.0" 200 OK