Explorar o código

修复下载数据问题

lxylxy123321 hai 1 semana
pai
achega
4e01964977
Modificouse 1 ficheiro con 16 adicións e 2 borrados
  1. 16 2
      backend/app/services/dataset_service.py

+ 16 - 2
backend/app/services/dataset_service.py

@@ -152,8 +152,22 @@ def _download_modelscope_dataset(dataset_id: str) -> tuple[Path, Path, int]:
     jsonl_path = ds_dir / "data.jsonl"
     record_count = 0
     content = target.read_text(encoding="utf-8")
-    if target.suffix == ".jsonl":
-        records = [json.loads(line.strip()) for line in content.splitlines() if line.strip()]
+
+    if target.suffix == ".jsonl" or not target.suffix:
+        # JSONL 或无后缀文件:尝试逐行解析
+        records = []
+        for line in content.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                records.append(json.loads(line))
+            except json.JSONDecodeError:
+                # 如果逐行解析失败,尝试整体解析(可能是 JSON 数组)
+                records = json.loads(content)
+                if not isinstance(records, list):
+                    records = [records]
+                break
     else:
         records = json.loads(content)
         if not isinstance(records, list):