فهرست منبع

读取原始json

lxylxy123321 1 هفته پیش
والد
کامیت
167a90ce7b
2 فایلهای تغییر یافته به همراه 24 افزوده شده و 11 حذف شده
  1. +7 -2
      backend/app/preprocessors/__init__.py
  2. +17 -9
      result.txt

+ 7 - 2
backend/app/preprocessors/__init__.py

@@ -127,8 +127,13 @@ def preprocess_file(
             raw_data = [json.loads(line) for line in f if line.strip()]
     elif ext == ".json":
         with open(input_path, "r", encoding="utf-8") as f:
-            data = json.load(f)
-            raw_data = data if isinstance(data, list) else [data]
+            try:
+                data = json.load(f)
+                raw_data = data if isinstance(data, list) else [data]
+            except json.JSONDecodeError:
+                # 回退到 JSONL 格式(每行一个 JSON 对象)
+                f.seek(0)
+                raw_data = [json.loads(line) for line in f if line.strip()]
     elif ext == ".csv":
         import csv
         with open(input_path, "r", encoding="utf-8") as f:

+ 17 - 9
result.txt

@@ -1,20 +1,28 @@
-(base) [root@localhost ~]# docker exec -w /root/Fine-tuning/backend finetune-trainer /opt/conda/bin/python -m app.engines.remote_train "test-manual-001" "Qwen/Qwen3.5-0.8B" "text" "/root/Fine-tuning/backend/data/processed/ms_yanalong_yanalong/distill_r1_sft.json" "/root/Fine-tuning/backend/data/config_92a0a9cd-46aa-48bc-b7ad-bd5a18270c51.json"
-2026-05-20 14:08:47 | ERROR    | peft-platform | Remote training failed: test-manual-001 - No module named 'sqlalchemy'
+(base) [root@localhost ~]# docker exec -w /root/Fine-tuning/backend finetune-trainer /opt/conda/bin/python -m app.engines.remote_train "test-manual-001" "Qwen/Qwen3.5-0.8B" "text" "/root/Fine-tuning/backend/data/processed/ms_yanalong_yanalong/distill_r1_sft.json" "/root/Fine-tuning/backend/data/config_aa342346-a39e-4644-9a34-f3a9d3b961f8.json"
+2026-05-20 14:28:57 | ERROR    | peft-platform | Remote training failed: test-manual-001 - Extra data: line 2 column 1 (char 71)
 Traceback (most recent call last):
   File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
     return _run_code(code, main_globals, None,
   File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
     exec(code, run_globals)
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 179, in <module>
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 162, in <module>
     main()
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 175, in main
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 158, in main
     asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config))
   File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
     return loop.run_until_complete(main)
   File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
     return future.result()
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 85, in run_training
-    from app.core.db import async_session, DatasetRecord
-  File "/root/Fine-tuning/backend/app/core/db.py", line 3, in <module>
-    from sqlalchemy import Column, DateTime, Float, Integer, String, Text
-ModuleNotFoundError: No module named 'sqlalchemy'
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 108, in run_training
+    await engine.preprocess_dataset(dataset_path, processed_path, task_type=task_type, template=template)
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 119, in preprocess_dataset
+    processed = preprocess_file(dataset_path, output_path, task_type, template)
+  File "/root/Fine-tuning/backend/app/preprocessors/__init__.py", line 130, in preprocess_file
+    data = json.load(f)
+  File "/opt/conda/lib/python3.10/json/__init__.py", line 293, in load
+    return loads(fp.read(),
+  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
+    return _default_decoder.decode(s)
+  File "/opt/conda/lib/python3.10/json/decoder.py", line 340, in decode
+    raise JSONDecodeError("Extra data", s, end)
+json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 71)