Ver Fonte

修复模型回复截断问题和数据集预览问题

lxylxy123321 há 1 semana
pai
commit
674b94f4e0

+ 1 - 1
backend/app/services/dataset_service.py

@@ -126,7 +126,7 @@ def _download_modelscope_dataset(dataset_id: str) -> tuple[Path, Path, int]:
     data_files = [f for f in all_files if _is_training_data_file(f)]
 
     if not data_files:
-        fallback = [f for f in all_files if f.suffix in (".json", ".jsonl") and f.name not in META_FILENAMES]
+        fallback = [f for f in all_files if f.suffix in (".json", ".jsonl") and f.name not in META_FILENAMES and f.name != "README.md"]
         logger.warning(f"No training data files found in {dataset_id}. "
                        f"Available JSON files: {[f.name for f in fallback]}")
         if fallback:

+ 8 - 1
backend/app/services/model_test_service.py

@@ -34,9 +34,16 @@ async def _test_model_remote(model_id: str, prompt: str, max_new_tokens: int, te
 
     # 独立脚本:零 app/db 依赖,参数全部通过环境变量传入
     script = rf"""\
-import json, os, base64
+import warnings, json, os, base64, sys
+warnings.filterwarnings('ignore')
+warnings.filterwarnings('ignore', category=FutureWarning)
+os.environ['PYTHONWARNINGS'] = 'ignore'
+os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
+os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true'
 from pathlib import Path
 import torch
+from transformers import logging as tf_logging
+tf_logging.set_verbosity_error()
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
 
 def find_model_path(model_id):

+ 29 - 23
result.txt

@@ -1,25 +1,31 @@
-finetune-backend  | 2026-05-20 02:02:04 | INFO     | peft-platform | Remote test result: code=1, stdout_len=885, stderr_len=1764
-finetune-backend  | 2026-05-20 02:02:04 | INFO     | peft-platform | stdout (first 500): {"error": "Unable to load model", "details": ["AutoModelForCausalLM float16: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate`", "AutoModelForCausalLM float32: Using a `device_map`, `tp_plan`, `torch.device` context manager or setting `torch.set_default_device(device)` requires `accelerate`. You can install it with `pip install accelerate`", "AutoModel float16: Usin
-finetune-backend  | 2026-05-20 02:02:04 | INFO     | peft-platform | stderr (first 500): [transformers] `torch_dtype` is deprecated! Use `dtype` instead!
-finetune-backend  | Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+finetune-backend  | INFO:     Started server process [1]
+finetune-backend  | INFO:     Waiting for application startup.
+finetune-backend  | 2026-05-20 02:12:02 | INFO     | peft-platform | JobQueue started with 2 workers
+finetune-backend  | INFO:     Application startup complete.
+finetune-backend  | INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
+finetune-backend  | INFO:     127.0.0.1:53198 - "GET /health HTTP/1.1" 200 OK
+finetune-backend  | INFO:     172.20.0.4:35898 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+finetune-backend  | 2026-05-20 02:13:47 | INFO     | peft-platform | Remote test result: code=0, stdout_len=1514, stderr_len=4075
+finetune-backend  | 2026-05-20 02:13:47 | INFO     | peft-platform | stdout (first 500): 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | 1,7,16,128,128,64,1,1,None
+finetune-backend  | {"generated_te
+finetune-backend  | 2026-05-20 02:13:47 | INFO     | peft-platform | stderr (first 500): Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
 finetune-backend  | Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
 finetune-backend  | torch.compile is not available in Python 3.10, using identity decorator instead
-finetune-backend  | /opt/conda/lib/python3.10/site-packages/torchvision/d
-finetune-backend  | 2026-05-20 02:02:04 | ERROR    | peft-platform | Remote model test failed: [transformers] `torch_dtype` is deprecated! Use `dtype` instead!
-finetune-backend  | Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
-finetune-backend  | Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
-finetune-backend  | torch.compile is not available in Python 3.10, using identity decorator instead
-finetune-backend  | /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
-finetune-backend  |   warnings.warn(_BETA_TRANSFORMS_WARNING)
-finetune-backend  | /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
-finetune-backend  |   warnings.warn(_BETA_TRANSFORMS_WARNING)
-finetune-backend  | 
-finetune-backend  | INFO:     172.20.0.4:60592 - "POST /api/v1/models/test HTTP/1.0" 400 Bad Request
-finetune-backend  | INFO:     127.0.0.1:50720 - "GET /health HTTP/1.1" 200 OK
-finetune-backend  | INFO:     127.0.0.1:47024 - "GET /health HTTP/1.1" 200 OK
-finetune-backend  | INFO:     127.0.0.1:37202 - "GET /health HTTP/1.1" 200 OK
-finetune-backend  | 2026-05-20 02:02:30 | INFO     | peft-platform | ModelScope snapshot_download returned local_path: /root/Fine-tuning/backend/data/processed/yanalong/yanalong
-finetune-backend  | 2026-05-20 02:02:30 | INFO     | peft-platform | Found 5 files in local_path
-finetune-backend  | 2026-05-20 02:02:30 | WARNING  | peft-platform | No training data files found in yanalong/yanalong. Available JSON files: []
-finetune-backend  | 2026-05-20 02:02:30 | ERROR    | peft-platform | Dataset download failed: No JSON/JSONL data files found in dataset yanalong/yanalong
-finetune-backend  | INFO:     172.20.0.4:54076 - "POST /api/v1/datasets/download HTTP/1.0" 400 Bad Request
+finetune-backend  | /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints