Quellcode durchsuchen

修复engine报错问题:为各 engine 的 _ProgressCallback 补充缺失的 on_init_end 回调

lxylxy123321 vor 1 Woche
Ursprung
Commit
04e0c8a45a

+ 1 - 0
backend/app/engines/multimodal_engine.py

@@ -186,6 +186,7 @@ class _ProgressCallback:
     def on_evaluate(self, args, state, control, metrics=None, **kwargs): pass
     def on_save(self, args, state, control, **kwargs): pass
     def on_predict(self, args, state, control, metrics=None, **kwargs): pass
+    def on_init_end(self, args, state, control, **kwargs): pass
 
 
 from app.core.websocket import send_completed, send_epoch_done, send_progress

+ 3 - 0
backend/app/engines/text_engine.py

@@ -328,6 +328,9 @@ class _ProgressCallback:
     def on_predict(self, args, state, control, metrics=None, **kwargs):
         pass
 
+    def on_init_end(self, args, state, control, **kwargs):
+        pass
+
 
 # 全局单例
 text_engine = TextEngine()

+ 1 - 0
backend/app/engines/vision_engine.py

@@ -186,6 +186,7 @@ class _ProgressCallback:
     def on_evaluate(self, args, state, control, metrics=None, **kwargs): pass
     def on_save(self, args, state, control, **kwargs): pass
     def on_predict(self, args, state, control, metrics=None, **kwargs): pass
+    def on_init_end(self, args, state, control, **kwargs): pass
 
 
 from app.core.websocket import send_completed, send_epoch_done, send_progress

+ 21 - 31
result.txt

@@ -1,31 +1,21 @@
-INFO:     127.0.0.1:48618 - "GET /health HTTP/1.1" 200 OK
-2026-05-15 14:36:49,475 - modelscope - INFO - Target directory already exists, skipping creation.
-2026-05-15 14:36:49 | INFO     | peft-platform | Model downloaded: Qwen/Qwen3.5-0.8B -> /root/Fine-tuning/backend/data/models/Qwen/Qwen3___5-0___8B
-Downloading Model from https://www.modelscope.cn to directory: /root/Fine-tuning/backend/data/models/Qwen/Qwen3.5-0.8B
-INFO:     172.19.0.3:58756 - "POST /api/v1/models/download HTTP/1.0" 200 OK
-INFO:     172.19.0.3:58768 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:58776 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:58778 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:58794 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.19.0.3:49392 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-15 14:37:04 | INFO     | peft-platform | Job 92be1d81-d263-4eff-8547-162728db09a7 enqueued
-2026-05-15 14:37:04 | INFO     | peft-platform | Training job created: 92be1d81-d263-4eff-8547-162728db09a7
-INFO:     172.19.0.3:49400 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-15 14:37:04 | ERROR    | peft-platform | Job 92be1d81-d263-4eff-8547-162728db09a7 failed: Dataset not found: 4af699cd-ed46-4dd4-ab36-84c54c709b62
-INFO:     172.19.0.3:49406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.19.0.3:49420 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:49422 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:49432 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.19.0.3:48350 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     127.0.0.1:60174 - "GET /health HTTP/1.1" 200 OK
-INFO:     172.19.0.3:48352 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:48368 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:48378 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-15 14:37:17 | INFO     | peft-platform | Job 48cf1048-b31c-4457-a9e8-0add3d56a56b enqueued
-2026-05-15 14:37:17 | INFO     | peft-platform | Training job created: 48cf1048-b31c-4457-a9e8-0add3d56a56b
-INFO:     172.19.0.3:50122 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-15 14:37:17 | ERROR    | peft-platform | Job 48cf1048-b31c-4457-a9e8-0add3d56a56b failed: Dataset not found: 4af699cd-ed46-4dd4-ab36-84c54c709b62
-INFO:     172.19.0.3:50136 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.19.0.3:50142 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:50154 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.19.0.3:50162 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.19.0.3:36528 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-15 14:43:01 | INFO     | peft-platform | Job 32178086-d3ff-40b2-8bb5-5b6403ca47a7 enqueued
+2026-05-15 14:43:01 | INFO     | peft-platform | Training job created: 32178086-d3ff-40b2-8bb5-5b6403ca47a7
+INFO:     172.19.0.3:36538 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-15 14:43:01 | INFO     | peft-platform | Preprocessed 60 samples for sft/alpaca
+[transformers] `torch_dtype` is deprecated! Use `dtype` instead!
+2026-05-15 14:43:12 | WARNING  | fla.utils | Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+2026-05-15 14:43:12 | WARNING  | fla.utils | Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+2026-05-15 14:43:17 | WARNING  | fla.ops.rwkv7.fused_addcmul | torch.compile is not available in Python 3.10, using identity decorator instead
+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+Loading weights: 100%|██████████| 320/320 [00:00<00:00, 447.82it/s]
+2026-05-15 14:43:18 | INFO     | peft-platform | Loaded model: Qwen/Qwen3.5-0.8B
+Map: 100%|██████████| 60/60 [00:00<00:00, 2251.91 examples/s]
+/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
+  warnings.warn(msg)
+[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2026-05-15 14:43:32 | ERROR    | peft-platform | Job 32178086-d3ff-40b2-8bb5-5b6403ca47a7 failed: '_ProgressCallback' object has no attribute 'on_init_end'
+trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695