Przeglądaj źródła

修复dpo报错

lxylxy123321 17 godzin temu
rodzic
commit
acb80c4217
2 zmienionych plików z 198 dodań i 188 usunięć
  1. 27 87
      backend/app/engines/text_engine.py
  2. 171 101
      result.txt

+ 27 - 87
backend/app/engines/text_engine.py

@@ -296,59 +296,6 @@ class TextEngine(BaseEngine):
                 _ma.MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = {}
             from trl import DPOConfig, DPOTrainer
 
-            # 兼容旧版 transformers:Trainer.__init__ 不接受 tokenizer/processing_class
-            from transformers import Trainer as _HFTrainer
-            _orig_trainer_init = _HFTrainer.__init__
-            if not getattr(_HFTrainer, "_patched_kwargs", False):
-                def _patched_trainer_init(self, *args, **kwargs):
-                    kwargs.pop("tokenizer", None)
-                    kwargs.pop("processing_class", None)
-                    _orig_trainer_init(self, *args, **kwargs)
-                _HFTrainer.__init__ = _patched_trainer_init
-                _HFTrainer._patched_kwargs = True
-
-            # 兼容:新版 transformers Trainer 调用 get_batch_samples(epoch_iterator, num_batches, device)
-            # 但 TRL 0.9.6 的签名是 get_batch_samples(model, batch),参数语义不同
-            if not getattr(DPOTrainer, "_patched_gbs", False):
-                _orig_gbs = DPOTrainer.get_batch_samples
-                def _patched_gbs(self, epoch_iterator, num_batches, device=None):
-                    batch = next(epoch_iterator)
-                    if device:
-                        batch = {k: v.to(device) if hasattr(v, "to") else v for k, v in batch.items()}
-                    _orig_gbs(self, self.model, batch)
-                    num_items = len(batch.get("input_ids", batch.get("prompt_input_ids", [])))
-                    return [batch], num_items
-                DPOTrainer.get_batch_samples = _patched_gbs
-                DPOTrainer._patched_gbs = True
-
-            # 修复 Qwen tokenizer bug:tokenize 后 input_ids 末尾可能追加 None
-            # 导致 DPODataCollatorWithPadding 中 torch.tensor([...None...], dtype=int64) 报错
-            # 参考: https://github.com/huggingface/trl/issues/1073
-            #
-            # 注意:不能用 types.MethodType 绑定到实例上,因为 Python 的特殊方法查找
-            # (如 obj() → type(obj).__call__(obj))会跳过实例属性,直接查类。
-            # 必须在类级别替换 __call__。
-            _tok_cls = type(self._tokenizer)
-            if not getattr(_tok_cls, "_patched_none_filter", False):
-                _orig_cls_call = _tok_cls.__call__
-
-                def _call_filter_none(cls_self, *args, **kwargs):
-                    result = _orig_cls_call(cls_self, *args, **kwargs)
-                    if isinstance(result, dict) and "input_ids" in result:
-                        ids = result["input_ids"]
-                        if isinstance(ids, list) and ids:
-                            if isinstance(ids[0], list):
-                                result["input_ids"] = [
-                                    [x for x in seq if x is not None] for seq in ids
-                                ]
-                            else:
-                                result["input_ids"] = [x for x in ids if x is not None]
-                    return result
-
-                _tok_cls.__call__ = _call_filter_none
-                _tok_cls._patched_none_filter = True
-                logger.info(f"Patched {_tok_cls.__name__}.__call__ to filter None from input_ids (Qwen workaround)")
-
             # 显式创建 reference model 并冻结,避免 AdaLora 多 adapter 冲突
             ref_model = deepcopy(self._model)
             ref_model.eval()
@@ -398,40 +345,33 @@ class TextEngine(BaseEngine):
             # 修复 Qwen tokenizer bug(TRL #1073):
             # tokenize 后 input_ids 末尾可能含 None,导致 collator 中
             # torch.tensor([..., None], dtype=int64) 崩溃。
-            # 直接 monkey-patch DPODataCollatorWithPadding.__call__(即崩溃点),
-            # 在原始逻辑执行前清洗 features 中的 None 值。
-            # 注意:input_ids 和 attention_mask 必须同步截断,否则 tensor 长度不匹配。
-            try:
-                from trl.trainer.utils import DPODataCollatorWithPadding as _DC
-                if not getattr(_DC, "_patched_none_filter", False):
-                    _orig_dc_call = _DC.__call__
-
-                    def _dc_call_clean(self_dc, features):
-                        for ex in features:
-                            for k in list(ex.keys()):
-                                v = ex[k]
-                                if isinstance(v, list) and v and not isinstance(v[0], list):
-                                    # 一维 list(如 input_ids / attention_mask / labels)
-                                    cleaned = [x for x in v if x is not None]
-                                    ex[k] = cleaned
-                            # 同步截断:确保同一组序列(如 chosen_input_ids / chosen_attention_mask)长度一致
-                            for prefix in ("prompt_", "chosen_", "rejected_"):
-                                ids_key = f"{prefix}input_ids"
-                                mask_key = f"{prefix}attention_mask"
-                                labels_key = f"{prefix}labels"
-                                if ids_key in ex and isinstance(ex[ids_key], list):
-                                    target_len = len(ex[ids_key])
-                                    if mask_key in ex and isinstance(ex[mask_key], list):
-                                        ex[mask_key] = ex[mask_key][:target_len]
-                                    if labels_key in ex and isinstance(ex[labels_key], list):
-                                        ex[labels_key] = ex[labels_key][:target_len]
-                        return _orig_dc_call(self_dc, features)
-
-                    _DC.__call__ = _dc_call_clean
-                    _DC._patched_none_filter = True
-                    logger.info("Patched DPODataCollatorWithPadding to filter None from token ids")
-            except (ImportError, AttributeError) as _e:
-                logger.warning(f"Could not patch DPODataCollatorWithPadding: {_e}")
+            # 方案:包装 trainer 的 data collator,在 collate 前将 None 替换为 pad 值
+            # (替换而非删除,避免 input_ids 和 attention_mask 长度不一致)
+            def _sanitize_dpo_features(features):
+                pad_id = self._tokenizer.pad_token_id or 0
+                for ex in features:
+                    for k in list(ex.keys()):
+                        v = ex[k]
+                        if isinstance(v, list) and v:
+                            if isinstance(v[0], list):
+                                ex[k] = [
+                                    [x if x is not None else pad_id for x in seq]
+                                    for seq in v
+                                ]
+                            elif isinstance(v[0], (int, float, type(None))):
+                                if k.endswith("_attention_mask"):
+                                    ex[k] = [x if x is not None else 0 for x in v]
+                                elif k.endswith("_labels"):
+                                    ex[k] = [x if x is not None else -100 for x in v]
+                                else:
+                                    ex[k] = [x if x is not None else pad_id for x in v]
+                return features
+
+            _orig_collator = trainer._data_collator
+            def _safe_collator(features):
+                return _orig_collator(_sanitize_dpo_features(features))
+            trainer._data_collator = _safe_collator
+            logger.info("Wrapped DPO data collator to sanitize None values from Qwen tokenizer")
 
         elif task_type == "ppo":
             import torch

+ 171 - 101
result.txt

@@ -1,101 +1,171 @@
-(base) [root@localhost ~]# docker exec finetune-trainer tail -100 /tmp/train_297b8bc2-e382-4b53-853b-dbff4578601e.log
-[remote_train]   task_type=dpo, template=auto
-[remote_train]   Engine loaded: TextEngine
-[remote_train]   Running preprocess_dataset...
-[remote_train]   Preprocessing done, output: /root/Fine-tuning/backend/data/processed/297b8bc2-e382-4b53-853b-dbff4578601e_processed.jsonl
-[remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
-Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
-Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
-torch.compile is not available in Python 3.10, using identity decorator instead
-/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
-  warnings.warn(_BETA_TRANSFORMS_WARNING)
-/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
-  warnings.warn(_BETA_TRANSFORMS_WARNING)
-Loading weights: 100%|██████████| 320/320 [00:06<00:00, 47.18it/s]
-[remote_train]   Model loaded successfully
-[remote_train] Step 3: Building PEFT config...
-[remote_train] Step 4: Starting training...
-[remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
-[remote_train] Total steps: 3 epochs, batch_size per GPU=16
-/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
-  warnings.warn(msg)
-bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
-Traceback (most recent call last):
-  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
-    lib = get_native_library()
-  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
-    raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
-RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
-[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
-[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
-trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
-Map: 100%|██████████| 5/5 [00:00<00:00, 160.55 examples/s]
-  0%|          | 0/1 [00:00<?, ?it/s]Training failed for job 297b8bc2-e382-4b53-853b-dbff4578601e: 'NoneType' object cannot be interpreted as an integer
-[remote_train] [rank 0] ERROR: 'NoneType' object cannot be interpreted as an integer
-[remote_train] Traceback (most recent call last):
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
-    adapter_path = await engine.train(
-  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 472, in train
-    trainer.train()
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
-    return inner_training_loop(
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
-    self._run_epoch(
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
-  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
-    batch = next(epoch_iterator)
-  File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
-    current_batch = next(dataloader_iter)
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
-    data = self._next_data()
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-    return self.collate_fn(data)
-  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-TypeError: 'NoneType' object cannot be interpreted as an integer
-
-[remote_train] === Training job failed: 297b8bc2-e382-4b53-853b-dbff4578601e ===
-Traceback (most recent call last):
-  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
-    return _run_code(code, main_globals, None,
-  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
-    exec(code, run_globals)
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
-    main()
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
-    asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
-  File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
-    return loop.run_until_complete(main)
-  File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
-    return future.result()
-  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
-    adapter_path = await engine.train(
-  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 472, in train
-    trainer.train()
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
-    return inner_training_loop(
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
-    self._run_epoch(
-  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
-    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
-  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
-    batch = next(epoch_iterator)
-  File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
-    current_batch = next(dataloader_iter)
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
-    data = self._next_data()
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
-    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
-  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-    return self.collate_fn(data)
-  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-TypeError: 'NoneType' object cannot be interpreted as an integer
-  0%|          | 0/1 [00:12<?, ?it/s]
+=> Sync done.
+INFO:     Started server process [1]
+INFO:     Waiting for application startup.
+2026-05-27 08:46:29 | INFO     | peft-platform | JobQueue started with 2 workers
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
+INFO:     172.20.0.4:54384 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54388 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54400 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54394 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54416 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54422 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54428 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54432 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:45336 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:48540 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 08:46:36 | INFO     | peft-platform | Training job c7b86d01-4e0d-440e-8aed-1bf1f1134a6f: num_gpus=1, batch_size=16
+2026-05-27 08:46:36 | INFO     | peft-platform | Job c7b86d01-4e0d-440e-8aed-1bf1f1134a6f enqueued
+2026-05-27 08:46:36 | INFO     | peft-platform | Training job created: c7b86d01-4e0d-440e-8aed-1bf1f1134a6f
+INFO:     172.20.0.4:48546 - "POST /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 08:46:37 | INFO     | app.engines.text_engine | Preprocessed 5 samples for dpo/alpaca
+INFO:     172.20.0.4:48558 - "GET /api/v1/models/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48566 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48582 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:48584 - "WebSocket /ws/training/c7b86d01-4e0d-440e-8aed-1bf1f1134a6f?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJhZjgyN2IxZC0wM2IxLTQwZGMtOTliMC1jOGRjYTEzNWEwNmUiLCJ1c2VybmFtZSI6InN1cGVyX2FkbWluIiwicm9sZXMiOlsic3VwZXJfYWRtaW4iXSwiZXhwIjoxNzc5ODcyMjM1LCJpYXQiOjE3Nzk4NzEwMzUsInR5cGUiOiJhY2Nlc3MifQ.IcJseF4eKVZm2clqpTrcT_R_bH4h-nHVwHqbJKnHMFQ" [accepted]
+2026-05-27 08:46:37 | INFO     | peft-platform | 客户端已连接到训练 WebSocket (job c7b86d01-4e0d-440e-8aed-1bf1f1134a6f)
+INFO:     connection open
+INFO:     172.20.0.4:48598 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33276 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33286 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51676 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 08:46:58 | INFO     | peft-platform | Remote cleanup result: true
+cleaned 58 processes
+2026-05-27 08:47:51 | INFO     | peft-platform | Created remote dataset directory: /root/Fine-tuning/backend/data/datasets
+2026-05-27 08:47:51 | INFO     | peft-platform | Uploading dataset file: /root/Fine-tuning/backend/data/uploads/dpo_sample.jsonl -> /root/Fine-tuning/backend/data/datasets/dpo_sample.jsonl
+2026-05-27 08:48:08 | INFO     | peft-platform | Dataset uploaded successfully: /root/Fine-tuning/backend/data/datasets/dpo_sample.jsonl
+2026-05-27 08:48:44 | INFO     | peft-platform | Remote training launched in container: job=c7b86d01-4e0d-440e-8aed-1bf1f1134a6f, container_pid=18988
+INFO:     127.0.0.1:37124 - "GET /health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:45132 - "GET /health HTTP/1.1" 200 OK
+INFO:     127.0.0.1:36946 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:40056 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52102 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51678 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:40042 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:43566 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:52112 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60560 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60574 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60548 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55898 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55904 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:55908 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:42340 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:44370 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:44384 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41298 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:41308 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51512 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51526 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:50576 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:34098 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:34102 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37120 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:37134 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59828 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:41636 - "GET /health HTTP/1.1" 200 OK
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] fla package found at: /opt/conda/lib/python3.10/site-packages/fla
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] fla shared memory patch v2 already applied, skipping
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] [rank 0] === Training job started: c7b86d01-4e0d-440e-8aed-1bf1f1134a6f ===
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/dpo_sample.jsonl
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "41e0a8e2-ddc7-464b-bc44-b13261bbc221", "peft_method": "lora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "learnin
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] Step 1: Preprocessing dataset...
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train]   task_type=dpo, template=auto
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train]   Engine loaded: TextEngine
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train]   Running preprocess_dataset...
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train]   Preprocessing done, output: /root/Fine-tuning/backend/data/processed/c7b86d01-4e0d-440e-8aed-1bf1f1134a6f_processed.jsonl
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
+2026-05-27 08:50:12 | ERROR    | peft-platform | [253:c7b86d01] Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] torch.compile is not available in Python 3.10, using identity decorator instead
+2026-05-27 08:50:12 | WARNING  | peft-platform | [253:c7b86d01] /opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] warnings.warn(_BETA_TRANSFORMS_WARNING)
+2026-05-27 08:50:12 | WARNING  | peft-platform | [253:c7b86d01] /opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] warnings.warn(_BETA_TRANSFORMS_WARNING)
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:   0%|          | 0/320 [00:00<?, ?it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:   0%|          | 1/320 [00:02<11:50,  2.23s/it]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:   3%|▎         | 11/320 [00:02<00:49,  6.19it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:   8%|▊         | 25/320 [00:02<00:19, 15.49it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  12%|█▎        | 40/320 [00:02<00:10, 26.24it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  16%|█▌        | 50/320 [00:02<00:07, 34.00it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  19%|█▉        | 61/320 [00:02<00:06, 43.05it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  22%|██▎       | 72/320 [00:03<00:05, 48.91it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  25%|██▌       | 80/320 [00:03<00:04, 51.23it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  29%|██▉       | 93/320 [00:03<00:04, 56.41it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  32%|███▏      | 101/320 [00:03<00:03, 60.53it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  35%|███▌      | 113/320 [00:03<00:03, 67.92it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  38%|███▊      | 121/320 [00:03<00:02, 67.69it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  41%|████▏     | 132/320 [00:04<00:02, 63.80it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  45%|████▌     | 145/320 [00:04<00:02, 65.41it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  48%|████▊     | 152/320 [00:04<00:02, 66.27it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  52%|█████▏    | 166/320 [00:04<00:01, 78.46it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  55%|█████▍    | 175/320 [00:04<00:01, 80.22it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  57%|█████▊    | 184/320 [00:04<00:01, 82.57it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  61%|██████    | 194/320 [00:04<00:01, 73.32it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  63%|██████▎   | 202/320 [00:04<00:01, 70.55it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  66%|██████▋   | 212/320 [00:05<00:01, 73.11it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  69%|██████▉   | 220/320 [00:05<00:01, 74.66it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  73%|███████▎  | 234/320 [00:05<00:01, 77.77it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  77%|███████▋  | 247/320 [00:05<00:00, 88.18it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  80%|████████  | 257/320 [00:05<00:00, 66.93it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  85%|████████▌ | 273/320 [00:05<00:00, 77.22it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  89%|████████▉ | 285/320 [00:05<00:00, 77.11it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  92%|█████████▏| 294/320 [00:06<00:00, 79.24it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  95%|█████████▌| 305/320 [00:06<00:00, 72.18it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights:  98%|█████████▊| 313/320 [00:06<00:00, 70.69it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Loading weights: 100%|██████████| 320/320 [00:06<00:00, 49.66it/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train]   Model loaded successfully
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] Step 3: Building PEFT config...
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] Step 4: Starting training...
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] [remote_train] Total steps: 3 epochs, batch_size per GPU=16
+2026-05-27 08:50:12 | WARNING  | peft-platform | [253:c7b86d01] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] warnings.warn(msg)
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Traceback (most recent call last):
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] lib = get_native_library()
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
+2026-05-27 08:50:12 | ERROR    | peft-platform | [253:c7b86d01] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
+2026-05-27 08:50:12 | ERROR    | peft-platform | [253:c7b86d01] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+2026-05-27 08:50:12 | WARNING  | peft-platform | [253:c7b86d01] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2026-05-27 08:50:12 | WARNING  | peft-platform | [253:c7b86d01] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Map:   0%|          | 0/5 [00:00<?, ? examples/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] Map: 100%|██████████| 5/5 [00:00<00:00, 154.66 examples/s]
+2026-05-27 08:50:12 | INFO     | peft-platform | [253:c7b86d01] 0%|          | 0/1 [00:00<?, ?it/s]
+INFO:     172.20.0.4:45582 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:40826 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:51600 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:51606 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56294 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56298 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:60180 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:60358 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:60360 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54650 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:54666 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56406 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56412 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:51222 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:56496 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:56506 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59168 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:59180 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33646 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:33652 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     127.0.0.1:52982 - "GET /health HTTP/1.1" 200 OK
+INFO:     172.20.0.4:49764 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 08:52:08 | INFO     | peft-platform | Job c7b86d01-4e0d-440e-8aed-1bf1f1134a6f cancelled
+2026-05-27 08:52:08 | INFO     | peft-platform | Job cancelled: c7b86d01-4e0d-440e-8aed-1bf1f1134a6f
+INFO:     172.20.0.4:49772 - "POST /api/v1/training/jobs/c7b86d01-4e0d-440e-8aed-1bf1f1134a6f/cancel HTTP/1.0" 200 OK
+INFO:     172.20.0.4:49780 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+2026-05-27 08:52:08 | INFO     | peft-platform | 客户端已从训练 WebSocket 断开 (job c7b86d01-4e0d-440e-8aed-1bf1f1134a6f)
+INFO:     connection closed
+INFO:     172.20.0.4:49796 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50594 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50604 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50612 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50614 - "GET /api/v1/api-keys/ HTTP/1.0" 200 OK
+INFO:     172.20.0.4:50622 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK