Răsfoiți Sursa

修复dpo报错

lxylxy123321 23 ore în urmă
părinte
comite
851ce99eb9
3 a modificat fișierele cu 159 adăugiri și 132 ștergeri
  1. 44 0
      backend/app/engines/text_engine.py
  2. 14 5
      backend/app/preprocessors/__init__.py
  3. 101 127
      result.txt

+ 44 - 0
backend/app/engines/text_engine.py

@@ -187,14 +187,33 @@ class TextEngine(BaseEngine):
         from peft import get_peft_model
         from transformers import DataCollatorForSeq2Seq, TrainingArguments
 
+        # 防御 JSON 反序列化时 null → None:dict.get 的 default 只在 key 不存在时生效,
+        # 如果 key 存在但值为 None(来自前端传 null 或 JSON 中写了 null),仍返回 None。
+        # 用 `if v is None` 显式兜底,确保后续算术运算不会 TypeError。
         task_type = training_args.get("task_type", "sft")
+        if task_type is None:
+            task_type = "sft"
         epochs = training_args.get("epochs", 3)
+        if epochs is None:
+            epochs = 3
         batch_size = training_args.get("batch_size", 4)
+        if batch_size is None:
+            batch_size = 4
         gradient_accumulation = training_args.get("gradient_accumulation", 4)
+        if gradient_accumulation is None:
+            gradient_accumulation = 4
         learning_rate = training_args.get("learning_rate", 2e-4)
+        if learning_rate is None:
+            learning_rate = 2e-4
         max_seq_length = training_args.get("max_seq_length", 2048)
+        if max_seq_length is None:
+            max_seq_length = 2048
         warmup_ratio = training_args.get("warmup_ratio", 0.05)
+        if warmup_ratio is None:
+            warmup_ratio = 0.05
         save_strategy = training_args.get("save_strategy", "epoch")
+        if save_strategy is None:
+            save_strategy = "epoch"
         deepspeed_config = training_args.get("deepspeed", None)
 
         # DDP 支持
@@ -302,6 +321,31 @@ class TextEngine(BaseEngine):
                 DPOTrainer.get_batch_samples = _patched_gbs
                 DPOTrainer._patched_gbs = True
 
+            # 修复 Qwen tokenizer bug:tokenize 后 input_ids 末尾可能追加 None
+            # 导致 DPODataCollatorWithPadding 中 torch.tensor([...None...], dtype=int64) 报错
+            # 参考: https://github.com/huggingface/trl/issues/1073
+            if not getattr(self._tokenizer, "_patched_none_filter", False):
+                _orig_tok_call = self._tokenizer.__class__.__call__
+                def _call_filter_none(self_tok, *args, **kwargs):
+                    result = _orig_tok_call(self_tok, *args, **kwargs)
+                    if isinstance(result, dict) and "input_ids" in result:
+                        ids = result["input_ids"]
+                        if isinstance(ids, list) and ids:
+                            if isinstance(ids[0], list):
+                                # batched 输入:input_ids 是二维 list
+                                result["input_ids"] = [
+                                    [x for x in seq if x is not None] for seq in ids
+                                ]
+                            else:
+                                # 单条输入:input_ids 是一维 list,过滤 None
+                                result["input_ids"] = [x for x in ids if x is not None]
+                    return result
+                # 绑定到实例(通过 type 避免 MRO 问题)
+                import types
+                self._tokenizer.__call__ = types.MethodType(_call_filter_none, self._tokenizer)
+                self._tokenizer._patched_none_filter = True
+                logger.info("Patched tokenizer to filter None values from input_ids (Qwen workaround)")
+
             # 显式创建 reference model 并冻结,避免 AdaLora 多 adapter 冲突
             ref_model = deepcopy(self._model)
             ref_model.eval()

+ 14 - 5
backend/app/preprocessors/__init__.py

@@ -112,29 +112,38 @@ def apply_dpo_template(item: dict) -> dict:
     prompt = item.get("prompt", item.get("instruction", item.get("input", item.get("question", item.get("query", "")))))
     chosen = item.get("chosen", item.get("positive", item.get("answer", "")))
     rejected = item.get("rejected", item.get("negative", ""))
-    # 确保所有值为字符串
-    prompt = str(prompt) if prompt is not None else ""
-    chosen = str(chosen) if chosen is not None else ""
-    rejected = str(rejected) if rejected is not None else ""
-    # 防御列表型字段(messages/sharegpt 残留):拼接为单字符串
+    # 先处理列表型字段(如 ShareGPT messages 列表),再转字符串
+    # 必须在 str() 之前判断,否则 list 会被 str() 转成 Python repr 字符串
     if isinstance(prompt, list):
         prompt = "\n".join(
             str(x.get("content", x)) if isinstance(x, dict) else str(x)
             for x in prompt
             if x is not None
         )
+    elif prompt is not None:
+        prompt = str(prompt)
+    else:
+        prompt = ""
     if isinstance(chosen, list):
         chosen = "\n".join(
             str(x.get("content", x)) if isinstance(x, dict) else str(x)
             for x in chosen
             if x is not None
         )
+    elif chosen is not None:
+        chosen = str(chosen)
+    else:
+        chosen = ""
     if isinstance(rejected, list):
         rejected = "\n".join(
             str(x.get("content", x)) if isinstance(x, dict) else str(x)
             for x in rejected
             if x is not None
         )
+    elif rejected is not None:
+        rejected = str(rejected)
+    else:
+        rejected = ""
     return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
 
 

+ 101 - 127
result.txt

@@ -1,127 +1,101 @@
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Map:   0%|          | 0/5 [00:00<?, ? examples/s]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Map: 100%|██████████| 5/5 [00:00<00:00, 174.12 examples/s]
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] /opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] warnings.warn(msg)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] lib = get_native_library()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] [transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] /opt/conda/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py:394: UserWarning: `max_length` is not set in the DPOConfig's init it will default to `512` by default, but you should do it yourself in the future.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] warnings.warn(
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] /opt/conda/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py:407: UserWarning: `max_prompt_length` is not set in the DPOConfig's init it will default to `128` by default, but you should do it yourself in the future.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] warnings.warn(
-2026-05-27 07:35:25 | WARNING  | peft-platform | [253:489b64b9] /opt/conda/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py:442: UserWarning: When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] warnings.warn(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Map:   0%|          | 0/5 [00:00<?, ? examples/s]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Map: 100%|██████████| 5/5 [00:00<00:00, 173.18 examples/s]
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] 0%|          | 0/1 [00:00<?, ?it/s]Training failed for job 489b64b9-f349-4263-82e1-b6a73eaeff80: Caught TypeError in DataLoader worker process 0.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Original Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self.collate_fn(data)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: 'NoneType' object cannot be interpreted as an integer
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] [remote_train] [rank 0] ERROR: Caught TypeError in DataLoader worker process 0.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Original Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self.collate_fn(data)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: 'NoneType' object cannot be interpreted as an integer
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] [remote_train] Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] adapter_path = await engine.train(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 469, in train
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] trainer.train()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return inner_training_loop(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] self._run_epoch(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] batch = next(epoch_iterator)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] current_batch = next(dataloader_iter)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = self._next_data()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1516, in _next_data
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self._process_data(data, worker_id)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1551, in _process_data
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data.reraise()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 769, in reraise
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] raise exception
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: Caught TypeError in DataLoader worker process 0.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Original Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self.collate_fn(data)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: 'NoneType' object cannot be interpreted as an integer
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] [remote_train] === Training job failed: 489b64b9-f349-4263-82e1-b6a73eaeff80 ===
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return _run_code(code, main_globals, None,
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] exec(code, run_globals)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] main()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return loop.run_until_complete(main)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return future.result()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] adapter_path = await engine.train(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 469, in train
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] trainer.train()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return inner_training_loop(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] self._run_epoch(
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] batch = next(epoch_iterator)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] current_batch = next(dataloader_iter)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = self._next_data()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1516, in _next_data
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self._process_data(data, worker_id)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1551, in _process_data
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data.reraise()
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 769, in reraise
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] raise exception
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: Caught TypeError in DataLoader worker process 0.
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] Original Traceback (most recent call last):
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] return self.collate_fn(data)
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
-2026-05-27 07:35:25 | ERROR    | peft-platform | [253:489b64b9] TypeError: 'NoneType' object cannot be interpreted as an integer
-2026-05-27 07:35:25 | INFO     | peft-platform | [253:489b64b9] 0%|          | 0/1 [00:14<?, ?it/s]
-INFO:     127.0.0.1:56702 - "GET /health HTTP/1.1" 200 OK
+(base) [root@localhost ~]# docker exec finetune-trainer tail -100 /tmp/train_297b8bc2-e382-4b53-853b-dbff4578601e.log
+[remote_train]   task_type=dpo, template=auto
+[remote_train]   Engine loaded: TextEngine
+[remote_train]   Running preprocess_dataset...
+[remote_train]   Preprocessing done, output: /root/Fine-tuning/backend/data/processed/297b8bc2-e382-4b53-853b-dbff4578601e_processed.jsonl
+[remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
+Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+torch.compile is not available in Python 3.10, using identity decorator instead
+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+Loading weights: 100%|██████████| 320/320 [00:06<00:00, 47.18it/s]
+[remote_train]   Model loaded successfully
+[remote_train] Step 3: Building PEFT config...
+[remote_train] Step 4: Starting training...
+[remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
+[remote_train] Total steps: 3 epochs, batch_size per GPU=16
+/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
+  warnings.warn(msg)
+bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+Traceback (most recent call last):
+  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
+    lib = get_native_library()
+  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
+    raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
+RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695
+Map: 100%|██████████| 5/5 [00:00<00:00, 160.55 examples/s]
+  0%|          | 0/1 [00:00<?, ?it/s]Training failed for job 297b8bc2-e382-4b53-853b-dbff4578601e: 'NoneType' object cannot be interpreted as an integer
+[remote_train] [rank 0] ERROR: 'NoneType' object cannot be interpreted as an integer
+[remote_train] Traceback (most recent call last):
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
+    adapter_path = await engine.train(
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 472, in train
+    trainer.train()
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
+    return inner_training_loop(
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
+    self._run_epoch(
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
+    batch = next(epoch_iterator)
+  File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
+    current_batch = next(dataloader_iter)
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
+    return self.collate_fn(data)
+  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
+    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
+  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
+    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
+TypeError: 'NoneType' object cannot be interpreted as an integer
+
+[remote_train] === Training job failed: 297b8bc2-e382-4b53-853b-dbff4578601e ===
+Traceback (most recent call last):
+  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
+    main()
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
+    asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
+  File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
+    return loop.run_until_complete(main)
+  File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
+    return future.result()
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
+    adapter_path = await engine.train(
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 472, in train
+    trainer.train()
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1427, in train
+    return inner_training_loop(
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1509, in _inner_training_loop
+    self._run_epoch(
+  File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 1704, in _run_epoch
+    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, self.args.device)
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 296, in _patched_gbs
+    batch = next(epoch_iterator)
+  File "/opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py", line 577, in __iter__
+    current_batch = next(dataloader_iter)
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
+    return self.collate_fn(data)
+  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in __call__
+    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
+  File "/opt/conda/lib/python3.10/site-packages/trl/trainer/utils.py", line 460, in <listcomp>
+    to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features]
+TypeError: 'NoneType' object cannot be interpreted as an integer
+  0%|          | 0/1 [00:12<?, ?it/s]