lxylxy123321 17 ساعت پیش
والد
کامیت
308aa19f0a
1 فایلهای تغییر یافته به همراه 30 افزوده شده و 0 حذف شده
  1. 30 0
      backend/app/engines/text_engine.py

+ 30 - 0
backend/app/engines/text_engine.py

@@ -394,6 +394,36 @@ class TextEngine(BaseEngine):
                 train_dataset=dataset,
                 tokenizer=self._tokenizer,
             )
+
+            # Work around a Qwen tokenizer bug (TRL #1073):
+            # after tokenization, input_ids may end with None, which makes
+            # torch.tensor([..., None], dtype=int64) crash inside the collator.
+            # Monkey-patch DPODataCollatorWithPadding.__call__ (the crash site) directly
+            # so that None values are stripped from features before the original logic runs.
+            try:
+                from trl.trainer.utils import DPODataCollatorWithPadding as _DC
+                if not getattr(_DC, "_patched_none_filter", False):  # class-level flag: install the wrapper only once
+                    _orig_dc_call = _DC.__call__  # keep the original so the wrapper can delegate to it
+
+                    def _dc_call_clean(self_dc, features):  # replacement __call__: clean features, then delegate
+                        for ex in features:
+                            for k in list(ex.keys()):  # snapshot of keys; values are reassigned in place below
+                                v = ex[k]
+                                if isinstance(v, list):
+                                    if v and isinstance(v[0], list):  # only the first element is inspected to detect nesting
+                                        ex[k] = [[x for x in seq if x is not None] for seq in v]  # drop None inside each inner sequence
+                                    else:
+                                        ex[k] = [x for x in v if x is not None]  # NOTE(review): each key is cleaned independently, so parallel lists may end up with different lengths - confirm
+                                elif v is None:
+                                    ex[k] = []  # a value that is None itself becomes an empty list
+                        return _orig_dc_call(self_dc, features)  # features were mutated in place above
+
+                    _DC.__call__ = _dc_call_clean  # patches the class itself, not a single instance
+                    _DC._patched_none_filter = True
+                    logger.info("Patched DPODataCollatorWithPadding to filter None from token ids")
+            except (ImportError, AttributeError) as _e:  # patch failure is logged, not raised
+                logger.warning(f"Could not patch DPODataCollatorWithPadding: {_e}")
+
         elif task_type == "ppo":
             import torch
             from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer