|
@@ -394,6 +394,36 @@ class TextEngine(BaseEngine):
|
|
|
train_dataset=dataset,
|
|
train_dataset=dataset,
|
|
|
tokenizer=self._tokenizer,
|
|
tokenizer=self._tokenizer,
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
+ # 修复 Qwen tokenizer bug(TRL #1073):
|
|
|
|
|
+ # tokenize 后 input_ids 末尾可能含 None,导致 collator 中
|
|
|
|
|
+ # torch.tensor([..., None], dtype=int64) 崩溃。
|
|
|
|
|
+ # 直接 monkey-patch DPODataCollatorWithPadding.__call__(即崩溃点),
|
|
|
|
|
+ # 在原始逻辑执行前清洗 features 中的 None 值。
|
|
|
|
|
+ try:
|
|
|
|
|
+ from trl.trainer.utils import DPODataCollatorWithPadding as _DC
|
|
|
|
|
+ if not getattr(_DC, "_patched_none_filter", False):
|
|
|
|
|
+ _orig_dc_call = _DC.__call__
|
|
|
|
|
+
|
|
|
|
|
def _dc_call_clean(self_dc, features):
    """Strip ``None`` entries from every feature, then run the original collator.

    Installed as a replacement for ``DPODataCollatorWithPadding.__call__``.
    Each example in ``features`` is sanitized in place before being handed
    to the wrapped ``_orig_dc_call``:

    * list values have ``None`` entries removed at every nesting level;
    * values that are ``None`` themselves are replaced by an empty list;
    * every other value is left untouched.

    Args:
        self_dc: The collator instance this call is bound to.
        features: The examples to collate; each one must support ``keys()``
            and item get/set. The examples are mutated in place.

    Returns:
        Whatever the original ``__call__`` returns for the cleaned features.
    """

    def _strip_none(value):
        """Return ``value`` with ``None`` removed from lists, recursively."""
        if not isinstance(value, list):
            return value
        return [_strip_none(item) for item in value if item is not None]

    for example in features:
        # Snapshot the keys: values are reassigned while the mapping is walked.
        for key in list(example.keys()):
            value = example[key]
            if value is None:
                example[key] = []
            elif isinstance(value, list):
                # Decide per element instead of peeking only at value[0]:
                # a ``None`` (or scalar) sitting next to inner sequences must
                # neither be iterated (TypeError) nor hide the inner ``None``s.
                example[key] = _strip_none(value)

    # NOTE(review): entries are dropped independently per field. If parallel
    # fields (e.g. ids vs. masks) must stay length-aligned, confirm that None
    # shows up consistently across them -- not verifiable from this block.
    return _orig_dc_call(self_dc, features)
|
|
|
|
|
+
|
|
|
|
|
+ _DC.__call__ = _dc_call_clean
|
|
|
|
|
+ _DC._patched_none_filter = True
|
|
|
|
|
+ logger.info("Patched DPODataCollatorWithPadding to filter None from token ids")
|
|
|
|
|
+ except (ImportError, AttributeError) as _e:
|
|
|
|
|
+ logger.warning(f"Could not patch DPODataCollatorWithPadding: {_e}")
|
|
|
|
|
+
|
|
|
elif task_type == "ppo":
|
|
elif task_type == "ppo":
|
|
|
import torch
|
|
import torch
|
|
|
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
|
|
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
|