|
|
@@ -116,6 +116,25 @@ def apply_dpo_template(item: dict) -> dict:
|
|
|
prompt = str(prompt) if prompt is not None else ""
|
|
|
chosen = str(chosen) if chosen is not None else ""
|
|
|
rejected = str(rejected) if rejected is not None else ""
|
|
|
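+ # Guard against list-valued fields (messages/sharegpt leftovers) by joining them into one string. NOTE(review): the str() coercion just above already stringifies lists, so these isinstance(..., list) checks never fire — run them before the coercion.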
|
|
|
+ if isinstance(prompt, list):
|
|
|
+ prompt = "\n".join(
|
|
|
+ str(x.get("content", x)) if isinstance(x, dict) else str(x)
|
|
|
+ for x in prompt
|
|
|
+ if x is not None
|
|
|
+ )
|
|
|
+ if isinstance(chosen, list):
|
|
|
+ chosen = "\n".join(
|
|
|
+ str(x.get("content", x)) if isinstance(x, dict) else str(x)
|
|
|
+ for x in chosen
|
|
|
+ if x is not None
|
|
|
+ )
|
|
|
+ if isinstance(rejected, list):
|
|
|
+ rejected = "\n".join(
|
|
|
+ str(x.get("content", x)) if isinstance(x, dict) else str(x)
|
|
|
+ for x in rejected
|
|
|
+ if x is not None
|
|
|
+ )
|
|
|
return {"prompt": prompt, "chosen": chosen, "rejected": rejected}
|
|
|
|
|
|
|
|
|
@@ -201,6 +220,12 @@ def preprocess_file(
|
|
|
for item in raw_data:
|
|
|
try:
|
|
|
result = apply_fn(item)
|
|
|
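+ # DPO/preference tasks must keep prompt, chosen and rejected together;
|
|
|
+ # filtering on prompt alone wrongly drops valid preference samples, so an empty batch reaches the collator and raises. NOTE(review): the check below accepts a sample with only one of chosen/rejected, and a failed sample may still fall through to the prompt-only check — confirm both are intended.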
|
|
|
+ if "chosen" in result or "rejected" in result:
|
|
|
+ if result.get("prompt") and (result.get("chosen") or result.get("rejected")):
|
|
|
+ processed.append(result)
|
|
|
+ continue
|
|
|
if result.get("prompt"):
|
|
|
processed.append(result)
|
|
|
except Exception:
|