Explorar o código

修复ppo报错

lxylxy123321 hai 1 día
pai
achega
a03105a4fd
Modificáronse 2 ficheiros con 100 adicións e 13 borrados
  1. 36 6
      backend/app/engines/text_engine.py
  2. 64 7
      result.txt

+ 36 - 6
backend/app/engines/text_engine.py

@@ -327,23 +327,53 @@ class TextEngine(BaseEngine):
             for param in ref_model.parameters():
                 param.requires_grad = False
 
-            ppo_config = PPOConfig(
+            # Stay compatible with PPOConfig parameter renames across TRL versions.
+            # TRL 0.12+: ppo_epochs -> num_ppo_epochs, init_kl_coef -> kl_coef; vf_coef is still accepted there.
+            import inspect
+            ppo_config_sig = inspect.signature(PPOConfig.__init__)
+            ppo_config_params = set(ppo_config_sig.parameters.keys())
+
+            ppo_config_kwargs = dict(
                 learning_rate=learning_rate,
                 batch_size=batch_size,
                 gradient_accumulation_steps=gradient_accumulation,
-                ppo_epochs=ppo_epochs,
-                vf_coef=vf_coef,
-                kl_ctl=kl_coef,
-                response_length=response_length,
                 output_dir=output_dir,
                 logging_steps=10,
                 save_strategy=save_strategy,
-                fp16=True,
                 report_to="none",
                 dataloader_num_workers=4,
                 dataloader_pin_memory=False,
             )
 
+            # ppo_epochs: 新版叫 num_ppo_epochs,旧版叫 ppo_epochs
+            if "num_ppo_epochs" in ppo_config_params:
+                ppo_config_kwargs["num_ppo_epochs"] = ppo_epochs
+            elif "ppo_epochs" in ppo_config_params:
+                ppo_config_kwargs["ppo_epochs"] = ppo_epochs
+
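+            # KL coefficient: legacy PPOConfig calls it init_kl_coef. NOTE(review): TRL 0.12+ names it kl_coef, which neither branch here checks, so the value may be silently dropped — confirm.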
+            if "init_kl_coef" in ppo_config_params:
+                ppo_config_kwargs["init_kl_coef"] = kl_coef
+            elif "kl_ctl" in ppo_config_params:
+                ppo_config_kwargs["kl_ctl"] = kl_coef
+
+            # vf_coef: 新版可能已移除,仅在支持时传入
+            if "vf_coef" in ppo_config_params:
+                ppo_config_kwargs["vf_coef"] = vf_coef
+
+            # response_length: 部分版本可能不支持
+            if "response_length" in ppo_config_params:
+                ppo_config_kwargs["response_length"] = response_length
+
+            # fp16/bf16: 新版可能使用不同的混合精度参数名
+            if "fp16" in ppo_config_params:
+                ppo_config_kwargs["fp16"] = True
+
+            logger.info(f"PPOConfig 可用参数: {sorted(ppo_config_params)}")
+            logger.info(f"PPOConfig 实际传入参数: {ppo_config_kwargs}")
+
+            ppo_config = PPOConfig(**ppo_config_kwargs)
+
             trainer = PPOTrainer(
                 config=ppo_config,
                 model=self._model,

+ 64 - 7
result.txt

@@ -1,7 +1,64 @@
-NFO:     172.20.0.4:53454 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-26 08:47:52 | ERROR    | peft-platform | Remote job ef093221-dfbf-4e0e-af51-eb08d29803ec failed: AdaLoraModel supports only 1 trainable adapter. When using multiple adapters, set inference_mode to True for all adapters except the one you want to train.
-INFO:     172.20.0.4:56038 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.20.0.4:56044 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-2026-05-26 08:48:02 | ERROR    | peft-platform | SSH command timeout after 10s: docker exec finetune-trainer bash -c 'kill -9 12111 2>/dev/null; pkill -9 -P 12111 2>/dev/null'
-2026-05-26 08:48:02 | INFO     | peft-platform | Killed remote process 12111 via docker exec
-2026-05-26 08:48:02 | INFO     | peft-platform | Remote training launched for job ef093221-dfbf-4e0e-af51-eb08d29803ec
+(base) [root@localhost ~]# docker exec finetune-trainer cat /tmp/train_c95513aa-73e6-40fb-8e2d-1700b5143e44.log
+[remote_train] fla package found at: /opt/conda/lib/python3.10/site-packages/fla
+[remote_train] fla shared memory patch v2 already applied, skipping
+[remote_train] [rank 0] === Training job started: c95513aa-73e6-40fb-8e2d-1700b5143e44 ===
+[remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
+[remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/ppo_sample.jsonl
+[remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "26767f82-673c-4199-8c59-e9ed715f0ae0", "peft_method": "lora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "learnin
+[remote_train] Step 1: Preprocessing dataset...
+[remote_train]   task_type=ppo, template=auto
+[remote_train]   Engine loaded: TextEngine
+[remote_train]   Running preprocess_dataset...
+[remote_train]   Preprocessing done, output: /root/Fine-tuning/backend/data/processed/c95513aa-73e6-40fb-8e2d-1700b5143e44_processed.jsonl
+[remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
+Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
+Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
+torch.compile is not available in Python 3.10, using identity decorator instead
+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
+  warnings.warn(_BETA_TRANSFORMS_WARNING)
+Loading weights: 100%|██████████| 320/320 [00:06<00:00, 50.21it/s]
+[remote_train]   Model loaded successfully
+[remote_train] Step 3: Building PEFT config...
+[remote_train] Step 4: Starting training...
+[remote_train] NOTE: First step may take 2-5 minutes due to Triton kernel compilation (autotuning). This is normal.
+[remote_train] Total steps: 3 epochs, batch_size per GPU=16
+/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:1348: UserWarning: Model has `tie_word_embeddings=True` and a tied layer is part of the adapter, but `ensure_weight_tying` is not set to True. This can lead to complications, for example when merging the adapter or converting your model to formats other than safetensors. Check the discussion here: https://github.com/huggingface/peft/issues/2777
+  warnings.warn(msg)
+bitsandbytes library load error: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+Traceback (most recent call last):
+  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 320, in <module>
+    lib = get_native_library()
+  File "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cextension.py", line 288, in get_native_library
+    raise RuntimeError(f"Configured {BNB_BACKEND} binary not found at {cuda_binary_path}")
+RuntimeError: Configured CUDA binary not found at /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
+[transformers] warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
+[remote_train] [rank 0] ERROR: cannot import name 'PPOConfig' from 'trl' (/opt/conda/lib/python3.10/site-packages/trl/__init__.py)
+[remote_train] Traceback (most recent call last):
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
+    adapter_path = await engine.train(
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 308, in train
+    from trl import PPOConfig, PPOTrainer
+ImportError: cannot import name 'PPOConfig' from 'trl' (/opt/conda/lib/python3.10/site-packages/trl/__init__.py)
+
+[remote_train] === Training job failed: c95513aa-73e6-40fb-8e2d-1700b5143e44 ===
+Traceback (most recent call last):
+  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 466, in <module>
+    main()
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 461, in main
+    asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config,
+  File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
+    return loop.run_until_complete(main)
+  File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
+    return future.result()
+  File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 236, in run_training
+    adapter_path = await engine.train(
+  File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 308, in train
+    from trl import PPOConfig, PPOTrainer
+ImportError: cannot import name 'PPOConfig' from 'trl' (/opt/conda/lib/python3.10/site-packages/trl/__init__.py)
+trainable params: 5,070,848 || all params: 757,463,872 || trainable%: 0.6695