result.txt 4.5 KB

(lines 1-65)
  1. (base) [root@localhost ~]# docker exec finetune-trainer find /root/Fine-tuning/backend -name '*.pyc' -delete && docker exec finetune-trainer find /root/Fine-tuning/backend -name '__pycache__' -type d -delete
  2. (base) [root@localhost ~]#
  3. (base) [root@localhost ~]# docker exec finetune-trainer tail -200 /tmp/train_1e334a57-26f5-4e7e-a961-0a02330fa708.log
  4. [remote_train] === Training job started: 1e334a57-26f5-4e7e-a961-0a02330fa708 ===
  5. [remote_train] model_id=Qwen/Qwen1.5-0.5B, model_type=text
  6. [remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/data.jsonl
  7. [remote_train] config={"model_id": "Qwen/Qwen1.5-0.5B", "model_type": "text", "dataset_id": "3d5f8808-e71a-449d-94e9-c61c4881b2cf", "peft_method": "adalora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "lear
  8. [remote_train] Dataset file exists: /root/Fine-tuning/backend/data/datasets/data.jsonl
  9. [remote_train] Step 1: Preprocessing dataset...
  10. [remote_train] task_type=sft, template=auto
  11. [remote_train] output_path=/root/Fine-tuning/backend/data/processed/1e334a57-26f5-4e7e-a961-0a02330fa708_processed.jsonl
  12. [remote_train] Selecting engine for model_type=text...
  13. [remote_train] Engine loaded: TextEngine
  14. [remote_train] PEFT method: adalora
  15. [remote_train] Running preprocess_dataset...
  16. [remote_train] Preprocessing done, output: /root/Fine-tuning/backend/data/processed/1e334a57-26f5-4e7e-a961-0a02330fa708_processed.jsonl
  17. [remote_train] Step 2: Loading model: Qwen/Qwen1.5-0.5B...
  18. [remote_train] Quantization: None
  19. Loading weights: 100%|██████████| 291/291 [00:04<00:00, 59.39it/s]
  20. [remote_train] Model loaded successfully
  21. [remote_train] Step 3: Building PEFT config...
  22. [remote_train] ERROR: AdaLoRA does not work when `total_step` is None, supply a value > 0.
  23. [remote_train] Traceback (most recent call last):
  24. File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 162, in run_training
  25. peft_config = engine.get_peft_config(peft_method, config)
  26. File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 149, in get_peft_config
  27. return builder(params)
  28. File "/root/Fine-tuning/backend/app/peft/__init__.py", line 43, in build_adalora_config
  29. return AdaLoraConfig(
  30. File "<string>", line 51, in __init__
  31. File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/config.py", line 102, in __post_init__
  32. raise ValueError("AdaLoRA does not work when `total_step` is None, supply a value > 0.")
  33. ValueError: AdaLoRA does not work when `total_step` is None, supply a value > 0.
  34. [remote_train] === Training job failed: 1e334a57-26f5-4e7e-a961-0a02330fa708 ===
  35. Traceback (most recent call last):
  36. File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
  37. return _run_code(code, main_globals, None,
  38. File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
  39. exec(code, run_globals)
  40. File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 213, in <module>
  41. main()
  42. File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 209, in main
  43. asyncio.run(run_training(job_id, model_id, model_type, dataset_id, config))
  44. File "/opt/conda/lib/python3.10/asyncio/runners.py", line 44, in run
  45. return loop.run_until_complete(main)
  46. File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
  47. return future.result()
  48. File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 162, in run_training
  49. peft_config = engine.get_peft_config(peft_method, config)
  50. File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 149, in get_peft_config
  51. return builder(params)
  52. File "/root/Fine-tuning/backend/app/peft/__init__.py", line 43, in build_adalora_config
  53. return AdaLoraConfig(
  54. File "<string>", line 51, in __init__
  55. File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/config.py", line 102, in __post_init__
  56. raise ValueError("AdaLoRA does not work when `total_step` is None, supply a value > 0.")
  57. ValueError: AdaLoRA does not work when `total_step` is None, supply a value > 0.
  58. (base) [root@localhost ~]#
  59. (base) [root@localhost ~]# grep -n 'total_step\|init_r.*target_r' /root/Fine-tuning/backend/app/engines/text_engine.py
  60. 190: # 计算总步数(AdaLoRA 需要在 get_peft_model 之前设置 total_step)
  61. 194: # AdaLoRA 要求 total_step > 0(通过属性名判断而非 isinstance,避免导入路径问题)
  62. 195: if hasattr(peft_config, "init_r") and hasattr(peft_config, "target_r"):
  63. 196: peft_config.total_step = max_steps
  64. 396: total_steps=state.max_steps or 0,