|
@@ -1,38 +1,55 @@
|
|
|
-(base) [root@localhost ~]# docker exec finetune-trainer find /root/Fine-tuning/backend -name '*.pyc' -delete && docker exec finetune-trainer find /root/Fine-tuning/backend -name '__pycache__' -type d -delete
|
|
|
|
|
-(base) [root@localhost ~]#
|
|
|
|
|
-(base) [root@localhost ~]# docker exec finetune-trainer tail -200 /tmp/train_1e334a57-26f5-4e7e-a961-0a02330fa708.log
|
|
|
|
|
-[remote_train] === Training job started: 1e334a57-26f5-4e7e-a961-0a02330fa708 ===
|
|
|
|
|
-[remote_train] model_id=Qwen/Qwen1.5-0.5B, model_type=text
|
|
|
|
|
|
|
+(base) [root@localhost ~]# docker exec finetune-trainer tail -200 /tmp/train_33166c59-034d-4afd-92ba-ff6bece676dc.log
|
|
|
|
|
+[remote_train] === Training job started: 33166c59-034d-4afd-92ba-ff6bece676dc ===
|
|
|
|
|
+[remote_train] model_id=Qwen/Qwen3.5-0.8B, model_type=text
|
|
|
[remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
[remote_train] dataset_path=/root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
|
-[remote_train] config={"model_id": "Qwen/Qwen1.5-0.5B", "model_type": "text", "dataset_id": "3d5f8808-e71a-449d-94e9-c61c4881b2cf", "peft_method": "adalora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "lear
|
|
|
|
|
|
|
+[remote_train] config={"model_id": "Qwen/Qwen3.5-0.8B", "model_type": "text", "dataset_id": "3d5f8808-e71a-449d-94e9-c61c4881b2cf", "peft_method": "adalora", "epochs": 3, "batch_size": 16, "gradient_accumulation": 4, "lear
|
|
|
[remote_train] Dataset file exists: /root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
[remote_train] Dataset file exists: /root/Fine-tuning/backend/data/datasets/data.jsonl
|
|
|
[remote_train] Step 1: Preprocessing dataset...
|
|
[remote_train] Step 1: Preprocessing dataset...
|
|
|
[remote_train] task_type=sft, template=auto
|
|
[remote_train] task_type=sft, template=auto
|
|
|
-[remote_train] output_path=/root/Fine-tuning/backend/data/processed/1e334a57-26f5-4e7e-a961-0a02330fa708_processed.jsonl
|
|
|
|
|
|
|
+[remote_train] output_path=/root/Fine-tuning/backend/data/processed/33166c59-034d-4afd-92ba-ff6bece676dc_processed.jsonl
|
|
|
[remote_train] Selecting engine for model_type=text...
|
|
[remote_train] Selecting engine for model_type=text...
|
|
|
[remote_train] Engine loaded: TextEngine
|
|
[remote_train] Engine loaded: TextEngine
|
|
|
[remote_train] PEFT method: adalora
|
|
[remote_train] PEFT method: adalora
|
|
|
[remote_train] Running preprocess_dataset...
|
|
[remote_train] Running preprocess_dataset...
|
|
|
-[remote_train] Preprocessing done, output: /root/Fine-tuning/backend/data/processed/1e334a57-26f5-4e7e-a961-0a02330fa708_processed.jsonl
|
|
|
|
|
-[remote_train] Step 2: Loading model: Qwen/Qwen1.5-0.5B...
|
|
|
|
|
|
|
+[remote_train] Preprocessing done, output: /root/Fine-tuning/backend/data/processed/33166c59-034d-4afd-92ba-ff6bece676dc_processed.jsonl
|
|
|
|
|
+[remote_train] Step 2: Loading model: Qwen/Qwen3.5-0.8B...
|
|
|
[remote_train] Quantization: None
|
|
[remote_train] Quantization: None
|
|
|
-Loading weights: 100%|██████████| 291/291 [00:04<00:00, 59.39it/s]
|
|
|
|
|
|
|
+Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
|
|
|
|
|
+Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
|
|
|
|
|
+torch.compile is not available in Python 3.10, using identity decorator instead
|
|
|
|
|
+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
+ warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
|
|
|
|
|
+ warnings.warn(_BETA_TRANSFORMS_WARNING)
|
|
|
|
|
+Loading weights: 100%|██████████| 320/320 [00:06<00:00, 49.85it/s]
|
|
|
[remote_train] Model loaded successfully
|
|
[remote_train] Model loaded successfully
|
|
|
[remote_train] Step 3: Building PEFT config...
|
|
[remote_train] Step 3: Building PEFT config...
|
|
|
-[remote_train] ERROR: AdaLoRA does not work when `total_step` is None, supply a value > 0.
|
|
|
|
|
|
|
+[remote_train] PEFT config built
|
|
|
|
|
+[remote_train] Step 4: Starting training...
|
|
|
|
|
+Map: 100%|██████████| 60/60 [00:00<00:00, 2165.49 examples/s]
|
|
|
|
|
+[remote_train] ERROR: Please specify `target_modules` or `target_parameters`in `peft_config`
|
|
|
[remote_train] Traceback (most recent call last):
|
|
[remote_train] Traceback (most recent call last):
|
|
|
- File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 162, in run_training
|
|
|
|
|
- peft_config = engine.get_peft_config(peft_method, config)
|
|
|
|
|
- File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 149, in get_peft_config
|
|
|
|
|
- return builder(params)
|
|
|
|
|
- File "/root/Fine-tuning/backend/app/peft/__init__.py", line 43, in build_adalora_config
|
|
|
|
|
- return AdaLoraConfig(
|
|
|
|
|
- File "<string>", line 51, in __init__
|
|
|
|
|
- File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/config.py", line 102, in __post_init__
|
|
|
|
|
- raise ValueError("AdaLoRA does not work when `total_step` is None, supply a value > 0.")
|
|
|
|
|
-ValueError: AdaLoRA does not work when `total_step` is None, supply a value > 0.
|
|
|
|
|
|
|
+ File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 172, in run_training
|
|
|
|
|
+ adapter_path = await engine.train(
|
|
|
|
|
+ File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 198, in train
|
|
|
|
|
+ self._model = get_peft_model(self._model, peft_config)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/mapping_func.py", line 122, in get_peft_model
|
|
|
|
|
+ return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/peft_model.py", line 1955, in __init__
|
|
|
|
|
+ super().__init__(model, peft_config, adapter_name, **kwargs)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/peft_model.py", line 129, in __init__
|
|
|
|
|
+ self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/model.py", line 69, in __init__
|
|
|
|
|
+ super().__init__(model, config, adapter_name, **kwargs)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 315, in __init__
|
|
|
|
|
+ self.inject_adapter(self.model, adapter_name, low_cpu_mem_usage=low_cpu_mem_usage, state_dict=state_dict)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 815, in inject_adapter
|
|
|
|
|
+ peft_config = self._prepare_adapter_config(peft_config, model_config)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 570, in _prepare_adapter_config
|
|
|
|
|
+ raise ValueError("Please specify `target_modules` or `target_parameters`in `peft_config`")
|
|
|
|
|
+ValueError: Please specify `target_modules` or `target_parameters`in `peft_config`
|
|
|
|
|
|
|
|
-[remote_train] === Training job failed: 1e334a57-26f5-4e7e-a961-0a02330fa708 ===
|
|
|
|
|
|
|
+[remote_train] === Training job failed: 33166c59-034d-4afd-92ba-ff6bece676dc ===
|
|
|
Traceback (most recent call last):
|
|
Traceback (most recent call last):
|
|
|
File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
|
|
File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
|
|
|
return _run_code(code, main_globals, None,
|
|
return _run_code(code, main_globals, None,
|
|
@@ -46,20 +63,22 @@ Traceback (most recent call last):
|
|
|
return loop.run_until_complete(main)
|
|
return loop.run_until_complete(main)
|
|
|
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
|
|
File "/opt/conda/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
|
|
|
return future.result()
|
|
return future.result()
|
|
|
- File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 162, in run_training
|
|
|
|
|
- peft_config = engine.get_peft_config(peft_method, config)
|
|
|
|
|
- File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 149, in get_peft_config
|
|
|
|
|
- return builder(params)
|
|
|
|
|
- File "/root/Fine-tuning/backend/app/peft/__init__.py", line 43, in build_adalora_config
|
|
|
|
|
- return AdaLoraConfig(
|
|
|
|
|
- File "<string>", line 51, in __init__
|
|
|
|
|
- File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/config.py", line 102, in __post_init__
|
|
|
|
|
- raise ValueError("AdaLoRA does not work when `total_step` is None, supply a value > 0.")
|
|
|
|
|
-ValueError: AdaLoRA does not work when `total_step` is None, supply a value > 0.
|
|
|
|
|
-(base) [root@localhost ~]#
|
|
|
|
|
-(base) [root@localhost ~]# grep -n 'total_step\|init_r.*target_r' /root/Fine-tuning/backend/app/engines/text_engine.py
|
|
|
|
|
-190: # 计算总步数(AdaLoRA 需要在 get_peft_model 之前设置 total_step)
|
|
|
|
|
-194: # AdaLoRA 要求 total_step > 0(通过属性名判断而非 isinstance,避免导入路径问题)
|
|
|
|
|
-195: if hasattr(peft_config, "init_r") and hasattr(peft_config, "target_r"):
|
|
|
|
|
-196: peft_config.total_step = max_steps
|
|
|
|
|
-396: total_steps=state.max_steps or 0,
|
|
|
|
|
|
|
+ File "/root/Fine-tuning/backend/app/engines/remote_train.py", line 172, in run_training
|
|
|
|
|
+ adapter_path = await engine.train(
|
|
|
|
|
+ File "/root/Fine-tuning/backend/app/engines/text_engine.py", line 198, in train
|
|
|
|
|
+ self._model = get_peft_model(self._model, peft_config)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/mapping_func.py", line 122, in get_peft_model
|
|
|
|
|
+ return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/peft_model.py", line 1955, in __init__
|
|
|
|
|
+ super().__init__(model, peft_config, adapter_name, **kwargs)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/peft_model.py", line 129, in __init__
|
|
|
|
|
+ self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/adalora/model.py", line 69, in __init__
|
|
|
|
|
+ super().__init__(model, config, adapter_name, **kwargs)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 315, in __init__
|
|
|
|
|
+ self.inject_adapter(self.model, adapter_name, low_cpu_mem_usage=low_cpu_mem_usage, state_dict=state_dict)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 815, in inject_adapter
|
|
|
|
|
+ peft_config = self._prepare_adapter_config(peft_config, model_config)
|
|
|
|
|
+ File "/opt/conda/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 570, in _prepare_adapter_config
|
|
|
|
|
+ raise ValueError("Please specify `target_modules` or `target_parameters`in `peft_config`")
|
|
|
|
|
+ValueError: Please specify `target_modules` or `target_parameters`in `peft_config`
|