lxylxy123321 пре 3 дана
родитељ
комит
89483a3666

+ 10 - 4
backend/app/engines/multimodal_engine.py

@@ -114,10 +114,7 @@ class MultimodalEngine(BaseEngine):
 
         hf_dataset = HFDataset.from_list(data)
 
-        self._model = get_peft_model(self._model, peft_config)
-        self._model.print_trainable_parameters()
-
-        output_dir = str(settings.adapters_dir / job_id)
+        # Compute the total step count (AdaLoRA needs total_step set before get_peft_model is called)
         epochs = training_args.get("epochs", 3)
         batch_size = training_args.get("batch_size", 4)
         learning_rate = training_args.get("learning_rate", 2e-4)
@@ -125,6 +122,15 @@ class MultimodalEngine(BaseEngine):
         dataset_len = len(hf_dataset)
         max_steps = max(1, (dataset_len * epochs) // batch_size)
 
+        from peft import AdaLoraConfig
+        if isinstance(peft_config, AdaLoraConfig):
+            peft_config.total_step = max_steps
+
+        self._model = get_peft_model(self._model, peft_config)
+        self._model.print_trainable_parameters()
+
+        output_dir = str(settings.adapters_dir / job_id)
+
         tr_args = TrainingArguments(
             output_dir=output_dir,
             num_train_epochs=epochs,

+ 9 - 4
backend/app/engines/text_engine.py

@@ -187,15 +187,20 @@ class TextEngine(BaseEngine):
 
         dataset = self._tokenize_dataset(dataset_path, max_seq_length)
 
+        # Compute the total step count (AdaLoRA needs total_step set before get_peft_model is called)
+        dataset_len = len(dataset)
+        max_steps = max(1, (dataset_len * epochs) // (batch_size * gradient_accumulation))
+
+        # AdaLoRA requires total_step > 0
+        from peft import AdaLoraConfig
+        if isinstance(peft_config, AdaLoraConfig):
+            peft_config.total_step = max_steps
+
         self._model = get_peft_model(self._model, peft_config)
         self._model.print_trainable_parameters()
 
         output_dir = str(settings.adapters_dir / job_id)
 
-        # AdaLoRA 需要 max_steps > 0,同时也让进度计算更准确
-        dataset_len = len(dataset)
-        max_steps = max(1, (dataset_len * epochs) // (batch_size * gradient_accumulation))
-
         tr_args = TrainingArguments(
             output_dir=output_dir,
             num_train_epochs=epochs,

+ 10 - 4
backend/app/engines/vision_engine.py

@@ -114,10 +114,7 @@ class VisionEngine(BaseEngine):
         hf_dataset = HFDataset.from_list(data)
         hf_dataset.set_transform(transform)
 
-        self._model = get_peft_model(self._model, peft_config)
-        self._model.print_trainable_parameters()
-
-        output_dir = str(settings.adapters_dir / job_id)
+        # Compute the total step count (AdaLoRA needs total_step set before get_peft_model is called)
         epochs = training_args.get("epochs", 3)
         batch_size = training_args.get("batch_size", 4)
         learning_rate = training_args.get("learning_rate", 2e-4)
@@ -125,6 +122,15 @@ class VisionEngine(BaseEngine):
         dataset_len = len(hf_dataset)
         max_steps = max(1, (dataset_len * epochs) // batch_size)
 
+        from peft import AdaLoraConfig
+        if isinstance(peft_config, AdaLoraConfig):
+            peft_config.total_step = max_steps
+
+        self._model = get_peft_model(self._model, peft_config)
+        self._model.print_trainable_parameters()
+
+        output_dir = str(settings.adapters_dir / job_id)
+
         tr_args = TrainingArguments(
             output_dir=output_dir,
             num_train_epochs=epochs,