|
@@ -192,9 +192,9 @@ class TextEngine(BaseEngine):
|
|
|
|
|
|
|
|
output_dir = str(settings.adapters_dir / job_id)
|
|
output_dir = str(settings.adapters_dir / job_id)
|
|
|
|
|
|
|
|
- # AdaLoRA 需要 max_steps,同时也让进度计算更准确
|
|
|
|
|
|
|
+ # AdaLoRA 需要 max_steps > 0,同时也让进度计算更准确
|
|
|
dataset_len = len(dataset)
|
|
dataset_len = len(dataset)
|
|
|
- max_steps = (dataset_len * epochs) // (batch_size * gradient_accumulation)
|
|
|
|
|
|
|
+ max_steps = max(1, (dataset_len * epochs) // (batch_size * gradient_accumulation))
|
|
|
|
|
|
|
|
tr_args = TrainingArguments(
|
|
tr_args = TrainingArguments(
|
|
|
output_dir=output_dir,
|
|
output_dir=output_dir,
|
|
@@ -236,6 +236,7 @@ class TextEngine(BaseEngine):
|
|
|
base_trainer_kwargs = dict(
|
|
base_trainer_kwargs = dict(
|
|
|
output_dir=output_dir,
|
|
output_dir=output_dir,
|
|
|
num_train_epochs=epochs,
|
|
num_train_epochs=epochs,
|
|
|
|
|
+ max_steps=max_steps,
|
|
|
per_device_train_batch_size=batch_size,
|
|
per_device_train_batch_size=batch_size,
|
|
|
gradient_accumulation_steps=gradient_accumulation,
|
|
gradient_accumulation_steps=gradient_accumulation,
|
|
|
learning_rate=learning_rate,
|
|
learning_rate=learning_rate,
|