# training.py — Pydantic schemas for training configuration, job responses, and progress updates.

  1. from pydantic import BaseModel, Field
  2. from app.schemas.common import JobStatus, ModelType, PeftMethod
  3. class TrainingConfig(BaseModel):
  4. model_id: str
  5. model_type: ModelType
  6. dataset_id: str
  7. peft_method: PeftMethod = PeftMethod.LORA
  8. epochs: int = 3
  9. batch_size: int = 4
  10. gradient_accumulation: int = 4
  11. learning_rate: float = 2e-4
  12. max_seq_length: int = 2048
  13. warmup_ratio: float = 0.05
  14. save_strategy: str = "epoch"
  15. eval_strategy: str = "epoch"
  16. eval_steps: int = 100
  17. # 硬件配置
  18. num_gpus: int = Field(default=1, ge=1, le=4, description="训练使用的 GPU 数量")
  19. # LoRA-specific
  20. lora_r: int = 16
  21. lora_alpha: int = 32
  22. lora_dropout: float = 0.05
  23. lora_target_modules: str = "all-linear"
  24. # QLoRA-specific
  25. qlora_bits: int = 4
  26. # PPO-specific
  27. task_type: str = "sft"
  28. ppo_epochs: int = 4
  29. vf_coef: float = 0.1
  30. kl_coef: float = 0.2
  31. response_length: int = 512
  32. reward_model_path: str | None = None
  33. reward_type: str = "heuristic" # heuristic | model | none
  34. class TrainingJobResponse(BaseModel):
  35. id: str
  36. model_id: str
  37. model_type: str
  38. peft_method: str
  39. dataset_id: str = ""
  40. status: JobStatus
  41. progress: float = Field(default=0.0, ge=0.0, le=100.0)
  42. current_epoch: int = 0
  43. current_step: int = 0
  44. total_steps: int = 0
  45. loss: float | None = None
  46. created_at: str
  47. started_at: str | None = None
  48. finished_at: str | None = None
  49. error_message: str | None = None
  50. adapter_path: str | None = None
  51. class TrainingProgress(BaseModel):
  52. job_id: str
  53. epoch: int
  54. step: int
  55. total_steps: int
  56. loss: float
  57. learning_rate: float
  58. gpu_memory_mb: int | None = None
  59. eta_seconds: float | None = None