training.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. from pydantic import BaseModel, Field
  2. from app.schemas.common import JobStatus, ModelType, PeftMethod
  3. class TrainingConfig(BaseModel):
  4. model_id: str
  5. model_type: ModelType
  6. dataset_id: str
  7. peft_method: PeftMethod = PeftMethod.LORA
  8. epochs: int = 3
  9. batch_size: int = 4
  10. gradient_accumulation: int = 4
  11. learning_rate: float = 2e-4
  12. max_seq_length: int = 2048
  13. warmup_ratio: float = 0.05
  14. save_strategy: str = "epoch"
  15. eval_strategy: str = "epoch"
  16. eval_steps: int = 100
  17. # LoRA-specific
  18. lora_r: int = 16
  19. lora_alpha: int = 32
  20. lora_dropout: float = 0.05
  21. lora_target_modules: str = "all-linear"
  22. # QLoRA-specific
  23. qlora_bits: int = 4
  24. # PPO-specific
  25. task_type: str = "sft"
  26. ppo_epochs: int = 4
  27. vf_coef: float = 0.1
  28. kl_coef: float = 0.2
  29. response_length: int = 512
  30. reward_model_path: str | None = None
  31. reward_type: str = "heuristic" # heuristic | model | none
  32. class TrainingJobResponse(BaseModel):
  33. id: str
  34. model_id: str
  35. model_type: str
  36. peft_method: str
  37. status: JobStatus
  38. progress: float = Field(default=0.0, ge=0.0, le=100.0)
  39. current_epoch: int = 0
  40. current_step: int = 0
  41. total_steps: int = 0
  42. loss: float | None = None
  43. created_at: str
  44. started_at: str | None = None
  45. finished_at: str | None = None
  46. error_message: str | None = None
  47. adapter_path: str | None = None
  48. class TrainingProgress(BaseModel):
  49. job_id: str
  50. epoch: int
  51. step: int
  52. total_steps: int
  53. loss: float
  54. learning_rate: float
  55. gpu_memory_mb: int | None = None
  56. eta_seconds: float | None = None