from pydantic import BaseModel, Field

from app.schemas.common import JobStatus, ModelType, PeftMethod


class TrainingConfig(BaseModel):
    """Request schema describing the settings of a training job.

    Only ``model_id``, ``model_type`` and ``dataset_id`` are required; every
    other field has a default. Apart from ``num_gpus`` no field carries a
    range constraint, so callers are responsible for passing sensible values.
    """

    model_id: str
    model_type: ModelType
    dataset_id: str
    peft_method: PeftMethod = PeftMethod.LORA
    epochs: int = 3
    batch_size: int = 4
    gradient_accumulation: int = 4
    learning_rate: float = 2e-4
    max_seq_length: int = 2048
    warmup_ratio: float = 0.05
    save_strategy: str = "epoch"
    eval_strategy: str = "epoch"
    eval_steps: int = 100

    # Hardware configuration (validated to the inclusive range 1..4).
    num_gpus: int = Field(default=1, ge=1, le=4, description="训练使用的 GPU 数量")

    # LoRA-specific
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    lora_target_modules: str = "all-linear"

    # QLoRA-specific
    qlora_bits: int = 4

    # PPO-specific
    task_type: str = "sft"
    ppo_epochs: int = 4
    vf_coef: float = 0.1
    kl_coef: float = 0.2
    response_length: int = 512
    reward_model_path: str | None = None
    reward_type: str = "heuristic"  # heuristic | model | none


class TrainingJobResponse(BaseModel):
    """Response schema carrying the state of a training job.

    ``progress`` is validated to lie in the inclusive range 0.0..100.0.
    Timestamps are plain strings; their format is not enforced here.
    """

    id: str
    model_id: str
    model_type: str
    peft_method: str
    dataset_id: str = ""
    status: JobStatus
    progress: float = Field(default=0.0, ge=0.0, le=100.0)
    current_epoch: int = 0
    current_step: int = 0
    total_steps: int = 0
    loss: float | None = None
    created_at: str
    started_at: str | None = None
    finished_at: str | None = None
    error_message: str | None = None
    adapter_path: str | None = None


class TrainingProgress(BaseModel):
    """Schema for a single progress report of a training job.

    ``gpu_memory_mb`` and ``eta_seconds`` are optional and default to
    ``None``; all other fields are required.
    """

    job_id: str
    epoch: int
    step: int
    total_steps: int
    loss: float
    learning_rate: float
    gpu_memory_mb: int | None = None
    eta_seconds: float | None = None