from pydantic import BaseModel, Field

from app.schemas.common import JobStatus, ModelType, PeftMethod


class TrainingConfig(BaseModel):
    """Configuration for a single training job.

    Numeric hyperparameters carry range constraints so that clearly invalid
    values (zero or negative counts, ratios outside ``[0, 1]``) are rejected
    when the model is validated. Defaults are unchanged.
    """

    # NOTE(review): Pydantic v2 releases before 2.10 emit a protected-namespace
    # warning for field names starting with ``model_`` — confirm the installed
    # version and whether the warning needs to be silenced.
    model_id: str
    model_type: ModelType
    dataset_id: str
    peft_method: PeftMethod = PeftMethod.LORA

    # General optimisation settings.
    epochs: int = Field(default=3, ge=1)
    batch_size: int = Field(default=4, ge=1)
    gradient_accumulation: int = Field(default=4, ge=1)
    learning_rate: float = Field(default=2e-4, gt=0.0)
    max_seq_length: int = Field(default=2048, ge=1)
    # A ratio, so it is only meaningful within [0, 1].
    warmup_ratio: float = Field(default=0.05, ge=0.0, le=1.0)
    save_strategy: str = "epoch"
    eval_strategy: str = "epoch"
    # Left unconstrained: presumably only relevant for a step-based
    # eval_strategy — TODO confirm which values callers send.
    eval_steps: int = 100

    # Hardware configuration
    num_gpus: int = Field(default=1, ge=1, le=4, description="训练使用的 GPU 数量")

    # LoRA-specific
    lora_r: int = Field(default=16, ge=1)
    lora_alpha: int = Field(default=32, ge=1)
    # A dropout probability, so it must lie within [0, 1].
    lora_dropout: float = Field(default=0.05, ge=0.0, le=1.0)
    lora_target_modules: str = "all-linear"

    # QLoRA-specific
    # NOTE(review): presumably a quantisation bit width (e.g. 4 or 8); the
    # set of supported values is not visible here, so it is not validated.
    qlora_bits: int = 4

    # PPO-specific
    # NOTE(review): task_type defaults to "sft" although it sits in the PPO
    # section — presumably it selects the training task; confirm with callers.
    task_type: str = "sft"
    ppo_epochs: int = Field(default=4, ge=1)
    vf_coef: float = Field(default=0.1, ge=0.0)
    kl_coef: float = Field(default=0.2, ge=0.0)
    response_length: int = Field(default=512, ge=1)
    reward_model_path: str | None = None
    reward_type: str = "heuristic"  # heuristic | model | none


class TrainingJobResponse(BaseModel):
    """Serialized view of a training job: identity, status, progress, result."""

    # --- identity -----------------------------------------------------------
    # NOTE(review): Pydantic v2 releases before 2.10 warn about field names
    # starting with ``model_`` (protected namespace) — confirm the version.
    id: str
    model_id: str
    # Plain strings here, whereas TrainingConfig uses the ModelType and
    # PeftMethod types for the same-named fields.
    model_type: str
    peft_method: str
    dataset_id: str = Field(default="")
    status: JobStatus

    # --- progress -----------------------------------------------------------
    # Constrained to the closed range 0.0–100.0 (presumably a percentage).
    progress: float = Field(0.0, ge=0.0, le=100.0)
    current_epoch: int = Field(default=0)
    current_step: int = Field(default=0)
    total_steps: int = Field(default=0)
    loss: float | None = Field(default=None)

    # --- lifecycle timestamps (carried as strings; format not defined here) --
    created_at: str
    started_at: str | None = Field(default=None)
    finished_at: str | None = Field(default=None)

    # --- outcome ------------------------------------------------------------
    error_message: str | None = Field(default=None)
    adapter_path: str | None = Field(default=None)


class TrainingProgress(BaseModel):
    """One progress report for a training job, identified by ``job_id``."""

    # Required on every report.
    job_id: str
    epoch: int
    step: int
    total_steps: int
    loss: float
    learning_rate: float

    # Optional extras; both default to None when not supplied.
    gpu_memory_mb: int | None = Field(default=None)
    eta_seconds: float | None = Field(default=None)