from fastapi import APIRouter, UploadFile, File, Query, HTTPException

from app.schemas.dataset import (
    DatasetDownloadRequest,
    DatasetDownloadResponse,
    DatasetPreviewResponse,
    DatasetUploadResponse,
    DatasetValidationResult,
)
from app.schemas.background_task import DatasetDownloadTaskResponse
from app.services import dataset_service

router = APIRouter()


@router.post("/download", response_model=DatasetDownloadResponse, status_code=200)
async def download_dataset(req: DatasetDownloadRequest):
    """Start a dataset download as a background task.

    The service result is returned unchanged and serialized through
    ``DatasetDownloadResponse``; per the original contract the response
    carries the task_id and is returned immediately.
    """
    return await dataset_service.download_dataset(req)


@router.get("/download/{task_id}")
async def get_dataset_download_status(task_id: str):
    """Return the status of a dataset download task.

    Raises:
        HTTPException: 404 when the service result has
            ``status == "not_found"``.
    """
    result = await dataset_service.get_dataset_download_status(task_id)
    # The service signals an unknown task in-band; translate it to HTTP 404.
    if result.get("status") == "not_found":
        raise HTTPException(status_code=404, detail="Download task not found")
    return result


@router.get("/downloads")
async def list_dataset_downloads():
    """List all dataset download tasks."""
    return await dataset_service.list_dataset_downloads()


@router.post("/download/{task_id}/cancel")
async def cancel_dataset_download(task_id: str):
    """Cancel a dataset download task and return the service result as-is."""
    return await dataset_service.cancel_dataset_download(task_id)


@router.post("/upload", response_model=DatasetUploadResponse, status_code=201)
async def upload_dataset(file: UploadFile = File(...)):
    """Upload a dataset file (JSONL / CSV / Parquet / JSON)."""
    result = await dataset_service.upload_dataset(file)
    return DatasetUploadResponse(**result)


@router.get("/{dataset_id}/preview", response_model=DatasetPreviewResponse)
async def preview_dataset(
    dataset_id: str,
    # Bounded on both sides: without ``ge`` a zero or negative row count
    # would pass validation and reach the service unchecked.
    rows: int = Query(default=10, ge=1, le=100),
):
    """Preview the first ``rows`` rows of a dataset.

    Args:
        dataset_id: Identifier of the dataset to preview.
        rows: Number of rows to return; defaults to 10 and must be
            between 1 and 100 inclusive (FastAPI rejects other values
            with a 422 validation error).
    """
    result = await dataset_service.preview_dataset(dataset_id, rows)
    return DatasetPreviewResponse(**result)


@router.post("/{dataset_id}/validate", response_model=DatasetValidationResult)
async def validate_dataset(dataset_id: str):
    """Validate the format and schema of a dataset."""
    result = await dataset_service.validate_dataset(dataset_id)
    return DatasetValidationResult(**result)


@router.get("/", response_model=list[DatasetUploadResponse])
async def list_datasets():
    """List all uploaded datasets."""
    items = await dataset_service.list_datasets()
    return [DatasetUploadResponse(**item) for item in items]


@router.delete("/{dataset_id}", status_code=200)
async def delete_dataset(dataset_id: str):
    """Delete a dataset and return the service result as-is."""
    return await dataset_service.delete_dataset(dataset_id)