| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- from fastapi import APIRouter, UploadFile, File, Query, HTTPException
- from app.schemas.dataset import (
- DatasetDownloadRequest,
- DatasetDownloadResponse,
- DatasetPreviewResponse,
- DatasetUploadResponse,
- DatasetValidationResult,
- )
- from app.schemas.background_task import DatasetDownloadTaskResponse
- from app.services import dataset_service
# Router that every dataset endpoint in this module registers itself on.
router: APIRouter = APIRouter()
@router.post(
    "/download",
    response_model=DatasetDownloadResponse,
    status_code=200,
)
async def download_dataset(req: DatasetDownloadRequest):
    """Start a background dataset download task and return its task_id immediately.

    The request is handed unchanged to ``dataset_service.download_dataset``
    and whatever the service returns is sent back as the response.
    """
    # NOTE(review): DatasetDownloadTaskResponse is imported by this module but
    # not used; confirm DatasetDownloadResponse is the intended response model.
    return await dataset_service.download_dataset(req)
@router.get("/download/{task_id}")
async def get_dataset_download_status(task_id: str):
    """Look up the status of a dataset download task.

    Returns the mapping produced by
    ``dataset_service.get_dataset_download_status``.

    Raises:
        HTTPException: 404 when the service reports ``status == "not_found"``.
    """
    status_info = await dataset_service.get_dataset_download_status(task_id)
    task_known = status_info.get("status") != "not_found"
    if task_known:
        return status_info
    # The service signals an unknown task via a sentinel status value rather
    # than an exception; translate that into a proper HTTP 404.
    raise HTTPException(status_code=404, detail="Download task not found")
@router.get("/downloads")
async def list_dataset_downloads():
    """List all dataset download tasks.

    Returns the result of ``dataset_service.list_dataset_downloads`` as-is.
    """
    download_tasks = await dataset_service.list_dataset_downloads()
    return download_tasks
@router.post("/download/{task_id}/cancel")
async def cancel_dataset_download(task_id: str):
    """Cancel a dataset download task.

    Returns the result of ``dataset_service.cancel_dataset_download`` as-is.
    """
    # NOTE(review): unlike the status endpoint, no 404 mapping is applied here;
    # confirm how the service reports an unknown task_id.
    cancel_outcome = await dataset_service.cancel_dataset_download(task_id)
    return cancel_outcome
@router.post(
    "/upload",
    response_model=DatasetUploadResponse,
    status_code=201,
)
async def upload_dataset(file: UploadFile = File(...)):
    """Upload a dataset file (JSONL / CSV / Parquet / JSON).

    The uploaded file is passed to ``dataset_service.upload_dataset``; the
    mapping it returns is expanded into a ``DatasetUploadResponse``.
    """
    return DatasetUploadResponse(**(await dataset_service.upload_dataset(file)))
@router.get("/{dataset_id}/preview", response_model=DatasetPreviewResponse)
async def preview_dataset(
    dataset_id: str,
    rows: int = Query(default=10, ge=1, le=100),
):
    """Preview the first N rows of a dataset.

    Args:
        dataset_id: Identifier of the dataset to preview.
        rows: Number of rows to return; must be between 1 and 100
            (default 10). Values outside that range are rejected by
            FastAPI's request validation before the service is called.

    Returns:
        A ``DatasetPreviewResponse`` built from the mapping returned by
        ``dataset_service.preview_dataset``.
    """
    # The lower bound matters: only an upper bound was enforced before, so
    # zero or negative counts reached the service, whose handling of them is
    # not visible from this module. Reject them at the API boundary instead.
    result = await dataset_service.preview_dataset(dataset_id, rows)
    return DatasetPreviewResponse(**result)
@router.post("/{dataset_id}/validate", response_model=DatasetValidationResult)
async def validate_dataset(dataset_id: str):
    """Validate a dataset's format and schema.

    The mapping returned by ``dataset_service.validate_dataset`` is expanded
    into a ``DatasetValidationResult``.
    """
    return DatasetValidationResult(
        **(await dataset_service.validate_dataset(dataset_id))
    )
@router.get("/", response_model=list[DatasetUploadResponse])
async def list_datasets():
    """List all uploaded datasets.

    Each mapping returned by ``dataset_service.list_datasets`` is expanded
    into a ``DatasetUploadResponse``.
    """
    return [
        DatasetUploadResponse(**entry)
        for entry in await dataset_service.list_datasets()
    ]
@router.delete("/{dataset_id}", status_code=200)
async def delete_dataset(dataset_id: str):
    """Delete a dataset.

    Returns the result of ``dataset_service.delete_dataset`` as-is.
    """
    deletion_outcome = await dataset_service.delete_dataset(dataset_id)
    return deletion_outcome
|