# datasets.py
  1. from fastapi import APIRouter, UploadFile, File, Query
  2. from app.schemas.dataset import (
  3. DatasetDownloadRequest,
  4. DatasetDownloadResponse,
  5. DatasetPreviewResponse,
  6. DatasetUploadResponse,
  7. DatasetValidationResult,
  8. )
  9. from app.services import dataset_service
# Router on which the dataset endpoints in this module are registered.
router = APIRouter()
  11. @router.post("/download", response_model=DatasetDownloadResponse)
  12. async def download_dataset(req: DatasetDownloadRequest):
  13. """从 HuggingFace 或 ModelScope 下载数据集。"""
  14. return await dataset_service.download_dataset(req)
  15. @router.post("/upload", response_model=DatasetUploadResponse)
  16. async def upload_dataset(file: UploadFile = File(...)):
  17. """上传数据集文件(JSONL / CSV / Parquet / JSON)。"""
  18. result = await dataset_service.upload_dataset(file)
  19. return DatasetUploadResponse(**result)
  20. @router.get("/{dataset_id}/preview", response_model=DatasetPreviewResponse)
  21. async def preview_dataset(dataset_id: str, rows: int = Query(default=10, le=100)):
  22. """预览数据集前 N 行。"""
  23. result = await dataset_service.preview_dataset(dataset_id, rows)
  24. return DatasetPreviewResponse(**result)
  25. @router.post("/{dataset_id}/validate", response_model=DatasetValidationResult)
  26. async def validate_dataset(dataset_id: str):
  27. """校验数据集格式和 Schema。"""
  28. result = await dataset_service.validate_dataset(dataset_id)
  29. return DatasetValidationResult(**result)
  30. @router.get("/", response_model=list[DatasetUploadResponse])
  31. async def list_datasets():
  32. """列出所有已上传数据集。"""
  33. items = await dataset_service.list_datasets()
  34. return [DatasetUploadResponse(**item) for item in items]
  35. @router.delete("/{dataset_id}")
  36. async def delete_dataset(dataset_id: str):
  37. """删除数据集。"""
  38. return await dataset_service.delete_dataset(dataset_id)