# open_dataset.py

"""
Pydantic schemas for Open API dataset download endpoints.
"""
from datetime import datetime
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field
  8. class DatasetFormat(str, Enum):
  9. ALPACA = "alpaca"
  10. SHAREGPT = "sharegpt"
  11. JSON = "json"
  12. CSV = "csv"
  13. COCO = "coco"
  14. YOLO = "yolo"
  15. PASCAL_VOC = "pascal_voc"
  16. TEXT_FORMATS = {DatasetFormat.ALPACA, DatasetFormat.SHAREGPT}
  17. IMAGE_FORMATS = {DatasetFormat.JSON, DatasetFormat.CSV, DatasetFormat.COCO,
  18. DatasetFormat.YOLO, DatasetFormat.PASCAL_VOC}
  19. class DatasetDownloadRequest(BaseModel):
  20. format: DatasetFormat = Field(..., description="数据集格式")
  21. completed_only: bool = Field(default=True, description="是否只导出已完成的任务")
  22. class DatasetDownloadResponseData(BaseModel):
  23. project_id: str = Field(..., description="项目ID")
  24. format: str = Field(..., description="导出格式")
  25. total_exported: int = Field(..., description="导出任务数")
  26. file_url: str = Field(..., description="下载链接")
  27. file_name: str = Field(..., description="文件名")
  28. file_size: Optional[int] = Field(None, description="文件大小(字节)")
  29. expires_at: Optional[datetime] = Field(None, description="链接过期时间")
  30. status: str = Field(default="completed", description="导出状态")
  31. class DatasetDownloadResponse(BaseModel):
  32. code: int = Field(default=0)
  33. message: str = Field(default="success")
  34. data: DatasetDownloadResponseData