# dataset.py

from enum import Enum

from pydantic import BaseModel, Field
  3. class DatasetFormat(str, Enum):
  4. JSONL = "jsonl"
  5. CSV = "csv"
  6. PARQUET = "parquet"
  7. JSON = "json"
  8. class DatasetUploadResponse(BaseModel):
  9. id: str
  10. name: str
  11. format: DatasetFormat
  12. record_count: int
  13. file_path: str
  14. created_at: str
  15. class DatasetDownloadRequest(BaseModel):
  16. dataset_id: str = Field(..., description="HuggingFace or ModelScope dataset ID, e.g. 'glue', 'MRPC'")
  17. use_modelscope: bool = Field(default=False, description="Use ModelScope instead of HuggingFace")
  18. class DatasetDownloadResponse(BaseModel):
  19. dataset_id: str
  20. status: str # "downloading" | "completed" | "failed"
  21. task_id: str
  22. path: str | None = None
  23. error: str | None = None
  24. class DatasetPreviewRow(BaseModel):
  25. row_index: int
  26. data: dict
  27. class DatasetPreviewResponse(BaseModel):
  28. total_records: int
  29. preview_rows: list[DatasetPreviewRow]
  30. columns: list[str]
  31. class DatasetValidationResult(BaseModel):
  32. is_valid: bool
  33. errors: list[str] = []
  34. warnings: list[str] = []