# dataset.py

from enum import Enum

from pydantic import BaseModel, Field


  3. class DatasetFormat(str, Enum):
  4. JSONL = "jsonl"
  5. CSV = "csv"
  6. PARQUET = "parquet"
  7. JSON = "json"
  8. class DatasetUploadResponse(BaseModel):
  9. id: str
  10. name: str
  11. format: DatasetFormat
  12. record_count: int
  13. file_path: str
  14. created_at: str
  15. class DatasetDownloadRequest(BaseModel):
  16. dataset_id: str = Field(..., description="HuggingFace or ModelScope dataset ID, e.g. 'glue', 'MRPC'")
  17. use_modelscope: bool = Field(default=False, description="Use ModelScope instead of HuggingFace")
  18. class DatasetDownloadResponse(BaseModel):
  19. dataset_id: str
  20. status: str # "downloading" | "completed" | "failed"
  21. path: str | None = None
  22. error: str | None = None
  23. class DatasetPreviewRow(BaseModel):
  24. row_index: int
  25. data: dict
  26. class DatasetPreviewResponse(BaseModel):
  27. total_records: int
  28. preview_rows: list[DatasetPreviewRow]
  29. columns: list[str]
  30. class DatasetValidationResult(BaseModel):
  31. is_valid: bool
  32. errors: list[str] = []
  33. warnings: list[str] = []