Datasets.tsx 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  import { memo, useCallback, useEffect, useRef, useState } from 'react'
  import api, { DatasetInfo } from '../api/client'
  /**
   * A single row of the dataset table.
   *
   * Shows the dataset's name, format, record count and creation time, followed
   * by a preview button and a delete button. Each button forwards the dataset's
   * `id` to the matching callback. The component is wrapped in `memo`.
   *
   * @param props.d - The dataset record to display.
   * @param props.onPreview - Called with `d.id` when the preview button is clicked.
   * @param props.onDelete - Called with `d.id` when the delete button is clicked.
   */
  const DatasetRow = memo(function DatasetRow(props: {
    d: DatasetInfo
    onPreview: (id: string) => void
    onDelete: (id: string) => void
  }) {
    const { d: dataset, onPreview, onDelete } = props

    // The id is read at click time, exactly when the callback is invoked.
    const requestPreview = () => onPreview(dataset.id)
    const requestDelete = () => onDelete(dataset.id)

    const rowStyle = { borderBottom: '1px solid #eee' }
    const nameCellStyle = { padding: '8px 0' }
    const previewButtonStyle = { marginRight: 8, padding: '2px 8px', cursor: 'pointer' }
    const deleteButtonStyle = {
      padding: '2px 8px',
      color: '#e94560',
      border: '1px solid #e94560',
      borderRadius: 4,
      background: 'transparent',
      cursor: 'pointer',
    }

    return (
      <tr style={rowStyle}>
        <td style={nameCellStyle}>{dataset.name}</td>
        <td>{dataset.format}</td>
        <td>{dataset.record_count}</td>
        <td>{dataset.created_at}</td>
        <td>
          <button onClick={requestPreview} style={previewButtonStyle}>预览</button>
          <button onClick={requestDelete} style={deleteButtonStyle}>删除</button>
        </td>
      </tr>
    )
  })
  /** Preview table state: the column order plus the sampled rows. */
  type DatasetPreview = {
    columns: string[]
    rows: { row_index: number; data: Record<string, unknown> }[]
  }

  /**
   * Convert one preview cell value to display text.
   *
   * `null`/`undefined` become an empty string. Objects and arrays are
   * JSON-encoded, because `String(value)` would render them as the useless
   * `[object Object]`. Everything else goes through `String`.
   *
   * @param value - Raw cell value taken from a preview row.
   * @returns The text to render inside the table cell.
   */
  function formatCellValue(value: unknown): string {
    if (value == null) return ''
    if (typeof value === 'object') {
      try {
        return JSON.stringify(value)
      } catch {
        // Not serialisable (e.g. circular reference): fall back to String().
        return String(value)
      }
    }
    return String(value)
  }

  /**
   * Dataset management page.
   *
   * Renders four sections:
   *  - an upload area (click to pick a file, or drop a file onto it),
   *  - a form that asks `api.datasets.download` to fetch a dataset by ID,
   *  - the dataset table, loaded on mount and on demand via the refresh button,
   *  - a preview table for the most recently previewed dataset.
   */
  export function Datasets() {
    const [datasets, setDatasets] = useState<DatasetInfo[]>([])
    const [uploading, setUploading] = useState(false)
    const [downloading, setDownloading] = useState(false)
    const [loading, setLoading] = useState(false)
    const [previewData, setPreviewData] = useState<DatasetPreview | null>(null)
    const inputRef = useRef<HTMLInputElement>(null)

    // Download form
    const [dlDatasetId, setDlDatasetId] = useState('')
    const [dlUseModelscope, setDlUseModelscope] = useState(false)
    const [dlStatus, setDlStatus] = useState('')

    /** Reload the dataset list; on failure the list is cleared and the error logged. */
    const fetchDatasets = useCallback(() => {
      setLoading(true)
      api.datasets.list()
        .then(setDatasets)
        .catch((err: unknown) => {
          console.error('Failed to load datasets:', err)
          setDatasets([])
        })
        .finally(() => setLoading(false))
    }, [])

    // Populate the table when the page is first shown, so the user does not
    // see the "no datasets" message until they press refresh.
    useEffect(() => {
      fetchDatasets()
    }, [fetchDatasets])

    /** Upload one file, then refresh the list. Failures are logged. */
    const handleFileUpload = async (file: File) => {
      setUploading(true)
      try {
        await api.datasets.upload(file)
        fetchDatasets()
      } catch (err) {
        console.error('Upload failed:', err)
      } finally {
        setUploading(false)
      }
    }

    const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
      const file = e.target.files?.[0]
      // Clear the input so picking the same file again still fires onChange.
      e.target.value = ''
      if (file) void handleFileUpload(file)
    }

    const handleDragOver = (e: React.DragEvent<HTMLDivElement>) => {
      // Without preventDefault the element is not a valid drop target.
      e.preventDefault()
    }

    const handleDrop = (e: React.DragEvent<HTMLDivElement>) => {
      // Stop the browser from navigating to / opening the dropped file.
      e.preventDefault()
      if (uploading) return
      // The input's `accept` filter does not apply to drops; the file is
      // forwarded as-is, like a file chosen through the picker.
      const file = e.dataTransfer.files?.[0]
      if (file) void handleFileUpload(file)
    }

    /** Ask the backend to download the dataset named in the form and show the outcome. */
    const handleDownload = () => {
      // Send the trimmed ID: the emptiness check already ignores surrounding
      // whitespace, so the request should too.
      const datasetId = dlDatasetId.trim()
      if (!datasetId) return
      setDownloading(true)
      setDlStatus('正在下载...')
      api.datasets.download(datasetId, dlUseModelscope)
        .then(res => setDlStatus(`${res.dataset_id}: ${res.status}${res.error ? ` - ${res.error}` : ''}`))
        .catch((err: unknown) => setDlStatus(`下载失败: ${err instanceof Error ? err.message : String(err)}`))
        .finally(() => setDownloading(false))
    }

    /**
     * Load preview data for a dataset. On failure the preview is cleared.
     * Stable identity so the memoised rows are not re-rendered needlessly.
     */
    const handlePreview = useCallback((id: string) => {
      // Second argument is presumably the number of rows to fetch — confirm against the API client.
      api.datasets.preview(id, 10)
        .then(res => setPreviewData({ columns: res.columns, rows: res.preview_rows }))
        .catch((err: unknown) => {
          console.error('Preview failed:', err)
          setPreviewData(null)
        })
    }, [])

    /** Delete a dataset after confirmation, then refresh the list and clear the preview. */
    const handleDelete = useCallback(async (id: string) => {
      if (!window.confirm('确定删除此数据集?')) return
      try {
        await api.datasets.delete(id)
        fetchDatasets()
        setPreviewData(null)
      } catch (err) {
        console.error('Delete failed:', err)
      }
    }, [fetchDatasets])

    return (
      <div>
        <h1>数据集管理</h1>

        {/* Upload area */}
        <div
          onClick={() => {
            // Ignore clicks while an upload is running to avoid overlapping uploads.
            if (!uploading) inputRef.current?.click()
          }}
          onDragOver={handleDragOver}
          onDrop={handleDrop}
          style={{
            marginTop: 16, border: '2px dashed #ccc', borderRadius: 8,
            padding: 40, textAlign: 'center', color: '#999', cursor: 'pointer',
            opacity: uploading ? 0.6 : 1,
          }}
        >
          {uploading ? '上传中...' : '拖拽文件到此处或点击上传 (JSONL / CSV / Parquet / JSON)'}
          <input
            ref={inputRef}
            type="file"
            accept=".jsonl,.csv,.parquet,.json"
            style={{ display: 'none' }}
            onChange={handleInputChange}
          />
        </div>

        {/* Download section */}
        <div style={{ marginTop: 24 }}>
          <h2 style={{ margin: '0 0 12px', fontSize: 16 }}>从平台下载</h2>
          <div style={{ display: 'flex', gap: 8, alignItems: 'center' }}>
            <input
              type="text"
              placeholder="数据集 ID (如 glue, MRPC, stanfordnlp/imdb)"
              value={dlDatasetId}
              onChange={e => setDlDatasetId(e.target.value)}
              style={{ padding: '8px 12px', width: 400, borderRadius: 4, border: '1px solid #ccc' }}
            />
            <label style={{ fontSize: 13, color: '#666', whiteSpace: 'nowrap' }}>
              <input type="checkbox" checked={dlUseModelscope} onChange={e => setDlUseModelscope(e.target.checked)} />
              {' '}ModelScope
            </label>
            <button
              type="button"
              onClick={handleDownload}
              disabled={downloading}
              style={{ padding: '8px 16px', borderRadius: 4, border: 'none', background: '#e94560', color: '#fff', cursor: 'pointer', opacity: downloading ? 0.6 : 1 }}
            >
              {downloading ? '下载中...' : '下载数据集'}
            </button>
          </div>
          {dlStatus && <p style={{ marginTop: 8, fontSize: 13, color: dlStatus.includes('failed') || dlStatus.includes('失败') ? '#e94560' : '#666' }}>{dlStatus}</p>}
        </div>

        {/* Dataset list */}
        <div style={{ marginTop: 24 }}>
          <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 12 }}>
            <h2 style={{ margin: 0 }}>已上传数据集</h2>
            <button type="button" onClick={fetchDatasets} style={{ padding: '4px 12px', borderRadius: 4, border: '1px solid #ccc', background: '#fff', cursor: 'pointer' }}>
              刷新
            </button>
          </div>
          {loading && <p style={{ color: '#999' }}>加载中...</p>}
          {!loading && datasets.length === 0 && (
            <p style={{ color: '#999', fontSize: 14 }}>暂无数据集</p>
          )}
          {!loading && datasets.length > 0 && (
            <table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 14 }}>
              <thead>
                <tr style={{ borderBottom: '2px solid #eee', textAlign: 'left' }}>
                  <th style={{ padding: '8px 0' }}>名称</th>
                  <th>格式</th>
                  <th>记录数</th>
                  <th>上传时间</th>
                  <th>操作</th>
                </tr>
              </thead>
              <tbody>
                {datasets.map(d => (
                  <DatasetRow key={d.id} d={d} onPreview={handlePreview} onDelete={handleDelete} />
                ))}
              </tbody>
            </table>
          )}
        </div>

        {/* Preview */}
        {previewData && previewData.rows.length > 0 && (
          <div style={{ marginTop: 24 }}>
            <h3>数据预览</h3>
            <table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 13 }}>
              <thead>
                <tr style={{ borderBottom: '2px solid #eee', textAlign: 'left' }}>
                  {previewData.columns.map(col => (
                    <th key={col} style={{ padding: '6px 8px' }}>{col}</th>
                  ))}
                </tr>
              </thead>
              <tbody>
                {previewData.rows.slice(0, 10).map((row, i) => (
                  <tr key={i} style={{ borderBottom: '1px solid #eee' }}>
                    {previewData.columns.map(col => {
                      const cellVal = formatCellValue(row.data[col])
                      // Long or multi-line values get a wider, scrollable, wrapped
                      // monospace cell; short ones are clipped with an ellipsis.
                      const isMultiline = cellVal.includes('\n') || cellVal.length > 100
                      return (
                        <td
                          key={col}
                          style={{
                            padding: '6px 8px',
                            maxWidth: isMultiline ? 500 : 200,
                            overflow: isMultiline ? 'auto' : 'hidden',
                            textOverflow: isMultiline ? undefined : 'ellipsis',
                            whiteSpace: isMultiline ? 'pre-wrap' : 'nowrap',
                            fontFamily: isMultiline ? 'monospace' : undefined,
                            fontSize: isMultiline ? 12 : 13,
                          }}
                        >
                          {cellVal}
                        </td>
                      )
                    })}
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        )}
      </div>
    )
  }