1 неделя назад · 30438fc60a
--- a/backend/app/services/dataset_service.py
+++ b/backend/app/services/dataset_service.py
@@ -170,7 +170,13 @@ async def preview_dataset(dataset_id: str, rows: int = 10) -> dict[str, Any]:
 
				 
			
 
				     return {
			
 
				         "total_records": record.record_count,
			
 
				-        "preview_rows": [{"row_index": i, "data": row} for i, row in enumerate(preview_data)],
			
 
				+        "preview_rows": [
			
 
				+            {
			
 
				+                "row_index": i,
			
 
				+                "data": {k: _format_value(v) for k, v in row.items()},
			
 
				+            }
			
 
				+            for i, row in enumerate(preview_data)
			
 
				+        ],
			
 
				         "columns": columns,
			
 
				     }
			
 
				 
			
@@ -281,6 +287,28 @@ def _count_records(file_path: Path, fmt: str) -> int:
 
				     return 0
			
 
				 
			
 
				 
			
 
				+def _format_value(value) -> str:
			
 
				+    """将复杂值格式化为可读字符串，特别处理 ShareGPT 格式的 conversations 数组。"""
			
 
				+    if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
			
 
				+        # 检测 ShareGPT 格式：[{"from": "human", "value": "..."}, {"from": "gpt", "value": "..."}]
			
 
				+        first = value[0]
			
 
				+        if "from" in first and "value" in first:
			
 
				+            parts = []
			
 
				+            for turn in value:
			
 
				+                role = turn.get("from", "unknown")
			
 
				+                text = str(turn.get("value", ""))
			
 
				+                # 截断过长文本
			
 
				+                if len(text) > 200:
			
 
				+                    text = text[:200] + "..."
			
 
				+                parts.append(f"[{role}] {text}")
			
 
				+            return "\n---\n".join(parts)
			
 
				+        # 其他对象数组：显示为 JSON
			
 
				+        return json.dumps(value, ensure_ascii=False, indent=2)
			
 
				+    if isinstance(value, (dict, list)):
			
 
				+        return json.dumps(value, ensure_ascii=False, indent=2)
			
 
				+    return str(value)
			
 
				+
			
 
				+
			
 
				 def _read_records(file_path: Path, fmt: str, n: int) -> list[dict]:
			
 
				     if fmt == "jsonl":
			
 
				         records = []
			
--- a/frontend/src/pages/Datasets.tsx
+++ b/frontend/src/pages/Datasets.tsx
@@ -174,11 +174,26 @@ export function Datasets() {
 
				             <tbody>
			
 
				               {previewData.rows.slice(0, 10).map((row, i) => (
			
 
				                 <tr key={i} style={{ borderBottom: '1px solid #eee' }}>
			
 
				-                  {previewData.columns.map(col => (
			
 
				-                    <td key={col} style={{ padding: '6px 8px', maxWidth: 200, overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
			
 
				-                      {String(row.data[col] ?? '')}
			
 
				-                    </td>
			
 
				-                  ))}
			
 
				+                  {previewData.columns.map(col => {
			
 
				+                    const cellVal = String(row.data[col] ?? '')
			
 
				+                    const isMultiline = cellVal.includes('\n') || cellVal.length > 100
			
 
				+                    return (
			
 
				+                      <td
			
 
				+                        key={col}
			
 
				+                        style={{
			
 
				+                          padding: '6px 8px',
			
 
				+                          maxWidth: isMultiline ? 500 : 200,
			
 
				+                          overflow: isMultiline ? 'auto' : 'hidden',
			
 
				+                          textOverflow: isMultiline ? undefined : 'ellipsis',
			
 
				+                          whiteSpace: isMultiline ? 'pre-wrap' : 'nowrap',
			
 
				+                          fontFamily: isMultiline ? 'monospace' : undefined,
			
 
				+                          fontSize: isMultiline ? 12 : 13,
			
 
				+                        }}
			
 
				+                      >
			
 
				+                        {cellVal}
			
 
				+                      </td>
			
 
				+                    )
			
 
				+                  })}
			
 
				                 </tr>
			
 
				               ))}
			
 
				             </tbody>