|
|
@@ -381,6 +381,44 @@ async def upload_dataset(file: UploadFile) -> dict[str, Any]:
|
|
|
}
|
|
|
|
|
|
|
|
|
+def _resolve_image_path(path_str: str, data_dir: Path) -> Path | None:
|
|
|
+ """解析图片路径,返回绝对路径。"""
|
|
|
+ if not path_str:
|
|
|
+ return None
|
|
|
+ p = Path(path_str)
|
|
|
+ if p.is_absolute() and p.exists():
|
|
|
+ return p
|
|
|
+ # 相对路径:相对于数据目录
|
|
|
+ candidate = data_dir / p
|
|
|
+ if candidate.exists():
|
|
|
+ return candidate
|
|
|
+ # 也可能在 data_dir 的子目录中
|
|
|
+ for child in data_dir.rglob(p.name):
|
|
|
+ if child.is_file():
|
|
|
+ return child
|
|
|
+ return None
|
|
|
+
|
|
|
+
|
|
|
+def _encode_image_base64(image_path: Path, max_size: int = 200) -> str | None:
|
|
|
+ """将图片转为 base64 data URI,用于前端预览。"""
|
|
|
+ import base64
|
|
|
+
|
|
|
+ try:
|
|
|
+ from PIL import Image
|
|
|
+ img = Image.open(image_path)
|
|
|
+ # 缩小尺寸用于预览
|
|
|
+ img.thumbnail((max_size, max_size))
|
|
|
+ if img.mode in ("RGBA", "P", "LA"):
|
|
|
+ img = img.convert("RGB")
|
|
|
+ import io
|
|
|
+ buf = io.BytesIO()
|
|
|
+ img.save(buf, format="JPEG", quality=85)
|
|
|
+ b64 = base64.b64encode(buf.getvalue()).decode("ascii")
|
|
|
+ return f"data:image/jpeg;base64,{b64}"
|
|
|
+ except Exception:
|
|
|
+ return None
|
|
|
+
|
|
|
+
|
|
|
def _format_value(value) -> str:
|
|
|
"""将复杂值格式化为可读字符串。"""
|
|
|
if isinstance(value, (dict, list)):
|
|
|
@@ -434,11 +472,11 @@ async def preview_dataset(dataset_id: str, rows: int = 10) -> dict[str, Any]:
|
|
|
result = await session.execute(select(DatasetRecord).where(DatasetRecord.id == dataset_id))
|
|
|
record = result.scalar_one_or_none()
|
|
|
if not record:
|
|
|
- return {"total_records": 0, "preview_rows": [], "columns": []}
|
|
|
+ return {"total_records": 0, "preview_rows": [], "columns": [], "image_column": None}
|
|
|
|
|
|
file_path = Path(record.file_path)
|
|
|
if not file_path.exists():
|
|
|
- return {"total_records": 0, "preview_rows": [], "columns": []}
|
|
|
+ return {"total_records": 0, "preview_rows": [], "columns": [], "image_column": None}
|
|
|
|
|
|
fmt = record.format
|
|
|
preview_data = _read_records(file_path, fmt, rows)
|
|
|
@@ -449,16 +487,27 @@ async def preview_dataset(dataset_id: str, rows: int = 10) -> dict[str, Any]:
|
|
|
else:
|
|
|
columns = list(preview_data[0].keys()) if preview_data else []
|
|
|
|
|
|
+ # 检测是否为视觉数据集(有 image_path 列),将图片转为 base64 嵌入预览
|
|
|
+ image_column = "image_path" if "image_path" in columns else None
|
|
|
+ data_dir = file_path.parent
|
|
|
+
|
|
|
+ preview_rows = []
|
|
|
+ for i, row in enumerate(preview_data):
|
|
|
+ data = {}
|
|
|
+ for k, v in row.items():
|
|
|
+ if k == "image_path" and image_column:
|
|
|
+ # 解析图片路径,转为 base64 嵌入
|
|
|
+ img_path = _resolve_image_path(str(v), data_dir)
|
|
|
+ data[k] = _encode_image_base64(img_path) if img_path else None
|
|
|
+ else:
|
|
|
+ data[k] = _format_value(v)
|
|
|
+ preview_rows.append({"row_index": i, "data": data})
|
|
|
+
|
|
|
return {
|
|
|
"total_records": record.record_count,
|
|
|
- "preview_rows": [
|
|
|
- {
|
|
|
- "row_index": i,
|
|
|
- "data": {k: _format_value(v) for k, v in row.items()},
|
|
|
- }
|
|
|
- for i, row in enumerate(preview_data)
|
|
|
- ],
|
|
|
+ "preview_rows": preview_rows,
|
|
|
"columns": columns,
|
|
|
+ "image_column": image_column,
|
|
|
}
|
|
|
|
|
|
|