Переглянути джерело

修复标注平台json格式转换,完善接口

lxylxy123321 3 години тому
батько
коміт
f21bf88bc9

+ 94 - 13
backend/app/services/annotation_platform_service.py

@@ -70,15 +70,53 @@ def _is_token_valid() -> bool:
     return time.time() < expires_at - 300
 
 
+async def _refresh_token() -> str:
+    """Exchange the cached access token for a fresh one.
+
+    Sends ``POST /api/v1/open/auth/refresh`` with the currently cached token
+    as a Bearer credential, then stores the new token, its lifetime and its
+    absolute expiry time in ``_token_cache``. This is cheaper than signing a
+    new token request and is meant for a token that exists but is about to
+    expire.
+
+    Returns:
+        The new access token.
+
+    Raises:
+        RuntimeError: If the response body's ``code`` field is not 0.
+        KeyError: If the response ``data`` carries no ``access_token``.
+        Exception: Whatever ``raise_for_status`` raises for an HTTP error.
+    """
+    current = _token_cache.get("access_token", "")
+    endpoint = f"{_get_base_url()}/api/v1/open/auth/refresh"
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        resp = await client.post(
+            endpoint,
+            headers={"Authorization": f"Bearer {current}"},
+        )
+        resp.raise_for_status()
+        payload = resp.json()
+
+    # The platform reports application-level failures through "code".
+    if payload.get("code") != 0:
+        raise RuntimeError(f"刷新标注平台 Token 失败: {payload.get('message', payload)}")
+
+    data = payload.get("data", {})
+    new_token = data["access_token"]
+    ttl = data.get("expires_in", 7200)  # falls back to 7200 when the field is absent
+
+    _token_cache["access_token"] = new_token
+    _token_cache["expires_in"] = ttl
+    _token_cache["expires_at"] = time.time() + ttl
+
+    return new_token
+
+
 async def get_token() -> str:
-    """获取 Access Token,带缓存。
+    """获取 Access Token,带缓存和自动刷新
 
-    POST /api/v1/open/auth/token
-    使用 HMAC-SHA256 签名认证,无请求体。
+    优先级:
+    1. Token 有效 → 直接返回
+    2. Token 存在但快过期 → 调用 /auth/refresh 刷新
+    3. 无 Token → 调用 /auth/token 重新签名获取
     """
     if _is_token_valid():
         return _token_cache["access_token"]
 
+    # Token 存在但即将过期,尝试刷新
+    if _token_cache.get("access_token"):
+        try:
+            return await _refresh_token()
+        except Exception as e:
+            logger.warning(f"Token 刷新失败,回退到重新获取: {e}")
+
+    # 无 Token 或刷新失败,重新签名获取
     headers = _build_token_headers()
     base_url = _get_base_url()
 
@@ -229,36 +267,79 @@ async def import_project_dataset(
 
     file_path.write_bytes(file_content)
 
-    # 5. 检测格式和记录数
-    fmt = _detect_format(file_path.name)
-    record_count = _count_records(file_path, fmt)
+    # 5. 统一转为 JSONL 格式(和 ModelScope/HF 下载的数据格式一致)
+    jsonl_path = _convert_to_jsonl(file_path)
+    record_count = _count_records(jsonl_path, "jsonl")
 
-    # 6. 写入数据库
+    # 6. 写入数据库(格式统一为 jsonl)
     record_id = str(uuid.uuid4())
     record = DatasetRecord(
         id=record_id,
-        name=file_path.name,
-        format=fmt,
+        name=jsonl_path.name,
+        format="jsonl",
         record_count=record_count,
-        file_path=str(file_path),
+        file_path=str(jsonl_path),
         created_at=datetime.utcnow(),
     )
     async with async_session() as session:
         session.add(record)
         await session.commit()
 
-    logger.info(f"Imported dataset from annotation platform: {project_id} -> {file_path.name} ({record_count} records)")
+    logger.info(f"Imported dataset from annotation platform: {project_id} -> {jsonl_path.name} ({record_count} records)")
 
     return {
         "project_id": project_id,
         "project_name": project_name or project_id,
-        "format": format,
+        "format": "jsonl",
         "total_exported": total_exported,
         "dataset_id": record_id,
-        "dataset_name": file_path.name,
+        "dataset_name": jsonl_path.name,
     }
 
 
+def _convert_to_jsonl(file_path: Path) -> Path:
+    """Normalize a JSON or JSONL file to JSONL, one record per line.
+
+    The result is written next to the input with a ``.jsonl`` suffix. The
+    input file is then removed, unless input and output are the same file
+    (which happens when the input already ends in ``.jsonl``).
+
+    Conversion rules:
+      * empty or whitespace-only input -> empty output file
+      * a JSON array -> one line per element
+      * a single JSON object, even when pretty-printed over several lines
+        -> one line
+      * anything else is treated as JSONL; blank lines and lines that are
+        not valid JSON are dropped
+
+    Args:
+        file_path: Existing UTF-8 text file to convert.
+
+    Returns:
+        Path of the resulting ``.jsonl`` file.
+    """
+    import json as _json
+
+    jsonl_path = file_path.with_suffix(".jsonl")
+    # Read everything up front: the output may overwrite the input in place.
+    content = file_path.read_text(encoding="utf-8").strip()
+
+    records = []
+    if content:
+        try:
+            data = _json.loads(content)
+        except _json.JSONDecodeError:
+            data = None  # not a single JSON document; parse line by line
+
+        if isinstance(data, dict):
+            # A lone object is one record, however many lines it spans.
+            data = [data]
+
+        if isinstance(data, list):
+            records = [_json.dumps(item, ensure_ascii=False) for item in data]
+        else:
+            for line in content.split("\n"):
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    _json.loads(line)
+                except _json.JSONDecodeError:
+                    continue  # drop lines that are not valid JSON
+                records.append(line)
+
+    # Opening in "w" mode also truncates any stale output from an earlier run.
+    with open(jsonl_path, "w", encoding="utf-8") as out:
+        out.writelines(record + "\n" for record in records)
+
+    # If the input already was the .jsonl file, unlinking it would delete the
+    # output that was just written.
+    if not file_path.samefile(jsonl_path):
+        file_path.unlink()
+    return jsonl_path
+
+
 def _detect_format(filename: str) -> str:
     """根据文件名推断格式。"""
     name = filename.lower()

+ 167 - 7
frontend/src/pages/Datasets.tsx

@@ -1,6 +1,6 @@
 import { useState, useEffect, useRef, useCallback } from 'react'
-import api, { DatasetInfo, AnnotationProjectItem, DatasetDownloadTaskResponse } from '../api/client'
-import { Database, Upload, Loader2, FolderOpen, CheckCircle, XCircle, Eye, Trash2, FileText } from 'lucide-react'
+import api, { DatasetInfo, AnnotationProjectItem, AnnotationProjectDetailResponse, DatasetDownloadTaskResponse } from '../api/client'
+import { Database, Upload, Loader2, FolderOpen, CheckCircle, XCircle, Eye, Trash2, FileText, X } from 'lucide-react'
 
 function formatBadge(format: string) {
   const map: Record<string, { bg: string; color: string; border: string }> = {
@@ -122,6 +122,7 @@ export function Datasets() {
   const [downloading, setDownloading] = useState(false)
   const [loading, setLoading] = useState(false)
   const [previewData, setPreviewData] = useState<{ columns: string[]; rows: { row_index: number; data: Record<string, unknown> }[] } | null>(null)
+  const [previewError, setPreviewError] = useState<string | null>(null)
   const inputRef = useRef<HTMLInputElement>(null)
 
   // Annotation platform modal state
@@ -133,6 +134,11 @@ export function Datasets() {
   const [projectPage, setProjectPage] = useState(1)
   const [projectTotal, setProjectTotal] = useState(0)
 
+  // Project detail modal
+  const [projectDetail, setProjectDetail] = useState<AnnotationProjectDetailResponse | null>(null)
+  const [showDetail, setShowDetail] = useState(false)
+  const [detailLoading, setDetailLoading] = useState(false)
+
   // Active downloads tracking
   const [activeDownloads, setActiveDownloads] = useState<Map<string, DatasetDownloadTaskResponse>>(new Map())
   const downloadPollIntervals = useRef<Map<string, ReturnType<typeof setInterval>>>(new Map())
@@ -233,9 +239,20 @@ export function Datasets() {
   }, [])
 
   const handlePreview = (id: string) => {
+    setPreviewError(null)
     api.datasets.preview(id, 10)
-      .then(res => setPreviewData({ columns: res.columns, rows: res.preview_rows }))
-      .catch(() => setPreviewData(null))
+      .then(res => {
+        if (!res.columns || res.columns.length === 0) {
+          setPreviewError('该数据集没有可预览的列,可能格式不受支持')
+          setPreviewData(null)
+          return
+        }
+        setPreviewData({ columns: res.columns, rows: res.preview_rows })
+      })
+      .catch(err => {
+        setPreviewError(`预览失败: ${err.message || '未知错误'}`)
+        setPreviewData(null)
+      })
   }
 
   const handleDelete = async (id: string) => {
@@ -276,6 +293,20 @@ export function Datasets() {
     }
   }
 
+  // Opens the project detail modal in its loading state, fetches the detail
+  // for the given project, and closes the modal again (reporting the reason
+  // through the project status line) if the request fails.
+  const handleViewProjectDetail = async (project: AnnotationProjectItem) => {
+    setShowDetail(true)
+    setDetailLoading(true)
+    try {
+      setProjectDetail(await api.annotationPlatform.getProjectDetail(project.project_id))
+    } catch (err: unknown) {
+      const reason = err instanceof Error ? err.message : '未知错误'
+      setProjectStatus(`获取项目详情失败: ${reason}`)
+      setShowDetail(false)
+    } finally {
+      // Runs on both success and failure so the spinner never sticks.
+      setDetailLoading(false)
+    }
+  }
+
   return (
     <div>
       <h1 style={{ margin: 0, fontSize: 22, fontWeight: 700 }}>数据集管理</h1>
@@ -504,13 +535,21 @@ export function Datasets() {
                             {p.completed_task_count}/{p.task_count}
                             <span style={{ marginLeft: 4, fontSize: 11, color: '#94a3b8' }}>({progress}%)</span>
                           </td>
-                          <td style={{ padding: '10px 12px' }}>
+                          <td style={{ padding: '10px 12px', display: 'flex', gap: 6 }}>
+                            <button
+                              onClick={() => handleViewProjectDetail(p)}
+                              style={{
+                                padding: '4px 10px', color: '#0ea5e9', border: '1px solid #0ea5e9',
+                                borderRadius: 6, background: 'transparent', cursor: 'pointer',
+                                fontSize: 12, fontWeight: 500,
+                              }}
+                            >详情</button>
                             {isText ? (
                               <button
                                 onClick={() => handleImportProject(p)}
                                 disabled={projectImporting === p.project_id}
                                 style={{
-                                  padding: '4px 12px', color: '#8b5cf6', border: '1px solid #8b5cf6',
+                                  padding: '4px 10px', color: '#8b5cf6', border: '1px solid #8b5cf6',
                                   borderRadius: 6, background: projectImporting === p.project_id ? '#f5f3ff' : 'transparent',
                                   cursor: projectImporting === p.project_id ? 'not-allowed' : 'pointer',
                                   fontSize: 12, fontWeight: 500, opacity: projectImporting === p.project_id ? 0.7 : 1,
@@ -521,7 +560,7 @@ export function Datasets() {
                                 ) : '导入'}
                               </button>
                             ) : (
-                              <span style={{ fontSize: 12, color: '#94a3b8' }}>不支持训练</span>
+                              <span style={{ fontSize: 12, color: '#94a3b8', padding: '4px 0' }}>不支持训练</span>
                             )}
                           </td>
                         </tr>
@@ -570,6 +609,119 @@ export function Datasets() {
         </div>
       )}
 
+      {/* Project Detail Modal */}
+      {showDetail && (
+        <div
+          style={{
+            position: 'fixed', inset: 0, background: 'rgba(0,0,0,0.4)',
+            display: 'flex', alignItems: 'center', justifyContent: 'center', zIndex: 1001,
+          }}
+          onClick={() => { setShowDetail(false); setProjectDetail(null) }}
+        >
+          <div
+            onClick={e => e.stopPropagation()}
+            style={{
+              background: '#fff', borderRadius: 12, padding: 24, width: '90%', maxWidth: 500,
+              boxShadow: '0 20px 60px rgba(0,0,0,0.15)',
+            }}
+          >
+            <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 16 }}>
+              <h2 style={{ margin: 0, fontSize: 17, fontWeight: 600 }}>项目详情</h2>
+              <button
+                onClick={() => { setShowDetail(false); setProjectDetail(null) }}
+                style={{
+                  border: 'none', background: 'transparent', cursor: 'pointer', fontSize: 20,
+                  color: '#64748b', padding: '4px 8px', borderRadius: 4,
+                }}
+              ><X size={18} /></button>
+            </div>
+
+            {detailLoading ? (
+              <div style={{ textAlign: 'center', padding: 20, color: '#94a3b8' }}>
+                <Loader2 size={24} style={{ animation: 'lucide-spin 1s linear infinite' }} />
+                <div style={{ marginTop: 8, fontSize: 13 }}>加载中...</div>
+              </div>
+            ) : projectDetail ? (() => {
+              const d = projectDetail
+              const progress = d.task_count > 0 ? Math.round((d.completed_task_count / d.task_count) * 100) : 0
+              const isText = d.project_type === 'text'
+              const statusMap: Record<string, { bg: string; color: string }> = {
+                completed: { bg: '#dcfce7', color: '#16a34a' },
+                in_progress: { bg: '#dbeafe', color: '#2563eb' },
+                ready: { bg: '#fef3c7', color: '#d97706' },
+                configuring: { bg: '#f1f5f9', color: '#64748b' },
+                draft: { bg: '#f1f5f9', color: '#64748b' },
+              }
+              const st = statusMap[d.status] || { bg: '#f1f5f9', color: '#64748b' }
+
+              const fields = [
+                ['项目 ID', d.project_id],
+                ['项目名称', d.project_name],
+                ['描述', d.description || '-'],
+                ['类型', isText ? '文本' : '图片'],
+                ['任务类型', d.task_type?.replace(/_/g, ' ') || '-'],
+                ['状态', d.status, true],
+                ['创建人', d.created_by],
+                ['创建时间', d.created_at],
+                ['更新时间', d.updated_at],
+                ['总任务数', String(d.task_count)],
+                ['已完成', String(d.completed_task_count)],
+                ['已分配', String(d.assigned_task_count)],
+                ['完成率', `${progress}%`],
+              ]
+
+              return (
+                <div style={{ display: 'flex', flexDirection: 'column', gap: 10 }}>
+                  {fields.map(([label, value, isStatus], idx) => (
+                    <div key={idx} style={{ display: 'flex', gap: 12, fontSize: 13 }}>
+                      <span style={{ color: '#94a3b8', minWidth: 80, flexShrink: 0 }}>{label}</span>
+                      <span style={{ color: '#1e293b', fontWeight: isStatus ? 500 : 400 }}>
+                        {isStatus ? (
+                          <span style={{
+                            display: 'inline-block', padding: '2px 8px', borderRadius: 4, fontSize: 12,
+                            background: st.bg, color: st.color,
+                          }}>
+                            {value === 'completed' ? '已完成' : value === 'in_progress' ? '进行中' : value}
+                          </span>
+                        ) : value}
+                      </span>
+                    </div>
+                  ))}
+                  {isText && (
+                    <div style={{ marginTop: 8, display: 'flex', justifyContent: 'flex-end', gap: 8 }}>
+                      <button
+                        onClick={() => {
+                          setShowDetail(false)
+                          setProjectDetail(null)
+                          handleImportProject({
+                            project_id: d.project_id,
+                            project_name: d.project_name,
+                            project_type: d.project_type,
+                            task_type: d.task_type,
+                            status: d.status,
+                            task_count: d.task_count,
+                            completed_task_count: d.completed_task_count,
+                          } as AnnotationProjectItem)
+                        }}
+                        style={{
+                          padding: '6px 16px', borderRadius: 6, border: 'none',
+                          background: '#8b5cf6', color: '#fff', cursor: 'pointer',
+                          fontSize: 13, fontWeight: 500,
+                        }}
+                      >导入为训练数据</button>
+                    </div>
+                  )}
+                </div>
+              )
+            })() : (
+              <div style={{ padding: 20, textAlign: 'center', color: '#94a3b8', fontSize: 14 }}>
+                暂无详情
+              </div>
+            )}
+          </div>
+        </div>
+      )}
+
       {/* Dataset list */}
       <div style={{ marginTop: 24 }}>
         <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 12 }}>
@@ -611,6 +763,14 @@ export function Datasets() {
       </div>
 
       {/* Preview */}
+      {previewError && (
+        <div style={{
+          marginTop: 24, padding: '12px 16px', borderRadius: 8, fontSize: 13,
+          background: '#fff1f2', color: '#e11d48', border: '1px solid #fecdd3',
+        }}>
+          {previewError}
+        </div>
+      )}
       {previewData && previewData.rows.length > 0 && (
         <div style={{
           marginTop: 24, background: '#fff', borderRadius: 10, padding: 20,