chenkun 2 týždňov pred
rodič
commit
ddbb120c7d

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 0 - 1074
logs/lq-admin-app.log.1


Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 1074 - 884
logs/lq-admin-app.log.5


+ 1 - 0
src/app/sample/models/base_info.py

@@ -107,6 +107,7 @@ class DocumentMain(BaseModel):
     md_url = Column(String(500), nullable=True, comment="Markdown文件URL")
     json_url = Column(String(500), nullable=True, comment="JSON文件URL")
     file_extension = Column(String(10), nullable=True, comment="后缀名")
+    whether_to_task = Column(Integer, nullable=False, default=0, comment="是否进入任务")
     created_by = Column(String(36), nullable=False, comment="创建人")
     created_time = Column(DateTime, nullable=False, default=func.now(), comment="创建时间")
     updated_by = Column(String(36), nullable=False, comment="修改人")

+ 18 - 18
src/app/server/app.py

@@ -169,12 +169,12 @@ async def api_exception_handler(request: Request, exc: BaseAPIException):
     logger.error(f"API异常: {exc.message} - {exc.details}")
     return JSONResponse(
         status_code=exc.status_code,
-        content=ResponseSchema(
-            code=exc.code,
-            message=exc.message,
-            data=exc.details,
-            timestamp=datetime.now(timezone.utc)
-        ).model_dump(mode='json')
+        content={
+            "code": exc.code,
+            "message": exc.message,
+            "data": exc.details,
+            "timestamp": datetime.now(timezone.utc).isoformat()
+        }
     )
 
 
@@ -193,12 +193,12 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
     
     return JSONResponse(
         status_code=status.HTTP_400_BAD_REQUEST,
-        content=ResponseSchema(
-            code=100001,
-            message="参数验证失败",
-            data={"errors": errors},
-            timestamp=datetime.now(timezone.utc)
-        ).model_dump(mode='json')
+        content={
+            "code": 100001,
+            "message": "参数验证失败",
+            "data": {"errors": errors},
+            "timestamp": datetime.now(timezone.utc).isoformat()
+        }
     )
 
 
@@ -209,12 +209,12 @@ async def general_exception_handler(request: Request, exc: Exception):
     
     return JSONResponse(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-        content=ResponseSchema(
-            code=500001,
-            message="服务器内部错误" if not config_handler.get_bool("admin_app", "DEBUG", False) else str(exc),
-            data=None,
-            timestamp=datetime.now(timezone.utc)
-        ).model_dump(mode='json')
+        content={
+            "code": 500001,
+            "message": "服务器内部错误" if not config_handler.get_bool("admin_app", "DEBUG", False) else str(exc),
+            "data": None,
+            "timestamp": datetime.now(timezone.utc).isoformat()
+        }
     )
 
 

+ 38 - 4
src/app/services/image_service.py

@@ -5,6 +5,7 @@
 import logging
 import uuid
 import os
+import json
 from typing import Optional, List, Dict, Any, Tuple
 from datetime import datetime
 from app.base.async_mysql_connection import get_db_connection
@@ -312,7 +313,7 @@ class ImageService:
             cursor.close()
             conn.close()
 
-    async def batch_add_to_task(self, image_ids: List[str], username: str, project_name: str) -> Tuple[bool, str]:
+    async def batch_add_to_task(self, image_ids: List[str], username: str, project_name: str, tags: List[str] = None) -> Tuple[bool, str]:
         """批量将图片加入任务中心 (单表化)"""
         conn = get_db_connection()
         if not conn:
@@ -323,8 +324,17 @@ class ImageService:
             if not image_ids:
                 return False, "未指定要加入任务的图片 ID"
             
-            # 0. 直接使用项目名称作为项目 ID
-            project_id = project_name
+            # 0. 获取或生成项目 UUID
+            project_id = None
+            cursor.execute("SELECT project_id FROM t_task_management WHERE project_name = %s LIMIT 1", (project_name,))
+            existing_project = cursor.fetchone()
+            if existing_project:
+                project_id = existing_project['project_id']
+            else:
+                project_id = str(uuid.uuid4())
+            
+            # 处理标签
+            tag_str = json.dumps(tags, ensure_ascii=False) if tags else None
             
             # 1. 待处理图片 ID
             ids_to_add = image_ids
@@ -337,10 +347,34 @@ class ImageService:
             # 3. 确保这些图片都在 t_task_management 表中 (单表逻辑)
             for img_id in ids_to_add:
                 try:
+                    # 获取业务元数据
+                    metadata_dict = {}
+                    try:
+                        # 排除内部系统字段
+                        EXCLUDE_FIELDS = {
+                            'id', 'created_time', 'updated_time', 'created_by', 'updated_by',
+                            'image_url', 'is_deleted'
+                        }
+                        
+                        cursor.execute("SELECT * FROM t_image_info WHERE id = %s", (img_id,))
+                        img_info = cursor.fetchone()
+                        if img_info:
+                            for k, v in img_info.items():
+                                if v is not None and v != '' and k not in EXCLUDE_FIELDS:
+                                    metadata_dict[k] = v
+                        
+                        # 递归格式化时间
+                        metadata_dict = task_service._serialize_datetime(metadata_dict)
+                    except Exception as meta_err:
+                        logger.warning(f"获取图片 {img_id} 元数据失败: {meta_err}")
+
                     await task_service.add_task(
                         business_id=img_id, 
                         task_type='image', 
-                        project_id=project_id
+                        project_id=project_id,
+                        project_name=project_name,
+                        tag=tag_str,
+                        metadata=json.dumps(metadata_dict, ensure_ascii=False) if metadata_dict else None
                     )
                 except Exception as e:
                     logger.error(f"确保图片 {img_id} 在任务中心记录失败: {e}")

+ 14 - 6
src/app/services/milvus_service.py

@@ -54,10 +54,19 @@ class MilvusService:
                 ("##", "H2"),
                 ("###", "H3"),
                 ("####", "H4"),
+                ("#####", "H5"),
+                ("######", "H6"),
             ]
             md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
             md_header_splits = md_splitter.split_text(content)
 
+            # 预处理:将层级标签 (H1, H2...) 转换为更直观的路径字符串
+            for split in md_header_splits:
+                # 按照 H1, H2... 的顺序排列,确保路径有序
+                header_keys = sorted(split.metadata.keys(), key=lambda x: int(x[1:]) if x[1:].isdigit() else 999)
+                hierarchy = " -> ".join([split.metadata[k] for k in header_keys])
+                split.metadata["_formatted_hierarchy"] = hierarchy or "正文"
+
             if kb_method == "parent_child":
                 # --- 方案 A: 父子段分表入库 ---
                 parent_col = doc_info.get("collection_name_parent") or f"{doc_info.get('collection_name', PARENT_COLLECTION_NAME)}_parent"
@@ -71,8 +80,8 @@ class MilvusService:
                 global_idx = 0
                 
                 for split in md_header_splits:
-                    # 获取当前片段的层级路径
-                    hierarchy = " -> ".join([v for k, v in split.metadata.items()])
+                    # 获取预处理后的层级路径
+                    hierarchy = split.metadata.get("_formatted_hierarchy", "正文")
                     
                     # 对每个标题块进行父段切分
                     split_parent_chunks = parent_splitter.split_text(split.page_content)
@@ -130,7 +139,7 @@ class MilvusService:
                 documents = []
                 global_idx = 0
                 for split in md_header_splits:
-                    hierarchy = " -> ".join([v for k, v in split.metadata.items()])
+                    hierarchy = split.metadata.get("_formatted_hierarchy", "正文")
                     split_chunks = text_splitter.split_text(split.page_content)
                     
                     for chunk in split_chunks:
@@ -164,13 +173,12 @@ class MilvusService:
         # 基础元数据
         metadata_dict = {
             "file_name": file_name,
-            "doc_version": doc_version,
             "hierarchy": hierarchy or "正文",
-            "chunk_id": p_id
+            "basic_info_id": business_meta.get("basic_info_id") or ""
         }
         
         # 注入业务元数据
-        if source_type == 'standard':
+        if source_type in ['standard', 'basis']:
             metadata_dict.update({
                 "chinese_name": business_meta.get("chinese_name") or file_name,
                 "standard_number": business_meta.get("standard_number") or "无",

+ 105 - 15
src/app/services/sample_service.py

@@ -129,7 +129,7 @@ class SampleService:
             # 1. 获取所有选中的文档详情
             placeholders = ','.join(['%s']*len(doc_ids))
             fetch_sql = f"""
-                SELECT id, title, source_type, md_url, conversion_status, whether_to_enter, created_time, kb_id 
+                SELECT id, title, source_type, md_url, conversion_status, whether_to_enter, created_time, kb_id, file_url
                 FROM t_samp_document_main 
                 WHERE id IN ({placeholders})
             """
@@ -214,10 +214,10 @@ class SampleService:
                 try:
                     # 获取业务元数据
                     business_metadata = {}
-                    if source_type == 'standard':
+                    if source_type in ['standard', 'basis']:
                         std_sql = """
-                            SELECT chinese_name, standard_number, issuing_authority, 
-                                   document_type, professional_field, validity, source_url as file_url
+                            SELECT id as basic_info_id, chinese_name, standard_number, issuing_authority, 
+                                   document_type, professional_field, validity
                             FROM t_samp_standard_base_info 
                             WHERE id = %s
                         """
@@ -225,19 +225,18 @@ class SampleService:
                         business_metadata = cursor.fetchone() or {}
                     elif source_type == 'construction_plan':
                         plan_sql = """
-                            SELECT plan_name, project_name, project_section, compiling_unit, 
+                            SELECT id as basic_info_id, plan_name, project_name, project_section, compiling_unit, 
                                    compiling_date, plan_summary, plan_category, 
                                    level_1_classification, level_2_classification, 
-                                   level_3_classification, level_4_classification,
-                                   '' as file_url
+                                   level_3_classification, level_4_classification
                             FROM t_samp_construction_plan_base_info 
                             WHERE id = %s
                         """
                         cursor.execute(plan_sql, (doc_id,))
                         business_metadata = cursor.fetchone() or {}
-                        # 方案库的 file_url 也可以从主表取,或者这里补一个
-                        if not business_metadata.get('file_url'):
-                            business_metadata['file_url'] = doc.get('file_url', '')
+                    
+                    # 统一使用主表的 file_url,确保数据来源一致
+                    business_metadata['file_url'] = doc.get('file_url', '')
 
                     # 准备元数据
                     current_date = int(datetime.now().strftime('%Y%m%d'))
@@ -301,8 +300,15 @@ class SampleService:
             cursor.close()
             conn.close()
     
-    async def batch_add_to_task(self, doc_ids: List[str], username: str, project_name: str) -> Tuple[bool, str]:
-        """批量将文档加入标注任务中心 (单表化)"""
+    async def batch_add_to_task(self, doc_ids: List[str], username: str, project_name: str, task_tags: Optional[List[str]] = None) -> Tuple[bool, str]:
+        """批量将文档加入标注任务中心 (单表化)
+        
+        Args:
+            doc_ids: 文档ID列表
+            username: 操作人
+            project_name: 项目名称 (用于查找或生成对应的项目 UUID)
+            task_tags: 标注任务标签列表 (例如 ["标签1", "标签2"])
+        """
         conn = get_db_connection()
         if not conn:
             return False, "数据库连接失败"
@@ -312,8 +318,19 @@ class SampleService:
             if not doc_ids:
                 return False, "未指定要加入任务的文档 ID"
             
-            # 0. 直接使用项目名称作为项目 ID (根据用户要求,project_id 字段即 project_name)
-            project_id = project_name
+            # 0. 尝试从现有任务中获取该项目名称对应的 UUID
+            # 如果不存在,则生成一个新的 UUID
+            project_id = None
+            cursor.execute("SELECT project_id FROM t_task_management WHERE project_name = %s LIMIT 1", (project_name,))
+            existing_project = cursor.fetchone()
+            if existing_project:
+                project_id = existing_project['project_id']
+            else:
+                project_id = str(uuid.uuid4())
+            
+            # 处理标签:转换为 JSON 字符串存储
+            import json
+            tag_str = json.dumps(task_tags, ensure_ascii=False) if task_tags else None
             
             # 1. 过滤掉未入库的文档
             placeholders = ', '.join(['%s'] * len(doc_ids))
@@ -334,10 +351,48 @@ class SampleService:
             # 3. 写入任务管理表 (单表逻辑)
             for doc_id in ids_to_add:
                 try:
+                    # 获取业务元数据
+                    metadata_dict = {}
+                    try:
+                        # 定义需要过滤掉的非业务/内部状态字段
+                        EXCLUDE_FIELDS = {
+                            'id', 'created_time', 'updated_time', 'created_by', 'updated_by',
+                            'conversion_status', 'whether_to_enter', 'whether_to_task', 
+                            'kb_method', 'whether_to_delete'
+                        }
+                        
+                        # 查询主表和子表信息
+                        cursor.execute("SELECT * FROM t_samp_document_main WHERE id = %s", (doc_id,))
+                        doc_main = cursor.fetchone()
+                        if doc_main:
+                            # 基础元数据 (仅保留标题和来源类型等核心信息)
+                            for k, v in doc_main.items():
+                                if v is not None and v != '' and k not in EXCLUDE_FIELDS:
+                                    metadata_dict[k] = v
+                            
+                            # 子表元数据
+                            source_type = doc_main.get('source_type')
+                            table_name = TABLE_MAP.get(source_type)
+                            if table_name:
+                                cursor.execute(f"SELECT * FROM {table_name} WHERE id = %s", (doc_id,))
+                                sub_data = cursor.fetchone()
+                                if sub_data:
+                                    for k, v in sub_data.items():
+                                        if v is not None and v != '' and k not in EXCLUDE_FIELDS:
+                                            metadata_dict[k] = v
+                        
+                        # 递归格式化时间
+                        metadata_dict = task_service._serialize_datetime(metadata_dict)
+                    except Exception as meta_err:
+                        logger.warning(f"获取文档 {doc_id} 元数据失败: {meta_err}")
+                    
                     await task_service.add_task(
                         business_id=doc_id, 
                         task_type='data', 
-                        project_id=project_id
+                        project_id=project_id,
+                        project_name=project_name,
+                        tag=tag_str,
+                        metadata=json.dumps(metadata_dict, ensure_ascii=False) if metadata_dict else None
                     )
                 except Exception as e:
                     logger.error(f"添加文档 {doc_id} 到任务中心失败: {e}")
@@ -414,6 +469,41 @@ class SampleService:
         finally:
             cursor.close()
             conn.close()
+
+    async def get_tag_tree(self) -> List[Dict[str, Any]]:
+        """获取标签层级树 (从 t_samp_tag_category 查询)"""
+        conn = get_db_connection()
+        if not conn:
+            return []
+        
+        cursor = conn.cursor()
+        try:
+            sql = """
+                SELECT id, parent_id, name, level, type 
+                FROM t_samp_tag_category 
+                WHERE is_deleted = 0 AND status = 1
+                ORDER BY level ASC, sort_no ASC
+            """
+            cursor.execute(sql)
+            tags = cursor.fetchall()
+            
+            # 构建树形结构
+            tag_dict = {tag['id']: {**tag, 'children': []} for tag in tags}
+            tree = []
+            for tag_id, tag_item in tag_dict.items():
+                parent_id = tag_item['parent_id']
+                if parent_id == 0:
+                    tree.append(tag_item)
+                elif parent_id in tag_dict:
+                    tag_dict[parent_id]['children'].append(tag_item)
+            
+            return tree
+        except Exception as e:
+            logger.error(f"获取标签树失败: {e}")
+            return []
+        finally:
+            cursor.close()
+            conn.close()
     
     async def get_document_list(
         self,

+ 298 - 133
src/app/services/task_service.py

@@ -2,6 +2,7 @@
 import logging
 import json
 import httpx
+from datetime import datetime
 from typing import List, Dict, Any, Tuple, Optional
 from app.base.async_mysql_connection import get_db_connection
 from app.base.minio_connection import get_minio_manager
@@ -11,97 +12,76 @@ logger = logging.getLogger(__name__)
 class TaskService:
     """任务管理服务类"""
     
+    _schema_verified = False  # 类级别变量,确保 DDL 逻辑只运行一次
+
     def __init__(self):
         self.minio_manager = get_minio_manager()
 
-    async def get_task_list(self, task_type: str) -> List[Dict[str, Any]]:
-        """获取任务列表
-        
-        Args:
-            task_type: 任务类型, 'data' 或 'image'
-        """
-        conn = get_db_connection()
-        if not conn:
-            return []
+    async def _ensure_table_schema(self, cursor, conn):
+        """确保表结构和索引正确 (DDL 操作)"""
+        if TaskService._schema_verified:
+            return
         
-        cursor = conn.cursor()
         try:
-            if task_type == 'data':
-                # 类型为数据的,从 t_samp_document_main 拿名称
-                sql = """
-                    SELECT 
-                        t.id, 
-                        t.business_id,
-                        t.task_id, 
-                        t.project_id,
-                        t.type,
-                        t.annotation_status,
-                        t.project_id as project_name,
-                        d.title as name
-                    FROM t_task_management t
-                    JOIN t_samp_document_main d ON t.business_id COLLATE utf8mb4_unicode_ci = d.id COLLATE utf8mb4_unicode_ci
-                    WHERE t.type = 'data' AND d.whether_to_task = 1
-                    ORDER BY d.created_time DESC
-                """
-            elif task_type == 'image':
-                # 类型为图片的,从 t_image_info 拿名称和 URL
-                sql = """
-                    SELECT 
-                        t.id, 
-                        t.business_id,
-                        t.task_id, 
-                        t.project_id,
-                        t.type,
-                        t.annotation_status,
-                        t.project_id as project_name,
-                        i.image_name as name,
-                        i.image_url
-                    FROM t_task_management t
-                    JOIN t_image_info i ON t.business_id COLLATE utf8mb4_unicode_ci = i.id COLLATE utf8mb4_unicode_ci
-                    WHERE t.type = 'image' AND i.whether_to_task = 1
-                    ORDER BY i.created_time DESC
-                """
-            else:
-                return []
-                
-            cursor.execute(sql)
-            tasks = cursor.fetchall()
+            # 1. 动态维护字段
+            cursor.execute("SHOW COLUMNS FROM t_task_management LIKE 'tag'")
+            if not cursor.fetchone():
+                cursor.execute("ALTER TABLE t_task_management ADD COLUMN tag json NULL COMMENT '标签' AFTER type")
             
-            for item in tasks:
-                # 统一返回结构,旧代码可能还在找 metadata 字典,这里给个空的
-                item['metadata'] = {}
-                
-                # 如果是图片类型,处理 URL 转换以支持前端预览
-                if task_type == 'image' and item.get('image_url'):
-                    image_url = item.get('image_url')
-                    if image_url and not image_url.startswith(('http://', 'https://')):
-                        item['image_url'] = self.minio_manager.get_full_url(image_url)
+            cursor.execute("SHOW COLUMNS FROM t_task_management LIKE 'metadata'")
+            if not cursor.fetchone():
+                cursor.execute("ALTER TABLE t_task_management ADD COLUMN metadata json NULL COMMENT '业务元数据' AFTER tag")
             
-            return tasks
+            cursor.execute("SHOW COLUMNS FROM t_task_management LIKE 'project_name'")
+            if not cursor.fetchone():
+                cursor.execute("ALTER TABLE t_task_management ADD COLUMN project_name varchar(255) NULL COMMENT '项目显示名称' AFTER project_id")
+            
+            conn.commit()
+
+            # 2. 处理索引冲突
+            cursor.execute("SHOW INDEX FROM t_task_management WHERE Column_name = 'business_id'")
+            indexes = cursor.fetchall()
+            for idx in indexes:
+                if not idx['Non_unique'] and idx['Key_name'] != 'PRIMARY' and idx['Seq_in_index'] == 1:
+                    cursor.execute(f"SHOW INDEX FROM t_task_management WHERE Key_name = '{idx['Key_name']}'")
+                    if len(cursor.fetchall()) == 1:
+                        cursor.execute(f"DROP INDEX {idx['Key_name']} ON t_task_management")
+                        logger.info(f"Dropped old unique index: {idx['Key_name']}")
+            
+            cursor.execute("SHOW INDEX FROM t_task_management WHERE Key_name = 'uk_business_project'")
+            if not cursor.fetchone():
+                cursor.execute("CREATE UNIQUE INDEX uk_business_project ON t_task_management (business_id, project_id)")
+                logger.info("Created new composite unique index: uk_business_project")
+            
+            conn.commit()
+            TaskService._schema_verified = True
         except Exception as e:
-            logger.exception(f"获取任务列表失败 ({task_type}): {e}")
-            return []
-        finally:
-            cursor.close()
-            conn.close()
+            logger.warning(f"表结构维护失败: {e}")
+            conn.rollback()
 
-    async def add_task(self, business_id: str, task_type: str, task_id: str = None, project_id: str = None) -> Tuple[bool, str, Optional[int]]:
-        """添加或更新任务记录 (适配单表结构,直接使用 project_id 存项目名)"""
+    async def add_task(self, business_id: str, task_type: str, task_id: str = None, project_id: str = None, project_name: str = None, tag: str = None, metadata: str = None) -> Tuple[bool, str, Optional[int]]:
+        """添加或更新任务记录 (适配单表结构)"""
         conn = get_db_connection()
         if not conn:
             return False, "数据库连接失败", None
         
         cursor = conn.cursor()
         try:
+            # 确保表结构(仅在第一次调用时执行)
+            await self._ensure_table_schema(cursor, conn)
+
+            # 执行插入/更新
             sql = """
-                INSERT INTO t_task_management (business_id, task_id, project_id, type, annotation_status)
-                VALUES (%s, %s, %s, %s, %s)
+                INSERT INTO t_task_management (business_id, task_id, project_id, project_name, type, annotation_status, tag, metadata)
+                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
                 ON DUPLICATE KEY UPDATE 
                     task_id = IFNULL(VALUES(task_id), task_id),
-                    project_id = IFNULL(VALUES(project_id), project_id),
-                    annotation_status = IFNULL(VALUES(annotation_status), annotation_status)
+                    project_name = IFNULL(VALUES(project_name), project_name),
+                    annotation_status = IFNULL(VALUES(annotation_status), annotation_status),
+                    tag = IFNULL(VALUES(tag), tag),
+                    metadata = IFNULL(VALUES(metadata), metadata)
             """
-            cursor.execute(sql, (business_id, task_id, project_id, task_type, 'pending'))
+            cursor.execute(sql, (business_id, task_id, project_id, project_name, task_type, 'pending', tag, metadata))
             record_id = cursor.lastrowid
             
             conn.commit()
@@ -125,11 +105,112 @@ class TaskService:
             sql = "DELETE FROM t_task_management WHERE id = %s"
             cursor.execute(sql, (id,))
             conn.commit()
-            return True, "删除成功"
+            return True, "成功"
         except Exception as e:
-            logger.exception(f"根据id删除任务失败: {e}")
             conn.rollback()
-            return False, f"删除失败: {str(e)}"
+            logger.exception(f"删除任务失败: {e}")
+            return False, str(e)
+        finally:
+            cursor.close()
+            conn.close()
+
+    def _serialize_datetime(self, obj: Any) -> Any:
+        """递归遍历对象,将 datetime 转换为字符串"""
+        if isinstance(obj, dict):
+            return {k: self._serialize_datetime(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._serialize_datetime(i) for i in obj]
+        elif isinstance(obj, datetime):
+            return obj.strftime('%Y-%m-%d %H:%M:%S')
+        return obj
+
+    async def get_task_list(self, task_type: str) -> List[Dict[str, Any]]:
+        """获取项目列表 (按 project_id 聚合)"""
+        conn = get_db_connection()
+        if not conn or task_type not in ['data', 'image']:
+            return []
+        
+        cursor = conn.cursor()
+        try:
+            # 确保表结构
+            await self._ensure_table_schema(cursor, conn)
+            
+            # 修改聚合逻辑:返回 project_id (UUID) 和 project_name (文字)
+            sql = """
+                SELECT 
+                    project_id, 
+                    MAX(project_name) as project_name,
+                    MAX(tag) as tag,
+                    MAX(id) as sort_id,
+                    COUNT(*) as file_count
+                FROM t_task_management 
+                WHERE type = %s 
+                GROUP BY project_id
+                ORDER BY sort_id DESC
+            """
+            cursor.execute(sql, (task_type,))
+            rows = cursor.fetchall()
+            
+            # 兼容旧数据:如果 project_name 为空,则用 project_id 代替
+            for row in rows:
+                if not row['project_name']:
+                    row['project_name'] = row['project_id']
+            
+            return self._serialize_datetime(rows)
+        except Exception as e:
+            logger.error(f"获取任务列表失败: {e}")
+            return []
+        finally:
+            cursor.close()
+            conn.close()
+
+    async def get_project_details(self, project_id: str, task_type: str) -> List[Dict[str, Any]]:
+        """获取项目详情 (按 project_id 查询)"""
+        conn = get_db_connection()
+        if not conn:
+            return []
+        
+        cursor = conn.cursor()
+        try:
+            # 确保表结构
+            await self._ensure_table_schema(cursor, conn)
+            
+            # 修改查询逻辑:获取 project_name 并在结果中包含它
+            sql = """
+                SELECT 
+                    id, business_id, task_id, project_id, project_name, 
+                    type, annotation_status, tag, metadata
+                FROM t_task_management
+                WHERE project_id = %s AND type = %s
+            """
+            cursor.execute(sql, (project_id, task_type))
+            rows = cursor.fetchall()
+            
+            # 处理 tag 和 metadata 的 JSON 解析
+            for row in rows:
+                if row.get('tag'):
+                    try: row['tag'] = json.loads(row['tag']) if isinstance(row['tag'], str) else row['tag']
+                    except: pass
+                if row.get('metadata'):
+                    try: 
+                        meta = json.loads(row['metadata']) if isinstance(row['metadata'], str) else row['metadata']
+                        row['metadata'] = meta
+                        # 提取名称供前端显示
+                        if row['type'] == 'data':
+                            row['name'] = meta.get('title') or meta.get('filename') or row['business_id']
+                        elif row['type'] == 'image':
+                            row['name'] = meta.get('image_name') or row['business_id']
+                        else:
+                            row['name'] = row['business_id']
+                    except: 
+                        row['name'] = row['business_id']
+                else:
+                    row['name'] = row['business_id']
+            
+            return self._serialize_datetime(rows)
+        except Exception as e:
+            logger.error(f"获取项目详情失败: {e}")
+            return []
         finally:
             cursor.close()
             conn.close()
@@ -154,7 +235,86 @@ class TaskService:
             cursor.close()
             conn.close()
 
-    # create_anno_project 已被废弃,改为单表 batch_add 逻辑
+    async def create_anno_project(self, data: Dict[str, Any]) -> Tuple[bool, str]:
+        """创建标注项目并同步任务数据"""
+        project_name = data.get('name')
+        if not project_name:
+            return False, "项目名称不能为空"
+        
+        # 0. 统一使用 UUID 方案获取或生成 project_id
+        conn = get_db_connection()
+        if not conn:
+            return False, "数据库连接失败"
+        
+        cursor = conn.cursor()
+        try:
+            project_id = None
+            cursor.execute("SELECT project_id FROM t_task_management WHERE project_name = %s LIMIT 1", (project_name,))
+            existing_project = cursor.fetchone()
+            if existing_project:
+                project_id = existing_project['project_id']
+            else:
+                import uuid
+                project_id = str(uuid.uuid4())
+
+            task_type = data.get('task_type', 'data')
+            # 映射回内部类型
+            internal_type_map = {
+                'text_classification': 'data',
+                'image_classification': 'image'
+            }
+            internal_task_type = internal_type_map.get(task_type, task_type)
+            
+            tasks_data = data.get('data', [])
+            if not tasks_data:
+                return False, "任务数据不能为空"
+            
+            # 提取全局标签名列表
+            global_tags = []
+            if data.get('tags'):
+                global_tags = [t['tag'] for t in data['tags'] if 'tag' in t]
+            
+            # 批量写入任务
+            import json
+            tag_str = json.dumps(global_tags, ensure_ascii=False) if global_tags else None
+            
+            for item in tasks_data:
+                business_id = item.get('id')
+                if not business_id: continue
+                
+                # 检查是否已存在 (使用联合主键逻辑)
+                cursor.execute(
+                    "SELECT id FROM t_task_management WHERE business_id = %s AND project_id = %s",
+                    (business_id, project_id)
+                )
+                if cursor.fetchone():
+                    continue
+                
+                metadata_str = json.dumps(item.get('metadata', {}), ensure_ascii=False)
+                
+                sql = """
+                    INSERT INTO t_task_management 
+                    (business_id, type, project_id, project_name, tag, metadata, annotation_status)
+                    VALUES (%s, %s, %s, %s, %s, %s, 'pending')
+                """
+                cursor.execute(sql, (business_id, internal_task_type, project_id, project_name, tag_str, metadata_str))
+            
+            conn.commit()
+            
+            # 自动推送至外部平台
+            success, msg = await self.send_to_external_platform(project_id)
+            if success:
+                return True, project_id
+            else:
+                return False, f"任务已保存但推送失败: {msg}"
+                
+        except Exception as e:
+            logger.exception(f"创建项目失败: {e}")
+            conn.rollback()
+            return False, str(e)
+        finally:
+            cursor.close()
+            conn.close()
 
     async def get_project_progress(self, project_id: str) -> Dict[str, Any]:
         """获取项目进度统计 (单表化)"""
@@ -267,7 +427,7 @@ class TaskService:
         try:
             # 1. 获取任务记录
             sql_tasks = """
-                SELECT business_id as id, type, task_id, metadata as raw_metadata
+                SELECT business_id as id, type, task_id, tag, metadata
                 FROM t_task_management
                 WHERE project_id = %s
             """
@@ -279,7 +439,6 @@ class TaskService:
             
             # 2. 解析基本信息
             first_row = rows[0]
-            project_name = project_id  # 直接使用传入的项目名
             internal_task_type = first_row['type']
             remote_project_id = first_row.get('task_id') or project_id
             
@@ -289,91 +448,91 @@ class TaskService:
 
             # 3. 处理数据
             final_tasks = []
+            all_project_tags = set()
             
-            # 批量获取 Milvus 内容的优化逻辑
+            # 针对 'data' 类型的批量 Milvus 查询优化
+            milvus_data_map = {}
             if internal_task_type == 'data':
-                # 先收集所有需要查询的 task_id
                 all_task_ids = [r['id'] for r in rows]
-                
-                # 尝试通过第一个任务获取知识库信息(假设同一个项目下的任务属于同一个知识库)
-                # 如果项目跨知识库,这里可以进一步优化为按 kb_id 分组批量查
                 sql_kb = """
-                    SELECT kb.collection_name_parent, kb.collection_name_children, d.title
+                    SELECT kb.collection_name_parent, kb.collection_name_children
                     FROM t_samp_document_main d
                     LEFT JOIN t_samp_knowledge_base kb ON d.kb_id = kb.id
                     WHERE d.id COLLATE utf8mb4_unicode_ci = %s COLLATE utf8mb4_unicode_ci
                 """
                 cursor.execute(sql_kb, (all_task_ids[0],))
                 kb_info = cursor.fetchone()
-                
-                # 批量抓取 Milvus 内容
-                milvus_data_map = {}
                 if kb_info:
                     milvus_data_map = self._get_milvus_content_batch(all_task_ids, kb_info)
 
-                for item in rows:
-                    task_id = item['id']
-                    metadata = {}
-                    if item.get('raw_metadata'):
-                        try:
-                            metadata = json.loads(item['raw_metadata'])
-                        except: pass
-                    
+            for item in rows:
+                task_id = item['id']
+                
+                # 提取并处理标签
+                doc_tags = []
+                if item.get('tag'):
+                    try:
+                        doc_tags = json.loads(item['tag']) if isinstance(item['tag'], str) else item['tag']
+                        if doc_tags:
+                            for t in doc_tags: all_project_tags.add(t)
+                    except: pass
+                
+                # 解析数据库元数据
+                db_metadata = {}
+                if item.get('metadata'):
+                    try:
+                        db_metadata = json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata']
+                    except: pass
+                
+                # 获取任务内容
+                task_contents = []
+                if internal_task_type == 'data':
                     task_contents = milvus_data_map.get(task_id, [])
                     if not task_contents:
-                        # 兜底:如果批量没查到,或者不是 Milvus 任务,尝试查 title
                         cursor.execute("SELECT title FROM t_samp_document_main WHERE id = %s", (task_id,))
                         res = cursor.fetchone()
                         if res: task_contents = [res['title']]
-                    
-                    # 拆分推送
-                    for idx, content in enumerate(task_contents):
-                        if not content: continue
-                        task_metadata = metadata.copy()
-                        task_metadata.update({"original_id": task_id, "chunk_index": idx})
-                        final_tasks.append({
-                            "id": f"{task_id}_{idx}" if len(task_contents) > 1 else task_id,
-                            "content": content,
-                            "metadata": task_metadata
-                        })
-            
-            elif internal_task_type == 'image':
-                for item in rows:
-                    task_id = item['id']
-                    metadata = {}
-                    if item.get('raw_metadata'):
-                        try:
-                            metadata = json.loads(item['raw_metadata'])
-                        except: pass
-                        
+                elif internal_task_type == 'image':
                     cursor.execute("SELECT image_url FROM t_image_info WHERE id = %s", (task_id,))
                     res = cursor.fetchone()
-                    task_contents = []
                     if res:
                         img_url = res['image_url']
                         if img_url and not img_url.startswith('http'):
                             img_url = self.minio_manager.get_full_url(img_url)
                         task_contents = [img_url]
 
-                    for idx, content in enumerate(task_contents):
-                        if not content: continue
-                        task_metadata = metadata.copy()
-                        task_metadata.update({"original_id": task_id, "chunk_index": idx})
-                        final_tasks.append({
-                            "id": f"{task_id}_{idx}" if len(task_contents) > 1 else task_id,
-                            "content": content,
-                            "metadata": task_metadata
-                        })
+                # 构建最终任务列表
+                for idx, content in enumerate(task_contents):
+                    if not content: continue
+                    
+                    # 合并元数据:数据库数据 + 动态 ID
+                    task_metadata = {
+                        "original_id": task_id,
+                        "chunk_index": idx
+                    }
+                    if db_metadata:
+                        task_metadata.update(db_metadata)
+                    
+                    if doc_tags:
+                        task_metadata['tags'] = [{"tag": tag} for tag in doc_tags]
+                        
+                    final_tasks.append({
+                        "id": f"{task_id}_{idx}" if len(task_contents) > 1 else task_id,
+                        "content": content,
+                        "metadata": task_metadata
+                    })
 
-            return {
-                "name": project_name,
+            # 统一进行一次递归序列化处理
+            return self._serialize_datetime({
+                "name": project_id,
                 "description": "",
                 "task_type": external_task_type,
                 "data": final_tasks,
-                "external_id": remote_project_id
-            }
+                "external_id": remote_project_id,
+                "tags": [{"tag": t} for t in sorted(list(all_project_tags))]
+            })
         except Exception as e:
-            logger.exception(f"导出数据异常: {e}")
+            logger.exception(f"导出项目数据异常: {e}")
             return {}
         finally:
             cursor.close()
@@ -503,7 +662,13 @@ class TaskService:
             from app.core.config import config_handler
             api_url = config_handler.get('external_platform', 'API_URL', 'http://192.168.92.61:9003/api/external/projects/init')
             # 转换 init URL 为 export URL
-            export_url = api_url.replace('/init', f'/{remote_project_id}/export')
+            # 如果 URL 包含 /init,则替换为 /export,否则直接追加
+            if '/init' in api_url:
+                export_url = api_url.replace('/init', f'/{remote_project_id}/export')
+            else:
+                # 兼容不包含 /init 的情况,直接拼接
+                base_url = api_url.rstrip('/')
+                export_url = f"{base_url}/{remote_project_id}/export"
             token = config_handler.get('external_platform', 'ADMIN_TOKEN', '')
 
             # 3. 发送请求

+ 2 - 1
src/views/image_view.py

@@ -41,6 +41,7 @@ class UploadUrlRequest(BaseModel):
 class BatchAddRequest(BaseModel):
     ids: List[str]
     project_name: str
+    tags: Optional[List[str]] = None
 
 # --- 分类管理 API ---
 
@@ -210,7 +211,7 @@ async def batch_add_to_task(req: BatchAddRequest, credentials: HTTPAuthorization
         username = payload.get("username", user_id)
         
         service = ImageService()
-        success, message = await service.batch_add_to_task(req.ids, username, req.project_name)
+        success, message = await service.batch_add_to_task(req.ids, username, req.project_name, tags=req.tags)
         
         return ApiResponse(
             code=0 if success else 500, 

+ 22 - 6
src/views/sample_view.py

@@ -30,22 +30,37 @@ security_optional = HTTPBearer(auto_error=False)
 
 @router.get("/tasks")
 async def get_tasks(type: str, credentials: HTTPAuthorizationCredentials = Depends(security)):
-    """获取任务列表"""
+    """获取任务项目列表 (聚合显示)"""
     try:
         payload = verify_token(credentials.credentials)
         if not payload:
             return ApiResponse(code=401, message="无效的访问令牌").model_dump()
         
-        tasks = await task_service.get_task_list(type)
-        return ApiResponse(code=0, message="成功", data=tasks).model_dump()
+        projects = await task_service.get_task_list(type)
+        return ApiResponse(code=0, message="成功", data=projects).model_dump()
     except Exception as e:
-        logger.exception("获取任务列表失败")
+        logger.exception("获取项目列表失败")
+        return ApiResponse(code=500, message=str(e)).model_dump()
+
+
+@router.get("/tasks/details")
+async def get_task_details(project_id: str, type: str, credentials: HTTPAuthorizationCredentials = Depends(security)):
+    """获取项目下的文件详情"""
+    try:
+        payload = verify_token(credentials.credentials)
+        if not payload:
+            return ApiResponse(code=401, message="无效的访问令牌").model_dump()
+        
+        files = await task_service.get_project_details(project_id, type)
+        return ApiResponse(code=0, message="成功", data=files).model_dump()
+    except Exception as e:
+        logger.exception("获取项目详情失败")
         return ApiResponse(code=500, message=str(e)).model_dump()
 
 
 # --- 外部联动接口 API ---
 
-@router.post("/external/projects/init")
+@router.post("/external/projects/init", status_code=201)
 async def init_external_project(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
     """项目初始化接口:由标注平台调用,同步数据"""
     try:
@@ -337,6 +352,7 @@ from pydantic import BaseModel
 class BatchAddTaskRequest(BaseModel):
     doc_ids: List[str]
     project_name: str
+    tags: Optional[List[str]] = None
 
 @router.post("/documents/batch-add-to-task")
 async def batch_add_to_task(req: BatchAddTaskRequest, credentials: HTTPAuthorizationCredentials = Depends(security)):
@@ -353,7 +369,7 @@ async def batch_add_to_task(req: BatchAddTaskRequest, credentials: HTTPAuthoriza
         username = payload.get("username", user_id)
         
         sample_service = SampleService()
-        success, message = await sample_service.batch_add_to_task(req.doc_ids, username, req.project_name)
+        success, message = await sample_service.batch_add_to_task(req.doc_ids, username, req.project_name, task_tags=req.tags)
         
         return ApiResponse(
             code=0 if success else 500, 

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov