chenkun 3 주 전
부모
커밋
0be3f3630c

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 0 - 1239
logs/lq-admin-app.log.1


파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 1239 - 2
logs/lq-admin-app.log.5


+ 1 - 1
src/app/sample/models/base_info.py

@@ -100,7 +100,7 @@ class DocumentMain(BaseModel):
     title = Column(String(255), nullable=False, comment="文档名称")
     title = Column(String(255), nullable=False, comment="文档名称")
     conversion_status = Column(Integer, nullable=False, default=0, comment="状态: 0-待转换, 1-转换中, 2-完成, 3-失败")
     conversion_status = Column(Integer, nullable=False, default=0, comment="状态: 0-待转换, 1-转换中, 2-完成, 3-失败")
     whether_to_enter = Column(Integer, nullable=False, default=0, comment="是否入库: 0-未入库, 1-已入库")
     whether_to_enter = Column(Integer, nullable=False, default=0, comment="是否入库: 0-未入库, 1-已入库")
-    kb_id = Column(String(36), nullable=True, comment="知识库ID")
+    kb_id = Column(String(36), nullable=False, comment="知识库ID")
     kb_method = Column(String(50), nullable=True, comment="切分方式")
     kb_method = Column(String(50), nullable=True, comment="切分方式")
     conversion_error = Column(Text, nullable=True, comment="错误原因详情")
     conversion_error = Column(Text, nullable=True, comment="错误原因详情")
     file_url = Column(String(500), nullable=True, comment="文件URL")
     file_url = Column(String(500), nullable=True, comment="文件URL")

+ 5 - 4
src/app/sample/schemas/sample_schemas.py

@@ -2,7 +2,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from typing import Optional, List
 from typing import Optional, List
 from datetime import datetime, timezone
 from datetime import datetime, timezone
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from typing import Optional, Any, Union
 from typing import Optional, Any, Union
 
 
 
 
@@ -11,8 +11,9 @@ from typing import Optional, Any, Union
 class BatchEnterRequest(BaseModel):
 class BatchEnterRequest(BaseModel):
     ids: list[Union[int, str]]
     ids: list[Union[int, str]]
     table_type: Optional[str] = None
     table_type: Optional[str] = None
-    kb_id: Optional[str] = None
     kb_method: Optional[str] = None
     kb_method: Optional[str] = None
+    chunk_size: Optional[int] = Field(500, description="切分长度")
+    separator: Optional[str] = Field("。", description="切分符号")
 
 
 class BatchDeleteRequest(BaseModel):
 class BatchDeleteRequest(BaseModel):
     ids: list[Union[int, str]]
     ids: list[Union[int, str]]
@@ -64,8 +65,8 @@ class DocumentAdd(BaseModel):
     file_url: Optional[str] = None
     file_url: Optional[str] = None
     json_url: Optional[str] = None
     json_url: Optional[str] = None
     file_extension: Optional[str] = None
     file_extension: Optional[str] = None
-    kb_id: Optional[str] = None
-
+    kb_id: str
+ 
 class DocumentListRequest(BaseModel):
 class DocumentListRequest(BaseModel):
     page: int = 1
     page: int = 1
     size: int = 50
     size: int = 50

+ 21 - 15
src/app/services/milvus_service.py

@@ -42,22 +42,19 @@ class MilvusService:
         """将 Markdown 内容切分并入库 (支持父子段分表)"""
         """将 Markdown 内容切分并入库 (支持父子段分表)"""
         try:
         try:
             doc_id = doc_info.get("doc_id")
             doc_id = doc_info.get("doc_id")
-            doc_name = doc_info.get("doc_name")
+            file_name = doc_info.get("file_name")
             doc_version = doc_info.get("doc_version", int(time.time()))
             doc_version = doc_info.get("doc_version", int(time.time()))
             tags = doc_info.get("tags", "")
             tags = doc_info.get("tags", "")
             user_id = doc_info.get("user_id", "system")
             user_id = doc_info.get("user_id", "system")
             
             
             kb_method = doc_info.get("kb_method")
             kb_method = doc_info.get("kb_method")
-            target_collection = doc_info.get("collection_name") or PARENT_COLLECTION_NAME
             
             
-            from langchain_text_splitters import RecursiveCharacterTextSplitter
-
             if kb_method == "parent_child":
             if kb_method == "parent_child":
                 # --- 方案 A: 父子段分表入库 ---
                 # --- 方案 A: 父子段分表入库 ---
-                parent_col = f"{target_collection}_parent"
-                child_col = f"{target_collection}_child"
+                parent_col = doc_info.get("collection_name_parent") or f"{doc_info.get('collection_name', PARENT_COLLECTION_NAME)}_parent"
+                child_col = doc_info.get("collection_name_children") or f"{doc_info.get('collection_name', CHILD_COLLECTION_NAME)}_child"
                 
                 
-                # 1. 切分父段 (较大块)
+                from langchain_text_splitters import RecursiveCharacterTextSplitter
                 parent_splitter = RecursiveCharacterTextSplitter(
                 parent_splitter = RecursiveCharacterTextSplitter(
                     chunk_size=1000,
                     chunk_size=1000,
                     chunk_overlap=100
                     chunk_overlap=100
@@ -97,18 +94,27 @@ class MilvusService:
                 if child_docs:
                 if child_docs:
                     get_milvus_vectorstore(child_col).add_documents(child_docs)
                     get_milvus_vectorstore(child_col).add_documents(child_docs)
                 
                 
-                logger.info(f"Successfully inserted parent-child chunks for {doc_name}: {len(parent_docs)} parents -> {len(child_docs)} children")
+                logger.info(f"Successfully inserted parent-child chunks for {file_name}: {len(parent_docs)} parents -> {len(child_docs)} children")
             
             
             else:
             else:
                 # --- 常规单表入库逻辑 ---
                 # --- 常规单表入库逻辑 ---
+                target_collection = doc_info.get("collection_name_parent") or doc_info.get("collection_name") or PARENT_COLLECTION_NAME
+                
                 chunks = []
                 chunks = []
+                # 获取切分参数
+                chunk_size = doc_info.get("chunk_size", 500)
+                separator = doc_info.get("separator", "。")
+                
+                from langchain_text_splitters import RecursiveCharacterTextSplitter
+                
                 if kb_method == "length":
                 if kb_method == "length":
-                    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+                    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=int(chunk_size * 0.1))
                     chunks = splitter.split_text(content)
                     chunks = splitter.split_text(content)
                 elif kb_method == "symbol":
                 elif kb_method == "symbol":
+                    # 符号切分优先使用传入的符号
                     splitter = RecursiveCharacterTextSplitter(
                     splitter = RecursiveCharacterTextSplitter(
-                        separators=["\n\n", "\n", "。", ";", "!", "?", "!", "?", ";"],
-                        chunk_size=500,
+                        separators=["\n\n", "\n", separator, ";", "!", "?", "!", "?", ";"],
+                        chunk_size=chunk_size,
                         chunk_overlap=0
                         chunk_overlap=0
                     )
                     )
                     chunks = splitter.split_text(content)
                     chunks = splitter.split_text(content)
@@ -116,7 +122,7 @@ class MilvusService:
                     chunks = [p.strip() for p in re.split(r"\n\s*\n+", content) if p.strip()]
                     chunks = [p.strip() for p in re.split(r"\n\s*\n+", content) if p.strip()]
                 
                 
                 if not chunks:
                 if not chunks:
-                    logger.warning(f"Document {doc_name} has no content chunks.")
+                    logger.warning(f"Document {file_name} has no content chunks.")
                     return
                     return
 
 
                 documents = []
                 documents = []
@@ -128,7 +134,7 @@ class MilvusService:
                 self.ensure_collection_exists(target_collection)
                 self.ensure_collection_exists(target_collection)
                 get_milvus_vectorstore(target_collection).add_documents(documents)
                 get_milvus_vectorstore(target_collection).add_documents(documents)
                 
                 
-                logger.info(f"Successfully inserted {len(documents)} chunks for {doc_name} into {target_collection}")
+                logger.info(f"Successfully inserted {len(documents)} chunks for {file_name} into {target_collection}")
 
 
         except Exception as e:
         except Exception as e:
             logger.error(f"Error inserting knowledge into Milvus: {e}")
             logger.error(f"Error inserting knowledge into Milvus: {e}")
@@ -137,7 +143,7 @@ class MilvusService:
     def _prepare_metadata(self, doc_info: Dict[str, Any], p_id: str, index: int, parent_ref_id: str) -> Dict[str, Any]:
     def _prepare_metadata(self, doc_info: Dict[str, Any], p_id: str, index: int, parent_ref_id: str) -> Dict[str, Any]:
         """统一准备元数据"""
         """统一准备元数据"""
         doc_id = doc_info.get("doc_id")
         doc_id = doc_info.get("doc_id")
-        doc_name = doc_info.get("doc_name")
+        file_name = doc_info.get("file_name")
         doc_version = doc_info.get("doc_version", int(time.time()))
         doc_version = doc_info.get("doc_version", int(time.time()))
         tags = doc_info.get("tags", "")
         tags = doc_info.get("tags", "")
         user_id = doc_info.get("user_id", "system")
         user_id = doc_info.get("user_id", "system")
@@ -154,7 +160,7 @@ class MilvusService:
             "updated_by": user_id,
             "updated_by": user_id,
             "updated_time": int(time.time() * 1000),
             "updated_time": int(time.time() * 1000),
             "metadata": {
             "metadata": {
-                "doc_name": doc_name,
+                "file_name": file_name,
                 "doc_version": doc_version,
                 "doc_version": doc_version,
                 "outline_path": ""
                 "outline_path": ""
             }
             }

+ 110 - 57
src/app/services/sample_service.py

@@ -105,14 +105,15 @@ class SampleService:
 
 
     # ==================== 文档管理 ====================
     # ==================== 文档管理 ====================
     
     
-    async def batch_enter_knowledge_base(self, doc_ids: List[str], username: str, kb_id: str = None, kb_method: str = None) -> Tuple[int, str]:
+    async def batch_enter_knowledge_base(self, doc_ids: List[str], username: str, kb_method: str = "general", chunk_size: int = 500, separator: str = "。") -> Tuple[int, str]:
         """批量将文档入库到知识库
         """批量将文档入库到知识库
         
         
         Args:
         Args:
             doc_ids: 文档ID列表
             doc_ids: 文档ID列表
             username: 操作人
             username: 操作人
-            kb_id: 知识库ID
             kb_method: 切分方法
             kb_method: 切分方法
+            chunk_size: 切分长度
+            separator: 切分符号
         """
         """
         conn = get_db_connection()
         conn = get_db_connection()
         if not conn:
         if not conn:
@@ -120,7 +121,6 @@ class SampleService:
         
         
         cursor = conn.cursor()
         cursor = conn.cursor()
         success_count = 0
         success_count = 0
-        skipped_count = 0
         already_entered_count = 0
         already_entered_count = 0
         failed_count = 0
         failed_count = 0
         error_details = []
         error_details = []
@@ -129,7 +129,7 @@ class SampleService:
             # 1. 获取所有选中的文档详情
             # 1. 获取所有选中的文档详情
             placeholders = ','.join(['%s']*len(doc_ids))
             placeholders = ','.join(['%s']*len(doc_ids))
             fetch_sql = f"""
             fetch_sql = f"""
-                SELECT id, title, source_type, md_url, conversion_status, whether_to_enter, created_time 
+                SELECT id, title, source_type, md_url, conversion_status, whether_to_enter, created_time, kb_id 
                 FROM t_samp_document_main 
                 FROM t_samp_document_main 
                 WHERE id IN ({placeholders})
                 WHERE id IN ({placeholders})
             """
             """
@@ -146,6 +146,7 @@ class SampleService:
                 status = doc.get('conversion_status')
                 status = doc.get('conversion_status')
                 whether_to_enter = doc.get('whether_to_enter', 0)
                 whether_to_enter = doc.get('whether_to_enter', 0)
                 md_url = doc.get('md_url')
                 md_url = doc.get('md_url')
+                source_type = doc.get('source_type')
                 
                 
                 # A. 检查是否已入库
                 # A. 检查是否已入库
                 if whether_to_enter == 1:
                 if whether_to_enter == 1:
@@ -157,18 +158,48 @@ class SampleService:
                 # B. 检查转换状态
                 # B. 检查转换状态
                 if status != 2:
                 if status != 2:
                     reason = "尚未转换成功" if status == 0 else "正在转换中" if status == 1 else "转换失败"
                     reason = "尚未转换成功" if status == 0 else "正在转换中" if status == 1 else "转换失败"
-                    logger.warning(f"文档 {title}({doc_id}) 状态为 {status},跳过入库: {reason}")
-                    skipped_count += 1
+                    logger.warning(f"文档 {title}({doc_id}) 状态为 {status},入库失败: {reason}")
+                    failed_count += 1
                     error_details.append(f"· {title}: {reason}")
                     error_details.append(f"· {title}: {reason}")
                     continue
                     continue
                 
                 
                 if not md_url:
                 if not md_url:
-                    logger.warning(f"文档 {title}({doc_id}) 缺少 md_url,跳过入库")
-                    skipped_count += 1
+                    logger.warning(f"文档 {title}({doc_id}) 缺少 md_url,入库失败")
+                    failed_count += 1
                     error_details.append(f"· {title}: 转换结果地址丢失")
                     error_details.append(f"· {title}: 转换结果地址丢失")
                     continue
                     continue
                 
                 
-                # B. 从 MinIO 获取 Markdown 内容
+                # C. 确定入库策略 (从数据库读取已绑定的知识库)
+                current_kb_id = doc.get('kb_id')
+                current_kb_method = kb_method  # 直接使用前端传来的切分方式
+
+                if not current_kb_id:
+                    logger.warning(f"文档 {title}({doc_id}) 未指定知识库,跳过入库")
+                    failed_count += 1
+                    error_details.append(f"· {title}: 未指定目标知识库")
+                    continue
+
+                if not current_kb_method:
+                    logger.warning(f"文档 {title}({doc_id}) 未指定切分方式,跳过入库")
+                    failed_count += 1
+                    error_details.append(f"· {title}: 未指定切分策略")
+                    continue
+
+                # 获取知识库信息 (collection_name_parent, collection_name_children)
+                kb_sql = "SELECT collection_name_parent, collection_name_children FROM t_samp_knowledge_base WHERE id = %s AND is_deleted = 0"
+                cursor.execute(kb_sql, (current_kb_id,))
+                kb_res = cursor.fetchone()
+                
+                if not kb_res:
+                    logger.warning(f"找不到指定的知识库: id={current_kb_id}")
+                    failed_count += 1
+                    error_details.append(f"· {title}: 指定的知识库不存在或已被删除")
+                    continue
+                
+                collection_name_parent = kb_res['collection_name_parent']
+                collection_name_children = kb_res['collection_name_children']
+                
+                # D. 从 MinIO 获取 Markdown 内容
                 try:
                 try:
                     md_content = self.minio_manager.get_object_content(md_url)
                     md_content = self.minio_manager.get_object_content(md_url)
                     if not md_content:
                     if not md_content:
@@ -179,39 +210,34 @@ class SampleService:
                     error_details.append(f"· {title}: 读取云端文件失败")
                     error_details.append(f"· {title}: 读取云端文件失败")
                     continue
                     continue
                 
                 
-                # C. 调用 MilvusService 进行切分和入库
+                # E. 调用 MilvusService 进行切分和入库
                 try:
                 try:
-                    # 如果有 kb_id,需要根据它获取 collection_name
-                    collection_name = None
-                    if kb_id:
-                        kb_sql = "SELECT collection_name FROM t_samp_knowledge_base WHERE id = %s"
-                        cursor.execute(kb_sql, (kb_id,))
-                        kb_res = cursor.fetchone()
-                        if kb_res:
-                            collection_name = kb_res['collection_name']
-                    
                     # 准备元数据
                     # 准备元数据
+                    current_date = int(datetime.now().strftime('%Y%m%d'))
                     doc_info = {
                     doc_info = {
                         "doc_id": doc_id,
                         "doc_id": doc_id,
-                        "doc_name": title,
-                        "doc_version": int(doc['created_time'].strftime('%Y%m%d')) if doc.get('created_time') else 20260127,
-                        "tags": doc.get('source_type') or 'unknown',
+                        "file_name": title,
+                        "doc_version": int(doc['created_time'].strftime('%Y%m%d')) if doc.get('created_time') else current_date,
+                        "tags": "",
                         "user_id": username,  # 传递操作人作为 created_by
                         "user_id": username,  # 传递操作人作为 created_by
-                        "kb_id": kb_id,
-                        "kb_method": kb_method,
-                        "collection_name": collection_name
+                        "kb_id": current_kb_id,
+                        "kb_method": current_kb_method,
+                        "collection_name_parent": collection_name_parent,
+                        "collection_name_children": collection_name_children,
+                        "chunk_size": chunk_size,
+                        "separator": separator
                     }
                     }
                     await self.milvus_service.insert_knowledge(md_content, doc_info)
                     await self.milvus_service.insert_knowledge(md_content, doc_info)
                     
                     
-                    # D. 添加到任务管理中心 (类型为 data)
+                    # F. 添加到任务管理中心 (类型为 data)
                     try:
                     try:
                         await task_service.add_task(doc_id, 'data')
                         await task_service.add_task(doc_id, 'data')
                     except Exception as task_err:
                     except Exception as task_err:
                         logger.error(f"添加文档 {title} 到任务中心失败: {task_err}")
                         logger.error(f"添加文档 {title} 到任务中心失败: {task_err}")
 
 
-                    # E. 更新数据库状态
+                    # G. 更新数据库状态
                     update_sql = "UPDATE t_samp_document_main SET whether_to_enter = 1, kb_id = %s, kb_method = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
                     update_sql = "UPDATE t_samp_document_main SET whether_to_enter = 1, kb_id = %s, kb_method = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
-                    cursor.execute(update_sql, (kb_id, kb_method, username, doc_id))
+                    cursor.execute(update_sql, (current_kb_id, current_kb_method, username, doc_id))
                     success_count += 1
                     success_count += 1
                     
                     
                 except Exception as milvus_err:
                 except Exception as milvus_err:
@@ -223,14 +249,12 @@ class SampleService:
             conn.commit()
             conn.commit()
             
             
             # 构造详细的消息
             # 构造详细的消息
-            if success_count == len(doc_ids) and failed_count == 0 and skipped_count == 0 and already_entered_count == 0:
+            if success_count == len(doc_ids) and failed_count == 0 and already_entered_count == 0:
                 msg = f"✅ 入库成功!共处理 {success_count} 份文档。"
                 msg = f"✅ 入库成功!共处理 {success_count} 份文档。"
             else:
             else:
                 msg = f"📊 入库处理完成:\n· 成功:{success_count} 份\n"
                 msg = f"📊 入库处理完成:\n· 成功:{success_count} 份\n"
                 if already_entered_count > 0:
                 if already_entered_count > 0:
                     msg += f"· 跳过:{already_entered_count} 份 (已入库)\n"
                     msg += f"· 跳过:{already_entered_count} 份 (已入库)\n"
-                if skipped_count > 0:
-                    msg += f"· 跳过:{skipped_count} 份 (转换中或失败)\n"
                 if failed_count > 0:
                 if failed_count > 0:
                     msg += f"· 失败:{failed_count} 份\n"
                     msg += f"· 失败:{failed_count} 份\n"
             
             
@@ -385,8 +409,9 @@ class SampleService:
                     LEFT JOIN {sub_table} s ON m.id = s.id
                     LEFT JOIN {sub_table} s ON m.id = s.id
                     LEFT JOIN t_sys_user u1 ON m.created_by = u1.id
                     LEFT JOIN t_sys_user u1 ON m.created_by = u1.id
                     LEFT JOIN t_sys_user u2 ON m.updated_by = u2.id
                     LEFT JOIN t_sys_user u2 ON m.updated_by = u2.id
+                    LEFT JOIN t_samp_knowledge_base kb ON m.kb_id = kb.id
                 """
                 """
-                fields_sql = "m.*, s.*, u1.username as creator_name, u2.username as updater_name, m.id as id"
+                fields_sql = "m.*, s.*, u1.username as creator_name, u2.username as updater_name, kb.name as kb_name, m.id as id"
                 where_clauses.append("m.source_type = %s")
                 where_clauses.append("m.source_type = %s")
                 params.append(table_type)
                 params.append(table_type)
                 order_sql = "m.created_time DESC"
                 order_sql = "m.created_time DESC"
@@ -407,8 +432,8 @@ class SampleService:
                         where_clauses.append("s.level_4_classification = %s")
                         where_clauses.append("s.level_4_classification = %s")
                         params.append(level_4_classification)
                         params.append(level_4_classification)
             else:
             else:
-                from_sql = "t_samp_document_main m LEFT JOIN t_sys_user u1 ON m.created_by = u1.id LEFT JOIN t_sys_user u2 ON m.updated_by = u2.id"
-                fields_sql = "m.*, u1.username as creator_name, u2.username as updater_name"
+                from_sql = "t_samp_document_main m LEFT JOIN t_sys_user u1 ON m.created_by = u1.id LEFT JOIN t_sys_user u2 ON m.updated_by = u2.id LEFT JOIN t_samp_knowledge_base kb ON m.kb_id = kb.id"
+                fields_sql = "m.*, u1.username as creator_name, u2.username as updater_name, kb.name as kb_name"
                 order_sql = "m.created_time DESC"
                 order_sql = "m.created_time DESC"
                 title_field = "m.title"
                 title_field = "m.title"
             
             
@@ -431,7 +456,6 @@ class SampleService:
             sql = f"SELECT {fields_sql} FROM {from_sql} {where_sql} ORDER BY {order_sql} LIMIT %s OFFSET %s"
             sql = f"SELECT {fields_sql} FROM {from_sql} {where_sql} ORDER BY {order_sql} LIMIT %s OFFSET %s"
             params.extend([size, offset])
             params.extend([size, offset])
             
             
-            logger.info(f"Executing SQL: {sql} with params: {params}")
             cursor.execute(sql, tuple(params))
             cursor.execute(sql, tuple(params))
             items = [self._format_document_row(row) for row in cursor.fetchall()]
             items = [self._format_document_row(row) for row in cursor.fetchall()]
             
             
@@ -546,12 +570,13 @@ class SampleService:
                 INSERT INTO t_samp_document_main (
                 INSERT INTO t_samp_document_main (
                     id, title, source_type, file_url, 
                     id, title, source_type, file_url, 
                     file_extension, created_by, updated_by, created_time, updated_time,
                     file_extension, created_by, updated_by, created_time, updated_time,
-                    conversion_status, whether_to_task
-                ) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0, 0)
+                    conversion_status, whether_to_task, kb_id
+                ) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0, 0, %s)
                 """,
                 """,
                 (
                 (
                     doc_id, doc_data.get('title'), table_type, file_url,
                     doc_id, doc_data.get('title'), table_type, file_url,
-                    doc_data.get('file_extension'), user_id, user_id
+                    doc_data.get('file_extension'), user_id, user_id,
+                    doc_data.get('kb_id')
                 )
                 )
             )
             )
 
 
@@ -648,14 +673,14 @@ class SampleService:
             # 1. 更新主表
             # 1. 更新主表
             cursor.execute(
             cursor.execute(
                 """
                 """
-                UPDATE t_samp_document_main 
-                SET title = %s, file_url = %s, file_extension = %s,
-                    updated_by = %s, updated_time = NOW()
+                UPDATE t_samp_document_main SET 
+                    title = %s, file_url = %s, file_extension = %s, 
+                    updated_by = %s, updated_time = NOW(), kb_id = %s
                 WHERE id = %s
                 WHERE id = %s
                 """,
                 """,
                 (
                 (
                     doc_data.get('title'), file_url, doc_data.get('file_extension'),
                     doc_data.get('title'), file_url, doc_data.get('file_extension'),
-                    updater_id, doc_id
+                    updater_id, doc_data.get('kb_id'), doc_id
                 )
                 )
             )
             )
 
 
@@ -754,7 +779,7 @@ class SampleService:
                     s.participating_units, s.reference_basis,
                     s.participating_units, s.reference_basis,
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
-                    m.file_url, m.conversion_status, m.md_url, m.json_url
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.kb_id, m.whether_to_enter
                 """
                 """
                 field_map = {
                 field_map = {
                     'title': 's.chinese_name',
                     'title': 's.chinese_name',
@@ -778,7 +803,7 @@ class SampleService:
                     s.note, 
                     s.note, 
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
-                    m.file_url, m.conversion_status, m.md_url, m.json_url
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.kb_id, m.whether_to_enter
                 """
                 """
                 field_map = {
                 field_map = {
                     'title': 's.plan_name',
                     'title': 's.plan_name',
@@ -799,7 +824,7 @@ class SampleService:
                     s.note, 
                     s.note, 
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.created_by, u1.username as creator_name, s.created_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
                     s.updated_by, u2.username as updater_name, s.updated_time,
-                    m.file_url, m.conversion_status, m.md_url, m.json_url
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.kb_id, m.whether_to_enter
                 """
                 """
                 field_map = {
                 field_map = {
                     'title': 's.file_name',
                     'title': 's.file_name',
@@ -860,11 +885,12 @@ class SampleService:
             
             
             # 使用 LEFT JOIN 关联主表和用户表获取姓名
             # 使用 LEFT JOIN 关联主表和用户表获取姓名
             sql = f"""
             sql = f"""
-                SELECT {fields} 
+                SELECT {fields}, kb.name as kb_name
                 FROM {table_name} s
                 FROM {table_name} s
                 LEFT JOIN t_samp_document_main m ON s.id = m.id
                 LEFT JOIN t_samp_document_main m ON s.id = m.id
                 LEFT JOIN t_sys_user u1 ON s.created_by = u1.id
                 LEFT JOIN t_sys_user u1 ON s.created_by = u1.id
                 LEFT JOIN t_sys_user u2 ON s.updated_by = u2.id
                 LEFT JOIN t_sys_user u2 ON s.updated_by = u2.id
+                LEFT JOIN t_samp_knowledge_base kb ON m.kb_id = kb.id
                 {where_sql} 
                 {where_sql} 
                 ORDER BY s.created_time DESC 
                 ORDER BY s.created_time DESC 
                 LIMIT %s OFFSET %s
                 LIMIT %s OFFSET %s
@@ -1008,12 +1034,12 @@ class SampleService:
                 INSERT INTO t_samp_document_main (
                 INSERT INTO t_samp_document_main (
                     id, title, source_type, file_url, 
                     id, title, source_type, file_url, 
                     file_extension, created_by, updated_by, created_time, updated_time,
                     file_extension, created_by, updated_by, created_time, updated_time,
-                    conversion_status, whether_to_task
-                ) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0, 0)
+                    conversion_status, whether_to_task, kb_id
+                ) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0, 0, %s)
                 """,
                 """,
                 (
                 (
                     doc_id, data.get('title'), type, file_url,
                     doc_id, data.get('title'), type, file_url,
-                    file_extension, user_id, user_id
+                    file_extension, user_id, user_id, data.get('kb_id')
                 )
                 )
             )
             )
             
             
@@ -1122,10 +1148,10 @@ class SampleService:
             cursor.execute(
             cursor.execute(
                 """
                 """
                 UPDATE t_samp_document_main 
                 UPDATE t_samp_document_main 
-                SET title = %s, file_url = %s, file_extension = %s, updated_by = %s, updated_time = NOW()
+                SET title = %s, file_url = %s, file_extension = %s, updated_by = %s, updated_time = NOW(), kb_id = %s
                 WHERE id = %s
                 WHERE id = %s
                 """,
                 """,
-                (data.get('title'), file_url, file_extension, updater_id, doc_id)
+                (data.get('title'), file_url, file_extension, updater_id, data.get('kb_id'), doc_id)
             )
             )
 
 
             # 2. 更新子表 (移除 file_url)
             # 2. 更新子表 (移除 file_url)
@@ -1200,6 +1226,10 @@ class SampleService:
 
 
     async def delete_basic_info(self, type: str, doc_id: str) -> Tuple[bool, str]:
     async def delete_basic_info(self, type: str, doc_id: str) -> Tuple[bool, str]:
         """删除基本信息"""
         """删除基本信息"""
+        if not doc_id:
+            return False, "缺少 ID 参数"
+            
+        logger.info(f"Deleting basic info: type={type}, id={doc_id}")
         conn = get_db_connection()
         conn = get_db_connection()
         if not conn:
         if not conn:
             return False, "数据库连接失败"
             return False, "数据库连接失败"
@@ -1210,21 +1240,44 @@ class SampleService:
             if not table_name:
             if not table_name:
                 return False, "无效的类型"
                 return False, "无效的类型"
             
             
-            # 1. 删除主表记录 (由于设置了 ON DELETE CASCADE,子表记录会自动删除)
+            # 1. 显式删除子表记录 (防止 CASCADE 未生效)
+            try:
+                cursor.execute(f"DELETE FROM {table_name} WHERE id = %s", (doc_id,))
+                logger.info(f"Deleted from sub-table {table_name}, affected: {cursor.rowcount}")
+            except Exception as sub_e:
+                logger.warning(f"删除子表 {table_name} 记录失败 (可能不存在): {sub_e}")
+
+            # 2. 同步删除任务管理中心的数据 (优先删除关联数据)
+            try:
+                # 使用当前事务删除任务记录(如果 task_service 支持的话,目前它自建连接)
+                # 这里我们直接在当前 cursor 中也执行一次,确保事务一致性
+                cursor.execute("DELETE FROM t_task_management WHERE business_id = %s", (doc_id,))
+                logger.info(f"Deleted from t_task_management, affected: {cursor.rowcount}")
+            except Exception as task_e:
+                logger.warning(f"在主事务中删除任务记录失败: {task_e}")
+
+            # 3. 删除主表记录
             cursor.execute("DELETE FROM t_samp_document_main WHERE id = %s", (doc_id,))
             cursor.execute("DELETE FROM t_samp_document_main WHERE id = %s", (doc_id,))
+            affected_main = cursor.rowcount
+            logger.info(f"Deleted from t_samp_document_main, affected: {affected_main}")
             
             
-            # 同步删除任务管理中心的数据
+            if affected_main == 0:
+                logger.warning(f"未找到主表记录: {doc_id}")
+                # 即使主表没找到,我们也 commit 之前的操作并返回成功(幂等性)
+            
+            conn.commit()
+            
+            # 4. 再次确保任务中心数据已删除 (调用原有服务)
             try:
             try:
                 await task_service.delete_task(doc_id)
                 await task_service.delete_task(doc_id)
             except Exception as task_err:
             except Exception as task_err:
-                logger.error(f"同步删除任务中心数据失败 (ID: {doc_id}): {task_err}")
+                logger.error(f"调用 task_service 删除任务失败: {task_err}")
 
 
-            conn.commit()
             return True, "删除成功"
             return True, "删除成功"
         except Exception as e:
         except Exception as e:
-            logger.exception("删除基本信息失败")
+            logger.exception(f"删除基本信息异常 (ID: {doc_id})")
             conn.rollback()
             conn.rollback()
-            return False, str(e)
+            return False, f"删除失败: {str(e)}"
         finally:
         finally:
             cursor.close()
             cursor.close()
             conn.close()
             conn.close()

+ 7 - 4
src/views/sample_view.py

@@ -234,14 +234,17 @@ async def batch_enter_knowledge_base(req: BatchEnterRequest, credentials: HTTPAu
         
         
         sample_service = SampleService()
         sample_service = SampleService()
         
         
-        affected_rows, message = await sample_service.batch_enter_knowledge_base(
+        success_count, message = await sample_service.batch_enter_knowledge_base(
             req.ids, 
             req.ids, 
             username, 
             username, 
-            kb_id=req.kb_id, 
-            kb_method=req.kb_method
+            kb_method=req.kb_method,
+            chunk_size=req.chunk_size,
+            separator=req.separator
         )
         )
         
         
-        return ApiResponse(code=0, message=message, timestamp=datetime.now(timezone.utc).isoformat()).model_dump()
+        # 如果全部失败,返回非零状态码,触发前端错误提示
+        code = 0 if success_count > 0 else 1
+        return ApiResponse(code=code, message=message, timestamp=datetime.now(timezone.utc).isoformat()).model_dump()
     except Exception as e:
     except Exception as e:
         logger.exception("批量操作失败")
         logger.exception("批量操作失败")
         return ApiResponse(code=500, message=f"批量操作失败: {str(e)}", timestamp=datetime.now(timezone.utc).isoformat()).model_dump()
         return ApiResponse(code=500, message=f"批量操作失败: {str(e)}", timestamp=datetime.now(timezone.utc).isoformat()).model_dump()

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.