chenkun 1 mēnesi atpakaļ
vecāks
revīzija
e2312d160b

+ 2 - 24
scripts/miner_u.py

@@ -27,8 +27,8 @@ def main_cli(doc_id):
         
     try:
         with conn.cursor() as cursor:
-            # 1. 获取文档基本信息
-            cursor.execute("SELECT title, file_url, source_type, source_id FROM t_samp_document_main WHERE id = %s", (doc_id,))
+            # 1. 获取文档基本信息 (主表即为唯一资产中心)
+            cursor.execute("SELECT title, file_url FROM t_samp_document_main WHERE id = %s", (doc_id,))
             row = cursor.fetchone()
             if not row:
                 logger.warning(f"文档不存在: {doc_id}")
@@ -36,28 +36,6 @@ def main_cli(doc_id):
             
             title = row['title']
             file_url = row['file_url']
-            source_type = row['source_type']
-            source_id = row['source_id']
-
-            # 2. 如果主表没有 file_url,尝试从子表获取 (兼容逻辑)
-            if not file_url and source_type and source_id:
-                TABLE_MAP = {
-                    "basis": "t_samp_standard_base_info",
-                    "work": "t_samp_construction_plan_base_info",
-                    "job": "t_samp_office_regulations"
-                }
-                table_name = TABLE_MAP.get(source_type)
-                if table_name:
-                    url_fields = ['file_url', 'source_url', 'url']
-                    for field in url_fields:
-                        try:
-                            cursor.execute(f"SELECT {field} FROM {table_name} WHERE id = %s", (source_id,))
-                            url_row = cursor.fetchone()
-                            if url_row and url_row[field]:
-                                file_url = url_row[field]
-                                break
-                        except:
-                            continue
 
             if not file_url:
                 logger.error(f"未找到文件链接: {doc_id}")

+ 29 - 49
src/app/base/mineru_connection.py

@@ -25,7 +25,11 @@ class MinerUManager:
     """MinerU 管理器"""
     
     def __init__(self):
-        self.token = config_handler.get("admin_app", "MINERU_TOKEN", "")
+        # 优先从配置获取新的 Access Key,如果没有则使用旧的 MINERU_TOKEN
+        self.access_key = config_handler.get("admin_app", "MINERU_ACCESS_KEY", "")
+        self.secret_key = config_handler.get("admin_app", "MINERU_SECRET_KEY", "")
+        self.token = self.access_key if self.access_key else config_handler.get("admin_app", "MINERU_TOKEN", "")
+        
         self.api_apply = config_handler.get("admin_app", "MINERU_API_APPLY", "https://mineru.net/api/v4/file-urls/batch")
         self.api_batch_result = config_handler.get("admin_app", "MINERU_API_BATCH_RESULT", "https://mineru.net/api/v4/extract-results/batch/{}")
         
@@ -70,33 +74,6 @@ class MinerUManager:
                 params.append(doc_id)
                 cursor.execute(sql, params)
                 
-                # 同步更新子表
-                if json_url is not None or md_url is not None:
-                    try:
-                        cursor.execute("SELECT source_type, source_id FROM t_samp_document_main WHERE id = %s", (doc_id,))
-                        row = cursor.fetchone()
-                        if row and row['source_type'] and row['source_id']:
-                            source_type, source_id = row['source_type'], row['source_id']
-                            TABLE_MAP = {
-                                "basis": "t_samp_standard_base_info",
-                                "work": "t_samp_construction_plan_base_info",
-                                "job": "t_samp_office_regulations"
-                            }
-                            table_name = TABLE_MAP.get(source_type)
-                            if table_name:
-                                sub_updates = []
-                                sub_params = []
-                                if json_url is not None:
-                                    sub_updates.append("json_url = %s")
-                                    sub_params.append(json_url)
-                                # 如果子表也有 md_url,可以在此添加
-                                if sub_updates:
-                                    sub_sql = f"UPDATE {table_name} SET {', '.join(sub_updates)} WHERE id = %s"
-                                    sub_params.append(source_id)
-                                    cursor.execute(sub_sql, sub_params)
-                    except Exception as e:
-                        logger.error(f"同步子表 URL 失败: {e}")
-                
                 conn.commit()
         except Exception as e:
             logger.error(f"更新数据库状态失败: {e}")
@@ -109,15 +86,8 @@ class MinerUManager:
             "files": files_meta,
             "model_version": model_version,
         }
-        try:
-            r = requests.post(self.api_apply, headers=self.headers, json=payload, timeout=60)
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as e:
-            if e.response.status_code == 401:
-                logger.error("MinerU Token 已过期或无效,请在 config.ini 中更新 MINERU_TOKEN")
-                raise RuntimeError("MinerU 认证失败 (401): Token 已过期或无效,请联系管理员更新配置。") from e
-            raise
-        
+        r = requests.post(self.api_apply, headers=self.headers, json=payload, timeout=60)
+        r.raise_for_status()
         j = r.json()
         if j.get("code") != 0:
             raise RuntimeError(f"申请上传链接失败: {j.get('msg')}")
@@ -134,15 +104,8 @@ class MinerUManager:
         """轮询转换结果"""
         deadline = time.time() + timeout_sec
         while True:
-            try:
-                r = requests.get(self.api_batch_result.format(batch_id), headers=self.headers, timeout=60)
-                r.raise_for_status()
-            except requests.exceptions.HTTPError as e:
-                if e.response.status_code == 401:
-                    logger.error("MinerU Token 已过期或无效,请在 config.ini 中更新 MINERU_TOKEN")
-                    raise RuntimeError("MinerU 认证失败 (401): Token 已过期或无效,请联系管理员更新配置。") from e
-                raise
-
+            r = requests.get(self.api_batch_result.format(batch_id), headers=self.headers, timeout=60)
+            r.raise_for_status()
             j = r.json()
             if j.get("code") != 0:
                 raise RuntimeError(f"轮询失败: {j.get('msg')}")
@@ -176,10 +139,27 @@ class MinerUManager:
             file_ext = Path(urlparse(file_url).path).suffix.lower()
             if not file_ext:
                 file_ext = ".pdf"
-                
-            file_name = f"{chinese_name}{file_ext}"
             
-            # 3. 提交到 MinerU
+            file_name = f"{chinese_name}{file_ext}"
+
+            # 3. 预检查文件类型
+            if file_ext == ".txt":
+                logger.info(f"[{doc_id}] 检测为 .txt 文件,跳过 MinerU 转换,直接处理为 Markdown")
+                # 直接将 txt 内容作为 md 内容上传
+                md_content = file_content
+                md_object_name = f"{self.minio_manager.base_path}/converted/{datetime.now().strftime('%Y%m%d')}/{doc_id}.md"
+                md_cloud_url = self.minio_manager.upload_file(md_content, md_object_name, content_type="text/markdown")
+                
+                # 更新数据库状态为成功
+                self.update_db_status(doc_id, status=2, md_url=md_cloud_url)
+                logger.info(f"[{doc_id}] .txt 文件处理成功. MD: {md_cloud_url}")
+                return
+
+            if file_ext not in self.supported_suffix:
+                supported_list = ", ".join(self.supported_suffix)
+                raise RuntimeError(f"不支持的文件类型: {file_ext}。MinerU 仅支持: {supported_list}")
+
+            # 4. 提交到 MinerU
             files_meta = [{"name": file_name, "data_id": doc_id}]
             batch_id, upload_urls = self.apply_upload_urls(files_meta)
             

+ 3 - 1
src/app/config/config.ini

@@ -87,7 +87,9 @@ MINIO_USE_SSL=False
 MINIO_BASE_PATH=sampledata
 
 # MinerU 配置
-MINERU_TOKEN=eyJ0eXBlIjoiSldUIiwiYWxnIjoiSFM1MTIifQ.eyJqdGkiOiI1MzgwMDYyNSIsInJvbCI6IlJPTEVfUkVHSVNURVIiLCJpc3MiOiJPcGVuWExhYiIsImlhdCI6MTc2Nzg1OTg5NywiY2xpZW50SWQiOiJsa3pkeDU3bnZ5MjJqa3BxOXgydyIsInBob25lIjoiMTgwMzA5ODIxNTQiLCJvcGVuSWQiOm51bGwsInV1aWQiOiI0NTYyZTUyNi1iZjE3LTRhMmItODExMi04YmM5ZjNjYzMwZGMiLCJlbWFpbCI6IiIsImV4cCI6MTc2OTA2OTQ5N30.mNH7afPPANNQq_BRsBOlbk-2P7e_ewdfzPQXO4woeoT15mDEbPKc45Auk_BuRuNaAS-Gm2GK3qKGjQ2VDtepvA
+MINERU_ACCESS_KEY=
+MINERU_SECRET_KEY=
+MINERU_TOKEN=eyJ0eXBlIjoiSldUIiwiYWxnIjoiSFM1MTIifQ.eyJqdGkiOiIyNjQwMDgzNiIsInJvbCI6IlJPTEVfUkVHSVNURVIiLCJpc3MiOiJPcGVuWExhYiIsImlhdCI6MTc2OTE0NDEwMSwiY2xpZW50SWQiOiJsa3pkeDU3bnZ5MjJqa3BxOXgydyIsInBob25lIjoiMTk5ODA4ODg3ODAiLCJvcGVuSWQiOm51bGwsInV1aWQiOiJmNmVkMDk0YS0wYzM3LTQzN2EtYmIyMi04MTk1ODVmZWQ5ODgiLCJlbWFpbCI6IiIsImV4cCI6MTc3MDM1MzcwMX0.ZIJQGdyrhUjC2z3Eqgt_LDuT7-q1ByWqPJ_jJYRwSyvd9_ZhbsvnUahVYh_zZiWVjiVSgZsx9IdXGReIbRpGqg
 MINERU_API_APPLY=https://mineru.net/api/v4/file-urls/batch
 MINERU_API_BATCH_RESULT=https://mineru.net/api/v4/extract-results/batch/{}
 

+ 165 - 114
src/app/services/sample_service.py

@@ -192,6 +192,13 @@ class SampleService:
                 for key in ['created_time', 'updated_time']:
                     if item.get(key) and hasattr(item[key], 'isoformat'):
                         item[key] = item[key].isoformat()
+                
+                # 增加格式化文件名供前端显示
+                if item.get('conversion_status') == 2:
+                    title = item.get('title', 'document')
+                    item['md_display_name'] = f"{title}.md"
+                    item['json_display_name'] = f"{title}.json"
+                
                 items.append(item)
             
             # 总数
@@ -237,6 +244,12 @@ class SampleService:
                 if doc.get(key) and hasattr(doc[key], 'isoformat'):
                     doc[key] = doc[key].isoformat()
             
+            # 增加格式化文件名供前端显示
+            if doc.get('conversion_status') == 2:
+                title = doc.get('title', 'document')
+                doc['md_display_name'] = f"{title}.md"
+                doc['json_display_name'] = f"{title}.json"
+            
             return doc
         except Exception as e:
             logger.exception("获取文档详情失败")
@@ -264,7 +277,7 @@ class SampleService:
         return str(value)
 
     async def add_document(self, doc_data: Dict[str, Any], user_id: str) -> Tuple[bool, str, Optional[str]]:
-        """添加新文档(同步主表和子表)"""
+        """添加新文档(先主表后子表,解耦触发器)"""
         conn = get_db_connection()
         if not conn:
             return False, "数据库连接失败", None
@@ -275,42 +288,43 @@ class SampleService:
             doc_id = str(uuid.uuid4())
             source_id = str(uuid.uuid4())
             table_type = doc_data.get('table_type', 'basis')
-            table_name = TABLE_MAP.get(table_type, "t_basis_of_preparation")
+            table_name = TABLE_MAP.get(table_type)
             
             # 安全转换字段
             release_date = self._to_date(doc_data.get('release_date'))
             
-            # 插入子表 (会触发数据库触发器自动向 t_samp_document_main 插入记录)
+            # 1. 插入主表 (作为资产中心)
+            cursor.execute(
+                """
+                INSERT INTO t_samp_document_main (
+                    id, title, source_type, source_id, file_url, 
+                    file_extension, created_by, updated_by, created_time, updated_time,
+                    conversion_status
+                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0)
+                """,
+                (
+                    doc_id, doc_data.get('title'), table_type, source_id, doc_data.get('file_url'),
+                    doc_data.get('file_extension'), user_id, user_id
+                )
+            )
+
+            # 2. 插入子表 (仅存储业务字段)
             if table_type == 'basis':
                 cursor.execute(
-                    f"INSERT INTO {table_name} (id, chinese_name, standard_number, issuing_authority, release_date, document_type, professional_field, validity, file_url, content, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())",
-                    (source_id, doc_data.get('title'), doc_data.get('standard_no'), doc_data.get('issuing_authority'), release_date, doc_data.get('document_type'), doc_data.get('professional_field'), doc_data.get('validity'), doc_data.get('file_url'), doc_data.get('content'), user_id)
+                    f"INSERT INTO {table_name} (id, chinese_name, standard_number, issuing_authority, release_date, document_type, professional_field, validity, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())",
+                    (source_id, doc_data.get('title'), doc_data.get('standard_no'), doc_data.get('issuing_authority'), release_date, doc_data.get('document_type'), doc_data.get('professional_field'), doc_data.get('validity'), user_id)
                 )
             elif table_type == 'work':
                 cursor.execute(
-                    f"INSERT INTO {table_name} (id, plan_name, project_name, project_section, compiling_unit, compiling_date, file_url, content, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())",
-                    (source_id, doc_data.get('title'), doc_data.get('project_name'), doc_data.get('project_section'), doc_data.get('issuing_authority'), release_date, doc_data.get('file_url'), doc_data.get('content'), user_id)
+                    f"INSERT INTO {table_name} (id, plan_name, project_name, project_section, compiling_unit, compiling_date, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW())",
+                    (source_id, doc_data.get('title'), doc_data.get('project_name'), doc_data.get('project_section'), doc_data.get('issuing_authority'), release_date, user_id)
                 )
             elif table_type == 'job':
                 cursor.execute(
-                    f"INSERT INTO {table_name} (id, file_name, issuing_department, document_type, publish_date, file_url, content, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())",
-                    (source_id, doc_data.get('title'), doc_data.get('issuing_authority'), doc_data.get('document_type'), release_date, doc_data.get('file_url'), doc_data.get('content'), user_id)
+                    f"INSERT INTO {table_name} (id, file_name, issuing_department, document_type, publish_date, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, NOW(), NOW())",
+                    (source_id, doc_data.get('title'), doc_data.get('issuing_authority'), doc_data.get('document_type'), release_date, user_id)
                 )
             
-            # 获取触发器自动生成的 doc_id
-            cursor.execute("SELECT id FROM t_samp_document_main WHERE source_id = %s AND source_type = %s", (source_id, table_type))
-            row = cursor.fetchone()
-            if not row:
-                raise Exception("触发器未能在主表创建记录")
-            doc_id = row['id']
-
-            # 更新主表中触发器未处理的字段
-            cursor.execute("""
-                UPDATE t_samp_document_main 
-                SET file_extension = %s
-                WHERE id = %s
-            """, (doc_data.get('file_extension'), doc_id))
-            
             conn.commit()
             return True, "文档添加成功", doc_id
         except Exception as e:
@@ -322,7 +336,7 @@ class SampleService:
             conn.close()
 
     async def edit_document(self, doc_data: Dict[str, Any], updater_id: str) -> Tuple[bool, str]:
-        """编辑文档(同步主表和子表)"""
+        """编辑文档(同步主表和子表,解耦触发器)"""
         conn = get_db_connection()
         if not conn:
             return False, "数据库连接失败"
@@ -333,38 +347,42 @@ class SampleService:
             doc_id = doc_data.get('id')
             source_id = doc_data.get('source_id')
             table_type = doc_data.get('table_type', 'basis')
-            table_name = TABLE_MAP.get(table_type, "t_samp_standard_base_info")
+            table_name = TABLE_MAP.get(table_type)
             
             # 安全转换字段
             release_date = self._to_date(doc_data.get('release_date'))
             
-            # 1. 更新子表 (会触发数据库触发器更新 t_samp_document_main 的基本字段)
+            # 1. 更新主表
+            cursor.execute(
+                """
+                UPDATE t_samp_document_main 
+                SET title = %s, file_url = %s, file_extension = %s,
+                    updated_by = %s, updated_time = NOW()
+                WHERE id = %s
+                """,
+                (
+                    doc_data.get('title'), doc_data.get('file_url'), doc_data.get('file_extension'),
+                    updater_id, doc_id
+                )
+            )
+
+            # 2. 更新子表
             if table_type == 'basis':
                 cursor.execute(
-                    f"UPDATE {table_name} SET chinese_name = %s, standard_number = %s, issuing_authority = %s, release_date = %s, document_type = %s, professional_field = %s, validity = %s, file_url = %s, content = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
-                    (doc_data.get('title'), doc_data.get('standard_no'), doc_data.get('issuing_authority'), release_date, doc_data.get('document_type'), doc_data.get('professional_field'), doc_data.get('validity'), doc_data.get('file_url'), doc_data.get('content'), updater_id, source_id)
+                    f"UPDATE {table_name} SET chinese_name = %s, standard_number = %s, issuing_authority = %s, release_date = %s, document_type = %s, professional_field = %s, validity = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
+                    (doc_data.get('title'), doc_data.get('standard_no'), doc_data.get('issuing_authority'), release_date, doc_data.get('document_type'), doc_data.get('professional_field'), doc_data.get('validity'), updater_id, source_id)
                 )
             elif table_type == 'work':
                 cursor.execute(
-                    f"UPDATE {table_name} SET plan_name = %s, project_name = %s, project_section = %s, compiling_unit = %s, compiling_date = %s, file_url = %s, content = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
-                    (doc_data.get('title'), doc_data.get('project_name'), doc_data.get('project_section'), doc_data.get('issuing_authority'), release_date, doc_data.get('file_url'), doc_data.get('content'), updater_id, source_id)
+                    f"UPDATE {table_name} SET plan_name = %s, project_name = %s, project_section = %s, compiling_unit = %s, compiling_date = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
+                    (doc_data.get('title'), doc_data.get('project_name'), doc_data.get('project_section'), doc_data.get('issuing_authority'), release_date, updater_id, source_id)
                 )
             elif table_type == 'job':
                 cursor.execute(
-                    f"UPDATE {table_name} SET file_name = %s, issuing_department = %s, document_type = %s, publish_date = %s, file_url = %s, content = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
-                    (doc_data.get('title'), doc_data.get('issuing_authority'), doc_data.get('document_type'), release_date, doc_data.get('file_url'), doc_data.get('content'), updater_id, source_id)
+                    f"UPDATE {table_name} SET file_name = %s, issuing_department = %s, document_type = %s, publish_date = %s, updated_by = %s, updated_time = NOW() WHERE id = %s",
+                    (doc_data.get('title'), doc_data.get('issuing_authority'), doc_data.get('document_type'), release_date, updater_id, source_id)
                 )
 
-            # 2. 更新主表中触发器未处理的字段(或者被触发器覆盖的非子表字段)
-            cursor.execute("""
-                UPDATE t_samp_document_main 
-                SET file_extension = %s,
-                    updated_by = %s, updated_time = NOW()
-                WHERE id = %s
-            """, (doc_data.get('file_extension'),
-                  updater_id, doc_id))
-
-            
             conn.commit()
             return True, "文档更新成功"
         except Exception as e:
@@ -388,7 +406,7 @@ class SampleService:
         keyword: Optional[str] = None,
         **filters
     ) -> Tuple[List[Dict[str, Any]], int]:
-        """获取基本信息列表(支持多条件检索)"""
+        """获取基本信息列表(关联主表获取文件和转换状态)"""
         conn = get_db_connection()
         if not conn:
             return [], 0
@@ -399,32 +417,50 @@ class SampleService:
             # 根据类型选择表名和字段映射
             if type == 'basis':
                 table_name = "t_samp_standard_base_info"
-                fields = "id, chinese_name as title, standard_number as standard_no, issuing_authority, release_date, document_type, professional_field, validity, created_by, created_time, file_url"
+                # 关联主表字段:file_url, conversion_status, md_url, json_url
+                fields = """
+                    s.id, s.chinese_name as title, s.standard_number as standard_no, 
+                    s.issuing_authority, s.release_date, s.document_type, 
+                    s.professional_field, s.validity, s.created_by, s.created_time,
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.id as doc_id
+                """
                 field_map = {
-                    'title': 'chinese_name',
-                    'standard_no': 'standard_number',
-                    'issuing_authority': 'issuing_authority',
-                    'release_date': 'release_date',
-                    'document_type': 'document_type',
-                    'professional_field': 'professional_field',
-                    'validity': 'validity'
+                    'title': 's.chinese_name',
+                    'standard_no': 's.standard_number',
+                    'issuing_authority': 's.issuing_authority',
+                    'release_date': 's.release_date',
+                    'document_type': 's.document_type',
+                    'professional_field': 's.professional_field',
+                    'validity': 's.validity'
                 }
             elif type == 'work':
                 table_name = "t_samp_construction_plan_base_info"
-                fields = "id, plan_name as title, NULL as standard_no, compiling_unit as issuing_authority, compiling_date as release_date, NULL as document_type, NULL as professional_field, NULL as validity, created_by, created_time, file_url"
+                fields = """
+                    s.id, s.plan_name as title, NULL as standard_no, 
+                    s.compiling_unit as issuing_authority, s.compiling_date as release_date, 
+                    NULL as document_type, NULL as professional_field, NULL as validity, 
+                    s.created_by, s.created_time,
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.id as doc_id
+                """
                 field_map = {
-                    'title': 'plan_name',
-                    'issuing_authority': 'compiling_unit',
-                    'release_date': 'compiling_date'
+                    'title': 's.plan_name',
+                    'issuing_authority': 's.compiling_unit',
+                    'release_date': 's.compiling_date'
                 }
             elif type == 'job':
                 table_name = "t_samp_office_regulations"
-                fields = "id, file_name as title, NULL as standard_no, issuing_department as issuing_authority, publish_date as release_date, document_type, NULL as professional_field, NULL as validity, created_by, created_time, file_url"
+                fields = """
+                    s.id, s.file_name as title, NULL as standard_no, 
+                    s.issuing_department as issuing_authority, s.publish_date as release_date, 
+                    s.document_type, NULL as professional_field, NULL as validity, 
+                    s.created_by, s.created_time,
+                    m.file_url, m.conversion_status, m.md_url, m.json_url, m.id as doc_id
+                """
                 field_map = {
-                    'title': 'file_name',
-                    'issuing_authority': 'issuing_department',
-                    'release_date': 'publish_date',
-                    'document_type': 'document_type'
+                    'title': 's.file_name',
+                    'issuing_authority': 's.issuing_department',
+                    'release_date': 's.publish_date',
+                    'document_type': 's.document_type'
                 }
             else:
                 return [], 0
@@ -435,13 +471,13 @@ class SampleService:
             # 统一关键字搜索
             if keyword:
                 if type == 'basis':
-                    where_clauses.append("(chinese_name LIKE %s OR standard_number LIKE %s)")
+                    where_clauses.append("(s.chinese_name LIKE %s OR s.standard_number LIKE %s)")
                     params.extend([f"%{keyword}%", f"%{keyword}%"])
                 elif type == 'work':
-                    where_clauses.append("plan_name LIKE %s")
+                    where_clauses.append("s.plan_name LIKE %s")
                     params.append(f"%{keyword}%")
                 elif type == 'job':
-                    where_clauses.append("file_name LIKE %s")
+                    where_clauses.append("s.file_name LIKE %s")
                     params.append(f"%{keyword}%")
             
             # 精细化检索
@@ -449,52 +485,42 @@ class SampleService:
                 if not filter_value:
                     continue
                 
-                # 处理日期范围
-                if filter_key == 'release_date_start':
-                    where_clauses.append("release_date >= %s")
-                    params.append(filter_value)
-                elif filter_key == 'release_date_end':
-                    where_clauses.append("release_date <= %s")
+                db_field = field_map.get(filter_key)
+                if db_field:
+                    where_clauses.append(f"{db_field} = %s")
                     params.append(filter_value)
-                elif filter_key in field_map:
-                    db_field = field_map[filter_key]
-                    where_clauses.append(f"{db_field} LIKE %s")
-                    params.append(f"%{filter_value}%")
             
             where_sql = " WHERE " + " AND ".join(where_clauses) if where_clauses else ""
-            
-            # 分页查询
             offset = (page - 1) * size
-            sql = f"SELECT {fields} FROM {table_name}{where_sql} ORDER BY created_time DESC LIMIT %s OFFSET %s"
-            params.extend([size, offset])
             
-            logger.info(f"Executing Basic Info SQL: {sql} with params: {params}")
+            # 使用 LEFT JOIN 关联主表
+            sql = f"""
+                SELECT {fields} 
+                FROM {table_name} s
+                LEFT JOIN t_samp_document_main m ON s.id = m.source_id AND m.source_type = %s
+                {where_sql} 
+                ORDER BY s.created_time DESC 
+                LIMIT %s OFFSET %s
+            """
+            params = [type] + params + [size, offset]
+            
             cursor.execute(sql, tuple(params))
-            items = []
-            for row in cursor.fetchall():
-                item = row # DictCursor already returns dict
-                # 格式化日期
-                for key in ['release_date', 'created_time']:
-                    if item.get(key) and hasattr(item[key], 'isoformat'):
-                        item[key] = item[key].isoformat()
-                    elif item.get(key):
-                        item[key] = str(item[key])
-                items.append(item)
+            items = cursor.fetchall()
             
-            # 总数查询
-            count_sql = f"SELECT COUNT(*) as count FROM {table_name}{where_sql}"
-            cursor.execute(count_sql, tuple(params[:-2]))
+            # 总数
+            count_sql = f"SELECT COUNT(*) as count FROM {table_name} s {where_sql}"
+            cursor.execute(count_sql, tuple(params[1:-2]))
             res = cursor.fetchone()
             total = res['count'] if res else 0
             
             return items, total
         except Exception as e:
-            logger.exception("查询基本信息失败")
+            logger.exception(f"获取 {type} 列表失败")
             return [], 0
         finally:
             cursor.close()
             conn.close()
-    
+
     # ==================== 文档转换 ====================
     
     async def get_document_source_type(self, doc_id: str) -> Optional[str]:
@@ -535,11 +561,11 @@ class SampleService:
             cursor.close()
             conn.close()
     
-    async def update_conversion_progress(self, doc_id: str, status: int, 
+    async def update_conversion_status(self, doc_id: str, status: int, 
                                         md_url: Optional[str] = None,
                                         json_url: Optional[str] = None,
                                         error_message: Optional[str] = None) -> bool:
-        """更新文档转换进度
+        """更新文档转换状态
         
         Args:
             doc_id: 文档ID
@@ -600,25 +626,39 @@ class SampleService:
             
             source_id = str(uuid.uuid4())
             doc_id = str(uuid.uuid4())
+            file_url = data.get('file_url')
+            file_extension = file_url.split('.')[-1] if file_url and '.' in file_url else None
+            
+            # 1. 插入主表 (解耦触发器,手动同步)
+            cursor.execute(
+                """
+                INSERT INTO t_samp_document_main (
+                    id, title, source_type, source_id, file_url, 
+                    file_extension, created_by, updated_by, created_time, updated_time,
+                    conversion_status
+                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW(), 0)
+                """,
+                (
+                    doc_id, data.get('title'), type, source_id, file_url,
+                    file_extension, user_id, user_id
+                )
+            )
             
-            # 1. 插入子表 (触发器会自动同步到主表)
+            # 2. 插入子表 (移除 file_url,因为它现在只存储在主表中)
             if type == 'basis':
-                sql = f"INSERT INTO {table_name} (id, chinese_name, standard_number, issuing_authority, release_date, document_type, professional_field, validity, file_url, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"
-                params = (source_id, data.get('title'), data.get('standard_no'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('document_type'), data.get('professional_field'), data.get('validity', '现行'), data.get('file_url'), user_id)
+                sql = f"INSERT INTO {table_name} (id, chinese_name, standard_number, issuing_authority, release_date, document_type, professional_field, validity, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"
+                params = (source_id, data.get('title'), data.get('standard_no'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('document_type'), data.get('professional_field'), data.get('validity', '现行'), user_id)
             elif type == 'work':
-                sql = f"INSERT INTO {table_name} (id, plan_name, project_name, project_section, compiling_unit, compiling_date, file_url, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"
-                params = (source_id, data.get('title'), data.get('project_name'), data.get('project_section'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('file_url'), user_id)
+                sql = f"INSERT INTO {table_name} (id, plan_name, project_name, project_section, compiling_unit, compiling_date, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"
+                params = (source_id, data.get('title'), data.get('project_name'), data.get('project_section'), data.get('issuing_authority'), self._to_date(data.get('release_date')), user_id)
             elif type == 'job':
-                sql = f"INSERT INTO {table_name} (id, file_name, issuing_department, document_type, publish_date, file_url, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), NOW())"
-                params = (source_id, data.get('title'), data.get('issuing_authority'), data.get('document_type'), self._to_date(data.get('release_date')), data.get('file_url'), user_id)
+                sql = f"INSERT INTO {table_name} (id, file_name, issuing_department, document_type, publish_date, created_by, created_time, updated_time) VALUES (%s, %s, %s, %s, %s, %s, NOW(), NOW())"
+                params = (source_id, data.get('title'), data.get('issuing_authority'), data.get('document_type'), self._to_date(data.get('release_date')), user_id)
             else:
                 return False, "不支持的类型"
             
             cursor.execute(sql, params)
             
-            # 2. 触发器已经同步到主表,不需要手动插入
-            # 但如果需要更新主表中触发器没处理的字段,可以在这里更新
-            
             conn.commit()
             return True, "新增成功"
         except Exception as e:
@@ -641,23 +681,34 @@ class SampleService:
             if not table_name:
                 return False, "无效的类型"
             
-            # 1. 更新子表 (触发器会自动同步到主表)
+            file_url = data.get('file_url')
+            file_extension = file_url.split('.')[-1] if file_url and '.' in file_url else None
+
+            # 1. 更新主表 (解耦触发器)
+            cursor.execute(
+                """
+                UPDATE t_samp_document_main 
+                SET title = %s, file_url = %s, file_extension = %s, updated_by = %s, updated_time = NOW()
+                WHERE source_id = %s AND source_type = %s
+                """,
+                (data.get('title'), file_url, file_extension, updater_id, info_id, type)
+            )
+
+            # 2. 更新子表 (移除 file_url)
             if type == 'basis':
-                sql = f"UPDATE {table_name} SET chinese_name = %s, standard_number = %s, issuing_authority = %s, release_date = %s, document_type = %s, professional_field = %s, validity = %s, file_url = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
-                params = (data.get('title'), data.get('standard_no'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('document_type'), data.get('professional_field'), data.get('validity'), data.get('file_url'), updater_id, info_id)
+                sql = f"UPDATE {table_name} SET chinese_name = %s, standard_number = %s, issuing_authority = %s, release_date = %s, document_type = %s, professional_field = %s, validity = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
+                params = (data.get('title'), data.get('standard_no'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('document_type'), data.get('professional_field'), data.get('validity'), updater_id, info_id)
             elif type == 'work':
-                sql = f"UPDATE {table_name} SET plan_name = %s, project_name = %s, project_section = %s, compiling_unit = %s, compiling_date = %s, file_url = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
-                params = (data.get('title'), data.get('project_name'), data.get('project_section'), data.get('issuing_authority'), self._to_date(data.get('release_date')), data.get('file_url'), updater_id, info_id)
+                sql = f"UPDATE {table_name} SET plan_name = %s, project_name = %s, project_section = %s, compiling_unit = %s, compiling_date = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
+                params = (data.get('title'), data.get('project_name'), data.get('project_section'), data.get('issuing_authority'), self._to_date(data.get('release_date')), updater_id, info_id)
             elif type == 'job':
-                sql = f"UPDATE {table_name} SET file_name = %s, issuing_department = %s, document_type = %s, publish_date = %s, file_url = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
-                params = (data.get('title'), data.get('issuing_authority'), data.get('document_type'), self._to_date(data.get('release_date')), data.get('file_url'), updater_id, info_id)
+                sql = f"UPDATE {table_name} SET file_name = %s, issuing_department = %s, document_type = %s, publish_date = %s, updated_by = %s, updated_time = NOW() WHERE id = %s"
+                params = (data.get('title'), data.get('issuing_authority'), data.get('document_type'), self._to_date(data.get('release_date')), updater_id, info_id)
             else:
                 return False, "不支持的类型"
             
             cursor.execute(sql, params)
             
-            # 2. 触发器已经同步到主表的基本字段,如果主表有额外字段需要更新,可以在这里操作
-            
             conn.commit()
             return True, "编辑成功"
         except Exception as e:

+ 11 - 17
src/views/sample_view.py

@@ -202,33 +202,24 @@ async def batch_delete_documents(req: BatchDeleteRequest, credentials: HTTPAutho
         return ApiResponse(code=500, message=f"批量删除失败: {str(e)}", timestamp=datetime.now(timezone.utc).isoformat()).model_dump()
 
 async def simulate_conversion(doc_id: str):
-    """模拟文档转换过程"""
+    """模拟文档转换过程 (仅保留状态切换)"""
     sample_service = SampleService()
     
     try:
-        # 1. 模拟开始 (10%)
-        await sample_service.update_conversion_progress(doc_id, status=1, progress=10)
+        # 1. 模拟开始
+        await sample_service.update_conversion_status(doc_id, status=1)
         await asyncio.sleep(2)
         
-        # 2. 模拟进行中 (40%)
-        await sample_service.update_conversion_progress(doc_id, status=1, progress=40)
-        await asyncio.sleep(3)
-        
-        # 3. 模拟进行中 (75%)
-        await sample_service.update_conversion_progress(doc_id, status=1, progress=75)
-        await asyncio.sleep(2)
-        
-        # 4. 模拟完成 (100%)
-        # 模拟云端存储地址
+        # 2. 模拟完成
         converted_file_name = f"http://192.168.91.15:19000/aidata/sampledata/converted/simulated/{doc_id}.pdf"
         json_url = f"http://192.168.91.15:19000/aidata/sampledata/converted/simulated/{doc_id}.json"
-        await sample_service.update_conversion_progress(doc_id, status=2, progress=100, 
-                                                       converted_file_name=converted_file_name,
+        await sample_service.update_conversion_status(doc_id, status=2, 
+                                                       md_url=converted_file_name,
                                                        json_url=json_url)
         
     except Exception as e:
         logger.exception("模拟转换出错")
-        await sample_service.update_conversion_progress(doc_id, status=3, progress=0, 
+        await sample_service.update_conversion_status(doc_id, status=3, 
                                                        error_message=str(e))
 
 @router.post("/documents/convert")
@@ -257,7 +248,10 @@ async def convert_document(req: ConvertRequest, background_tasks: BackgroundTask
         if not file_url:
             return ApiResponse(code=400, message="文档缺少文件链接,无法转换", timestamp=datetime.now(timezone.utc).isoformat()).model_dump()
 
-        # 2. 启动后台任务
+        # 2. 立即将状态更新为“转换中”,避免前端轮询延迟
+        await sample_service.update_conversion_status(doc_id, status=1)
+
+        # 3. 启动后台任务
         manager = get_mineru_manager()
         background_tasks.add_task(manager.process_document, doc_id, title, file_url)