Преглед изворног кода

Merge branch 'dev' of http://47.109.151.80:15030/CRBC-MaaS-Platform-Project/LQAgentPlatform into dev_sgsc_wxm

WangXuMing пре 1 недеље
родитељ
комит
78bcae0dcb

+ 6 - 5
config/config.ini.template

@@ -145,13 +145,10 @@ MYSQL_HOST=192.168.92.61
 MYSQL_PORT=13306
 MYSQL_PORT=13306
 MYSQL_USER=root
 MYSQL_USER=root
 MYSQL_PASSWORD=Lq123456!
 MYSQL_PASSWORD=Lq123456!
-MYSQL_DB=lq_db
+MYSQL_DB=lq_db_dev
 MYSQL_MIN_SIZE=1
 MYSQL_MIN_SIZE=1
 MYSQL_MAX_SIZE=5
 MYSQL_MAX_SIZE=5
 MYSQL_AUTO_COMMIT=True
 MYSQL_AUTO_COMMIT=True
-MYSQL_CONNECT_TIMEOUT=30
-MYSQL_READ_TIMEOUT=60
-MYSQL_WRITE_TIMEOUT=30
 
 
 
 
 [pgvector]
 [pgvector]
@@ -241,5 +238,9 @@ MAX_TOKENS=1024
 [construction_review]
 [construction_review]
 MAX_CELERY_TASKS=1
 MAX_CELERY_TASKS=1
 
 
-
+[timeliness_review]
+# 时效性审查中用于匹配前需要去除的符号(第二轮处理)
+# 这些符号会在基础规范化(去除空白、书名号、括号、HTML标签)之后去除
+# 包含各种连接符:半角连字符(-)、全角连接号(-)、全角破折号(—)
+REMOVE_SYMBOLS=),-,.,/,,:,[,],【,】,〔,〕,(,),-,—
 
 

+ 23 - 16
core/construction_review/component/ai_review_engine.py

@@ -1047,34 +1047,41 @@ class AIReviewEngine(BaseReviewer):
                 elif isinstance(outline_raw, list):
                 elif isinstance(outline_raw, list):
                     outline_chapters = outline_raw
                     outline_chapters = outline_raw
 
 
-            # 提取一级和二级信息
-            outline_first = set()
-            outline_secondary = {}
+            # 🆕 提取一级和二级标题(用于独立模糊匹配)
+            # 结构:{first_code: {'title': '章节标题', 'subsections': ['二级标题1', '二级标题2']}}
+            outline_by_first: Dict[str, Dict] = {}
             
             
             for chapter in outline_chapters:
             for chapter in outline_chapters:
                 if not isinstance(chapter, dict):
                 if not isinstance(chapter, dict):
                     continue
                     continue
                 
                 
                 first_code = chapter.get('chapter_classification', '')
                 first_code = chapter.get('chapter_classification', '')
-                if first_code:
-                    outline_first.add(first_code)
+                first_title = chapter.get('title', '')
                 
                 
-                # 提取 subsections 中的二级章节
+                if not first_code:
+                    continue
+                
+                if first_code not in outline_by_first:
+                    outline_by_first[first_code] = {
+                        'title': first_title,
+                        'subsections': []
+                    }
+                
+                # 提取二级标题列表
                 for sub in chapter.get('subsections', []):
                 for sub in chapter.get('subsections', []):
                     if not isinstance(sub, dict):
                     if not isinstance(sub, dict):
                         continue
                         continue
-                    second_code = sub.get('secondary_category_code', '')
-                    if first_code and second_code:
-                        outline_secondary[(first_code, second_code)] = sub.get('title', '')
+                    sub_title = sub.get('title', '')
+                    if sub_title:
+                        outline_by_first[first_code]['subsections'].append(sub_title)
             
             
-            logger.info(f"[{name}] 获取到 {len(outline_first)} 个一级, {len(outline_secondary)} 个二级")
-
-            # 使用模糊匹配
+            logger.info(f"[{name}] 获取到 {len(outline_by_first)} 个一级章节")
+            
+            # 使用模糊匹配(基于标题)
             matcher = OutlineCatalogueMatcher(csv_path, raw_content_csv)
             matcher = OutlineCatalogueMatcher(csv_path, raw_content_csv)
-            match_result = matcher.match_catalogue(
-                outline_first=outline_first,
-                outline_secondary=outline_secondary,
-                threshold=0.6
+            match_result = matcher.match_catalogue_by_title(
+                outline_by_first=outline_by_first,
+                threshold=0.6  # 阈值0.6
             )
             )
             
             
             catalogue_result = {
             catalogue_result = {

+ 86 - 0
core/construction_review/component/document_processor.py

@@ -66,6 +66,91 @@ class DocumentComponents:
     text_splitter: TextSplitter
     text_splitter: TextSplitter
 
 
 
 
+# Keyword map for second-level category titles (used to classify the outline's subsections)
+# Based on StandardCategoryTable.csv; keywords strictly follow the standard catalogue names
+SECONDARY_CATEGORY_KEYWORDS = {
+    # Basis of compilation (basis)
+    "basis": {
+        "LawsAndRegulations": ["法律法规"],  # exact match
+        "StandardsAndSpecifications": ["标准规范"],  # exact match
+        "DocumentSystems": ["文件制度"],  # exact match
+        "CompilationPrinciples": ["编制原则"],  # exact match
+        "CompilationScope": ["编制范围"],  # exact match
+    },
+    # Project overview (overview)
+    "overview": {
+        "DesignSummary": ["设计概况"],  # exact match
+        "GeologyWeather": ["工程地质与水文气象"],  # exact match of the standard catalogue name
+        "Surroundings": ["周边环境"],  # exact match
+        "LayoutPlan": ["施工平面及立面布置"],  # exact match of the standard catalogue name
+        "RequirementsTech": ["施工要求和技术保证条件"],  # exact match of the standard catalogue name
+        "RiskLevel": ["风险辨识与分级"],  # exact match of the standard catalogue name
+        "Stakeholders": ["参建各方责任主体单位"],  # exact match of the standard catalogue name
+    },
+    # Construction plan (plan)
+    "plan": {
+        "Schedule": ["施工进度计划"],  # exact match of the standard catalogue name
+        "Materials": ["施工材料计划"],  # exact match of the standard catalogue name
+        "Equipment": ["施工设备计划"],  # exact match of the standard catalogue name
+        "Workforce": ["劳动力计划"],  # exact match
+        "SafetyCost": ["安全生产费用使用计划"],  # exact match of the standard catalogue name
+    },
+    # Construction process technology (technology)
+    "technology": {
+        # Matched strictly against the standard catalogue; full names come first to avoid ambiguity
+        "MethodsOverview": ["主要施工方法概述", "施工方法概述"],  # omits the bare "施工方法" to avoid clashing with Operations
+        "TechParams": ["技术参数"],  # omits the bare "参数", which would be too broad
+        "Process": ["工艺流程"],  # omits the bare "流程", which would be too broad
+        "PrepWork": ["施工准备"],  # omits the bare "准备", which would be too broad
+        "Operations": ["施工方法及操作要求", "施工方案及操作要求", "操作要求", "施工方案"],  # most specific entries first
+        "Inspection": ["检查要求"],  # omits the bare "检查"/"验收" to avoid clashing with other chapters
+    },
+    # Safety assurance measures (safety)
+    "safety": {
+        "SafetySystem": ["安全保证体系"],  # exact match of the standard catalogue name
+        "Organization": ["组织保证措施"],  # exact match
+        "TechMeasures": ["技术保障措施", "技术保证措施"],  # exact match (includes a common variant)
+        "Protection": ["安全防护措施"],  # newly added category that was previously missing
+        "Monitoring": ["监测监控措施"],  # exact match
+        "Emergency": ["应急处置措施"],  # exact match
+    },
+    # Quality assurance measures (quality)
+    "quality": {
+        "QualitySystem": ["质量保证体系"],  # exact match
+        "QualityGoals": ["质量目标"],  # exact match
+        "Excellence": ["工程创优规划"],  # exact match
+        "QualityControl": ["质量控制程序与具体措施"],  # exact match of the standard catalogue name
+    },
+    # Environmental assurance measures (environment)
+    "environment": {
+        "EnvSystem": ["环境保证体系"],  # exact match
+        "EnvOrg": ["环境保护组织机构"],  # exact match
+        "EnvProtection": ["环境保护及文明施工措施"],  # exact match of the standard catalogue name
+    },
+    # Construction management and workforce staffing / division of duties (management)
+    "management": {
+        "Managers": ["施工管理人员"],  # exact match
+        "SafetyStaff": ["专职安全生产管理人员"],  # exact match of the standard catalogue name
+        "SpecialWorkers": ["特种作业人员"],  # exact match
+        "OtherWorkers": ["其他作业人员"],  # exact match
+    },
+    # Acceptance requirements (acceptance)
+    "acceptance": {
+        "Standards": ["验收标准"],  # exact match
+        "Procedure": ["验收程序"],  # exact match
+        "Content": ["验收内容"],  # exact match
+        "Timing": ["验收时间"],  # exact match
+        "Personnel": ["验收人员"],  # exact match
+    },
+    # Other materials (other)
+    "other": {
+        "Calculations": ["计算书"],  # exact match
+        "Drawings": ["相关施工图纸"],  # exact match of the standard catalogue name
+        "Tables": ["附图附表"],  # exact match
+        "Team": ["编制及审核人员情况"],  # exact match of the standard catalogue name
+    },
+}
+
 class DocumentProcessor:
 class DocumentProcessor:
     """
     """
     文档处理器
     文档处理器
@@ -734,3 +819,4 @@ class DocumentProcessor:
         except Exception as e:
         except Exception as e:
             logger.error(f"基础PDF处理失败: {str(e)}", exc_info=True)
             logger.error(f"基础PDF处理失败: {str(e)}", exc_info=True)
             raise
             raise
+

+ 171 - 131
core/construction_review/component/outline_catalogue_matcher.py

@@ -8,6 +8,7 @@
 """
 """
 
 
 import difflib
 import difflib
+import logging
 import re
 import re
 from typing import Dict, List, Optional, Set, Tuple, Any
 from typing import Dict, List, Optional, Set, Tuple, Any
 from collections import defaultdict
 from collections import defaultdict
@@ -15,6 +16,8 @@ from pathlib import Path
 
 
 import pandas as pd
 import pandas as pd
 
 
+logger = logging.getLogger(__name__)
+
 
 
 class OutlineCatalogueMatcher:
 class OutlineCatalogueMatcher:
     """
     """
@@ -227,161 +230,198 @@ class OutlineCatalogueMatcher:
         
         
         return min(sum(scores), 1.0)
         return min(sum(scores), 1.0)
     
     
-    def match_catalogue(
+    def _match_by_title_fuzzy(
+        self,
+        standard_name: str,
+        candidate_titles: List[str],
+        threshold: float
+    ) -> Tuple[bool, float, Optional[str]]:
+        """
+        Find the candidate title that is most similar to the standard name.
+        
+        Returns:
+            (matched, best score, best title); the title stays None if no candidate scores above 0.0
+        """
+        best_score = 0.0
+        best_title = None
+        
+        for title in candidate_titles:
+            score = self._calculate_enhanced_similarity(standard_name, title)
+            if score > best_score:  # strict ">" keeps the earliest of equally scored titles
+                best_score = score
+                best_title = title
+        
+        is_match = best_score >= threshold  # the threshold itself counts as a match
+        return is_match, best_score, best_title
+    
+    def match_catalogue_by_title(
         self,
         self,
-        outline_first: Set[str],
-        outline_secondary: Dict[Tuple[str, str], str],
+        outline_by_first: Dict[str, Dict[str, any]],
         threshold: float = 0.6
         threshold: float = 0.6
     ) -> Dict[str, Any]:
     ) -> Dict[str, Any]:
         """
         """
-        执行目录匹配
+        🆕 基于标题的独立模糊匹配(一二级都独立)
         
         
         Args:
         Args:
-            outline_first: 从outline中提取的一级code集合
-            outline_secondary: 从outline中提取的二级 {(first_code, second_code): title}
-            threshold: 模糊匹配阈值(默认0.6)
+            outline_by_first: {
+                first_code: {
+                    'title': '一级标题',
+                    'subsections': ['二级标题1', '二级标题2', ...]
+                }
+            }
+            threshold: 匹配阈值,默认0.6
             
             
         Returns:
         Returns:
-            匹配结果,包含:
-            - matched_first: 匹配的一级code集合
-            - matched_second: 匹配的二级key集合
-            - missing_first: 缺失的一级列表
-            - missing_second: 缺失的二级列表
-            - match_details: 匹配详情
+            匹配结果
         """
         """
-        required_first = set(self.first_names.keys())
-        required_second = set(self.second_names.keys())
+        logger.info(f"[独立模糊匹配] 开始,阈值={threshold}")
+        
+        # ========== 一级目录匹配(独立模糊)==========
+        actual_first_titles = {
+            code: info['title'] 
+            for code, info in outline_by_first.items()
+        }
         
         
-        # 一级匹配
-        matched_first = outline_first & required_first
-        missing_first = required_first - matched_first
+        matched_first = set()
+        missing_first = []
+        
+        for req_code, req_name in self.first_names.items():
+            # 优先:直接用code精确匹配,因为一级分类通常较准
+            if req_code in actual_first_titles:
+                matched_first.add(req_code)
+                logger.debug(f"[一级匹配] {req_name}: 存在")
+            else:
+                # 尝试用标题模糊匹配
+                is_match, score, matched_title = self._match_by_title_fuzzy(
+                    req_name,
+                    list(actual_first_titles.values()),
+                    threshold
+                )
+                if is_match:
+                    # 找到匹配的标题,反向查找code
+                    for code, title in actual_first_titles.items():
+                        if title == matched_title:
+                            matched_first.add(req_code)
+                            logger.debug(f"[一级模糊匹配] {req_name} -> {matched_title} ({score:.3f})")
+                            break
+                else:
+                    missing_first.append({
+                        'first_code': req_code,
+                        'first_name': req_name,
+                        'first_seq': self.first_seq.get(req_code, 0)
+                    })
+                    logger.debug(f"[一级缺失] {req_name}")
+        
+        # ========== 二级目录匹配(结合一级 + 全局兜底)==========
+        # 🆕 先收集所有二级标题用于全局兜底
+        all_actual_second_titles = []
+        for fc, info in outline_by_first.items():
+            for sub_title in info.get('subsections', []):
+                all_actual_second_titles.append({
+                    'first_code': fc,
+                    'title': sub_title
+                })
         
         
-        # 二级匹配
         matched_second = set()
         matched_second = set()
-        missing_second = set()
+        missing_second = []
         match_details = []
         match_details = []
+        matched_actual_titles = set()  # 防重复
         
         
-        # 精确匹配
-        outline_second_keys = set(outline_secondary.keys())
-        exact_matches = outline_second_keys & required_second
-        matched_second.update(exact_matches)
-        
-        for key in exact_matches:
-            first_code, second_code = key
+        for req_key, req_name in self.second_names.items():
+            first_code, second_code = req_key
+            
+            # 🆕 步骤1:优先在同一一级下匹配
+            same_group_titles = outline_by_first.get(first_code, {}).get('subsections', [])
+            best_score_same = 0.0
+            best_match_same = None
+            
+            for title in same_group_titles:
+                if title in matched_actual_titles:
+                    continue
+                score = self._calculate_enhanced_similarity(req_name, title)
+                if score > best_score_same:
+                    best_score_same = score
+                    best_match_same = title
+            
+            # 同组匹配成功
+            if best_score_same >= threshold and best_match_same:
+                matched_second.add(req_key)
+                matched_actual_titles.add(best_match_same)
+                match_details.append({
+                    'level': 'second',
+                    'required_first_code': first_code,
+                    'required_second_code': second_code,
+                    'required_second_name': req_name,
+                    'matched': True,
+                    'match_type': 'same_group_fuzzy',
+                    'similarity': best_score_same,
+                    'matched_title': best_match_same
+                })
+                logger.debug(f"[二级同组匹配] {req_name} -> {best_match_same} ({best_score_same:.3f})")
+                continue
+            
+            # 🆕 步骤2:同组失败,尝试全局匹配(提高阈值防误匹配)
+            GLOBAL_THRESHOLD = 0.7  # 全局匹配阈值更高
+            best_score_global = 0.0
+            best_match_global = None
+            best_match_fc = None
+            
+            for actual in all_actual_second_titles:
+                if actual['title'] in matched_actual_titles:
+                    continue
+                score = self._calculate_enhanced_similarity(req_name, actual['title'])
+                if score > best_score_global:
+                    best_score_global = score
+                    best_match_global = actual['title']
+                    best_match_fc = actual['first_code']
+            
+            # 全局匹配成功(且跨组)
+            if best_score_global >= GLOBAL_THRESHOLD and best_match_global:
+                matched_second.add(req_key)
+                matched_actual_titles.add(best_match_global)
+                match_details.append({
+                    'level': 'second',
+                    'required_first_code': first_code,
+                    'required_second_code': second_code,
+                    'required_second_name': req_name,
+                    'matched': True,
+                    'match_type': 'cross_group_fuzzy',  # 标记为跨组匹配
+                    'similarity': best_score_global,
+                    'matched_title': best_match_global,
+                    'matched_actual_first': best_match_fc  # 实际匹配到的一级
+                })
+                logger.warning(f"[二级跨组匹配] {req_name}(应在{first_code}) -> {best_match_global}(实际在{best_match_fc}) ({best_score_global:.3f})")
+                continue
+            
+            # 都失败,记为缺失
+            best_score = max(best_score_same, best_score_global)
+            best_attempt = best_match_same or best_match_global
+            missing_second.append({
+                'first_code': first_code,
+                'first_name': self.first_names.get(first_code, ''),
+                'secondary_code': second_code,
+                'secondary_name': req_name,
+                'second_seq': self.second_seq.get(req_key, 0)
+            })
             match_details.append({
             match_details.append({
                 'level': 'second',
                 'level': 'second',
                 'required_first_code': first_code,
                 'required_first_code': first_code,
                 'required_second_code': second_code,
                 'required_second_code': second_code,
-                'required_second_name': self.second_names.get(key, ''),
-                'matched': True,
-                'match_type': 'exact',
-                'similarity': 1.0
-            })
-        
-        # 模糊匹配(对未精确匹配的)
-        required_remaining = required_second - exact_matches
-        outline_remaining = outline_second_keys - exact_matches
-        
-        if required_remaining and outline_remaining:
-            # 准备outline数据
-            outline_list = []
-            for key in outline_remaining:
-                first_code, second_code = key
-                title = outline_secondary.get(key, "")
-                outline_list.append({
-                    'key': key,
-                    'first_code': first_code,
-                    'second_code': second_code,
-                    'title': title
-                })
-            
-            # 对每个required进行模糊匹配
-            for req_key in required_remaining:
-                first_code, second_code = req_key
-                second_name = self.second_names.get(req_key, '')
-                first_name = self.first_names.get(first_code, '')
-                
-                # 获取详细定义
-                raw_content = self.second_raw_content.get((first_name, second_name))
-                
-                best_match = None
-                best_score = 0.0
-                
-                for item in outline_list:
-                    # 计算相似度
-                    score1 = self._calculate_enhanced_similarity(second_name, item['title'])
-                    score2 = self._calculate_enhanced_similarity(
-                        f"{first_name}{second_name}",
-                        item['title']
-                    )
-                    score = max(score1, score2)
-                    
-                    # 如果有详细定义,也计算
-                    if raw_content:
-                        score3 = self._calculate_enhanced_similarity(
-                            second_name,
-                            item['title'],
-                            raw_content
-                        )
-                        score = max(score, score3)
-                    
-                    if score > best_score:
-                        best_score = score
-                        best_match = item
-                
-                if best_score >= threshold:
-                    matched_second.add(req_key)
-                    match_details.append({
-                        'level': 'second',
-                        'required_first_code': first_code,
-                        'required_second_code': second_code,
-                        'required_second_name': second_name,
-                        'matched': True,
-                        'match_type': 'fuzzy',
-                        'similarity': best_score,
-                        'matched_title': best_match['title'] if best_match else None,
-                        'used_raw_content': raw_content is not None
-                    })
-                else:
-                    missing_second.add(req_key)
-                    match_details.append({
-                        'level': 'second',
-                        'required_first_code': first_code,
-                        'required_second_code': second_code,
-                        'required_second_name': second_name,
-                        'matched': False,
-                        'match_type': 'none',
-                        'similarity': best_score
-                    })
-        else:
-            missing_second = required_remaining
-        
-        # 构建缺失详情
-        missing_first_details = []
-        for code in sorted(missing_first, key=lambda x: self.first_seq.get(x, 0)):
-            missing_first_details.append({
-                'first_code': code,
-                'first_name': self.first_names.get(code, code),
-                'first_seq': self.first_seq.get(code, 0)
+                'required_second_name': req_name,
+                'matched': False,
+                'match_type': 'none',
+                'similarity': best_score,
+                'best_attempt': best_attempt
             })
             })
+            logger.debug(f"[二级缺失] {req_name} (最佳尝试: {best_attempt}, {best_score:.3f})")
         
         
-        missing_second_details = []
-        for key in sorted(missing_second, key=lambda x: (self.first_seq.get(x[0], 0), self.second_seq.get(x, 0))):
-            first_code, second_code = key
-            missing_second_details.append({
-                'first_code': first_code,
-                'first_name': self.first_names.get(first_code, first_code),
-                'first_seq': self.first_seq.get(first_code, 0),
-                'secondary_code': second_code,
-                'secondary_name': self.second_names.get(key, ''),
-                'second_seq': self.second_seq.get(key, 0)
-            })
+        logger.info(f"[独立模糊匹配] 完成:一级缺失 {len(missing_first)} 个,二级缺失 {len(missing_second)} 个")
         
         
         return {
         return {
             'matched_first': matched_first,
             'matched_first': matched_first,
             'matched_second': matched_second,
             'matched_second': matched_second,
-            'missing_first': missing_first_details,
-            'missing_second': missing_second_details,
+            'missing_first': missing_first,
+            'missing_second': missing_second,
             'missing_first_count': len(missing_first),
             'missing_first_count': len(missing_first),
             'missing_second_count': len(missing_second),
             'missing_second_count': len(missing_second),
             'match_details': match_details
             'match_details': match_details

+ 133 - 37
core/construction_review/component/reviewers/standard_timeliness_reviewer.py

@@ -26,6 +26,10 @@
         results = reviewer.review_standards(standards_list)
         results = reviewer.review_standards(standards_list)
 """
 """
 import asyncio
 import asyncio
+import json
+import os
+import threading
+from datetime import datetime
 from typing import List, Dict, Any, Optional
 from typing import List, Dict, Any, Optional
 from dataclasses import dataclass, asdict
 from dataclasses import dataclass, asdict
 
 
@@ -67,13 +71,14 @@ class StandardTimelinessReviewer:
     对标准列表进行时效性审查。
     对标准列表进行时效性审查。
     """
     """
 
 
-    def __init__(self, db_pool=None, standard_service: Optional[StandardMatchingService] = None):
+    def __init__(self, db_pool=None, standard_service: Optional[StandardMatchingService] = None, callback_task_id: Optional[str] = None):
         """
         """
         初始化审查器
         初始化审查器
 
 
         Args:
         Args:
             db_pool: 数据库连接池,用于初始化 StandardMatchingService(如未提供standard_service则必填)
             db_pool: 数据库连接池,用于初始化 StandardMatchingService(如未提供standard_service则必填)
             standard_service: 已初始化的 StandardMatchingService 实例(优先级高于 db_pool)
             standard_service: 已初始化的 StandardMatchingService 实例(优先级高于 db_pool)
+            callback_task_id: 回调任务ID,用于持久化判定结果
 
 
         Raises:
         Raises:
             RuntimeError: 当db_pool和standard_service都为None时抛出异常
             RuntimeError: 当db_pool和standard_service都为None时抛出异常
@@ -86,6 +91,8 @@ class StandardTimelinessReviewer:
         self.db_pool = db_pool
         self.db_pool = db_pool
         self._service = standard_service
         self._service = standard_service
         self._own_service = False  # 标记是否由本实例创建 service
         self._own_service = False  # 标记是否由本实例创建 service
+        self.callback_task_id = callback_task_id
+        self._log_lock = threading.Lock()
 
 
     async def __aenter__(self):
     async def __aenter__(self):
         """异步上下文管理器入口"""
         """异步上下文管理器入口"""
@@ -102,6 +109,38 @@ class StandardTimelinessReviewer:
             await self._service.close()
             await self._service.close()
         return False
         return False
 
 
+    def _log_determination_results(self, review_results: List["TimelinessReviewResult"]) -> None:
+        """Append the timeliness verdicts to a per-task JSON file; failures are logged, never raised."""
+        if not self.callback_task_id:
+            return  # no task id means there is no file name to write to
+        try:
+            with self._log_lock:  # guards the read-modify-write of the file below
+                log_dir = os.path.join("temp", "construction_review", "timeliness_result")  # relative to the working directory
+                os.makedirs(log_dir, exist_ok=True)
+                log_path = os.path.join(log_dir, f"{self.callback_task_id}.json")
+
+                records = []
+                if os.path.exists(log_path):
+                    try:
+                        with open(log_path, "r", encoding="utf-8") as f:
+                            records = json.load(f)
+                            if not isinstance(records, list):
+                                records = []  # unexpected top-level shape: start over
+                    except Exception:
+                        records = []  # unreadable or invalid JSON: previous content is discarded
+
+                for result in review_results:
+                    records.append({
+                        "timestamp": datetime.now().isoformat(),  # naive local time, no timezone info
+                        "callback_task_id": self.callback_task_id,
+                        **result.to_dict()
+                    })
+
+                with open(log_path, "w", encoding="utf-8") as f:  # rewrites the whole file with the extended list
+                    json.dump(records, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            logger.warning(f"记录时效性判定结果失败: {e}")
+
     def review_standards(self, standards: List[Dict[str, str]]) -> List[TimelinessReviewResult]:
     def review_standards(self, standards: List[Dict[str, str]]) -> List[TimelinessReviewResult]:
         """
         """
         审查标准列表的时效性
         审查标准列表的时效性
@@ -112,7 +151,7 @@ class StandardTimelinessReviewer:
                 - standard_number: 标准号
                 - standard_number: 标准号
 
 
         Returns:
         Returns:
-            List[TimelinessReviewResult]: 审查结果列表
+            List[TimelinessReviewResult]: 审查结果列表(文件名为空的会被过滤掉)
         """
         """
         if not self._service:
         if not self._service:
             raise RuntimeError("服务未初始化,请使用异步上下文管理器或调用 initialize()")
             raise RuntimeError("服务未初始化,请使用异步上下文管理器或调用 initialize()")
@@ -123,12 +162,15 @@ class StandardTimelinessReviewer:
         # 转换为时效性审查结果
         # 转换为时效性审查结果
         review_results = []
         review_results = []
         for match_result in match_results:
         for match_result in match_results:
-            review_result = self._convert_match_to_review_result(match_result)
-            review_results.append(review_result)
+            # 跳过 match 返回 None 的情况(文件名为空)
+            if match_result is not None:
+                review_result = self._convert_match_to_review_result(match_result)
+                review_results.append(review_result)
 
 
+        self._log_determination_results(review_results)
         return review_results
         return review_results
 
 
-    def review_single(self, standard_name: str, standard_number: str, seq_no: int = 1) -> TimelinessReviewResult:
+    def review_single(self, standard_name: str, standard_number: str, seq_no: int = 1) -> Optional[TimelinessReviewResult]:
         """
         """
         审查单个标准的时效性
         审查单个标准的时效性
 
 
@@ -139,12 +181,18 @@ class StandardTimelinessReviewer:
 
 
         Returns:
         Returns:
             TimelinessReviewResult: 审查结果
             TimelinessReviewResult: 审查结果
+            None: 当文件名为空时返回 None,表示跳过审查
         """
         """
         if not self._service:
         if not self._service:
             raise RuntimeError("服务未初始化,请使用异步上下文管理器或调用 initialize()")
             raise RuntimeError("服务未初始化,请使用异步上下文管理器或调用 initialize()")
 
 
         match_result = self._service.check_single(seq_no, standard_name, standard_number)
         match_result = self._service.check_single(seq_no, standard_name, standard_number)
-        return self._convert_match_to_review_result(match_result)
+        # 如果 match 返回 None(文件名为空),则返回 None
+        if match_result is None:
+            return None
+        review_result = self._convert_match_to_review_result(match_result)
+        self._log_determination_results([review_result])
+        return review_result
 
 
     def _convert_match_to_review_result(self, match_result: StandardMatchResult) -> TimelinessReviewResult:
     def _convert_match_to_review_result(self, match_result: StandardMatchResult) -> TimelinessReviewResult:
         """
         """
@@ -163,8 +211,8 @@ class StandardTimelinessReviewer:
             # 正常状态 - 无风险
             # 正常状态 - 无风险
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result=match_result.process_result,
                 process_result=match_result.process_result,
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=False,
                 has_issue=False,
@@ -176,8 +224,8 @@ class StandardTimelinessReviewer:
             # 被替代 - high(与原有逻辑一致)
             # 被替代 - high(与原有逻辑一致)
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result=match_result.process_result,
                 process_result=match_result.process_result,
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=True,
                 has_issue=True,
@@ -194,8 +242,8 @@ class StandardTimelinessReviewer:
             # 废止无替代 - high(与原有逻辑一致)
             # 废止无替代 - high(与原有逻辑一致)
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result=match_result.process_result,
                 process_result=match_result.process_result,
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=True,
                 has_issue=True,
@@ -210,8 +258,8 @@ class StandardTimelinessReviewer:
             # 不匹配 - high(与原有逻辑一致:编号错误属于high)
             # 不匹配 - high(与原有逻辑一致:编号错误属于high)
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result=match_result.process_result,
                 process_result=match_result.process_result,
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=True,
                 has_issue=True,
@@ -228,8 +276,8 @@ class StandardTimelinessReviewer:
             # 标准库不存在 - 直接过滤,不返回问题
             # 标准库不存在 - 直接过滤,不返回问题
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result=match_result.process_result,
                 process_result=match_result.process_result,
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=False,
                 has_issue=False,
@@ -242,8 +290,8 @@ class StandardTimelinessReviewer:
             logger.warning(f"未知的匹配状态码: {status_code}")
             logger.warning(f"未知的匹配状态码: {status_code}")
             return TimelinessReviewResult(
             return TimelinessReviewResult(
                 seq_no=match_result.seq_no,
                 seq_no=match_result.seq_no,
-                standard_name=match_result.original_name,
-                standard_number=match_result.original_number,
+                standard_name=match_result.raw_name,
+                standard_number=match_result.raw_number,
                 process_result="未知",
                 process_result="未知",
                 status_code=status_code,
                 status_code=status_code,
                 has_issue=True,
                 has_issue=True,
@@ -253,6 +301,43 @@ class StandardTimelinessReviewer:
                 final_result=match_result.final_result
                 final_result=match_result.final_result
             )
             )
 
 
+    def _normalize_text(self, text: str) -> str:
+        """
+        规范化文本用于比较(与 StandardRepository._normalize_for_matching 保持一致)
+        去除所有空白、标点符号、书名号、括号等
+        从 config.ini 读取需要去除的符号
+        """
+        if not text:
+            return ""
+        import re
+
+        # 基础规范化(与 StandardRepository 一致)
+        # 去除 HTML 标签
+        text = re.sub(r'<[^>]+>', '', text)
+        # 去除所有 Unicode 空白字符
+        text = re.sub(r'\s+', '', text)
+        # 去除书名号和括号(第一轮)
+        text = text.replace('《', '').replace('》', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
+
+        # 第二轮:从 config.ini 读取并去除指定符号
+        default_symbols = '),-,.,/,,:,[,],【,】,〔,〕,(,),-,—'
+
+        # 尝试从配置读取
+        symbols_str = default_symbols
+        try:
+            from foundation.infrastructure.config.config import config_handler
+            symbols_str = config_handler.get('timeliness_review', 'REMOVE_SYMBOLS', default_symbols)
+        except Exception:
+            pass  # 使用默认符号
+
+        # 解析并去除符号
+        if symbols_str:
+            symbols_to_remove = [s.strip() for s in symbols_str.split(',') if s.strip()]
+            for symbol in symbols_to_remove:
+                text = text.replace(symbol, '')
+
+        return text
+
     def convert_to_standardized_format(
     def convert_to_standardized_format(
         self,
         self,
         review_results: List[TimelinessReviewResult],
         review_results: List[TimelinessReviewResult],
@@ -278,25 +363,36 @@ class StandardTimelinessReviewer:
             # 标准库不存在或无问题的结果直接过滤,不返回
             # 标准库不存在或无问题的结果直接过滤,不返回
             if result.status_code == MatchResultCode.NOT_FOUND.value or not result.has_issue:
             if result.status_code == MatchResultCode.NOT_FOUND.value or not result.has_issue:
                 continue
                 continue
-            else:
-                # 有问题
-                standardized_results.append({
-                    "check_item": check_item,
-                    "chapter_code": chapter_code,
-                    "check_item_code": check_item_code,
-                    "check_result": {
-                        "location": f"《{result.standard_name}》({result.standard_number})",
-                        "description": result.reason or result.final_result,
-                        "suggestion": result.suggestion,
-                        "issue_type": result.issue_type,
-                        "standard_name": result.standard_name,
-                        "standard_number": result.standard_number,
-                        "replacement_name": result.replacement_name,
-                        "replacement_number": result.replacement_number,
-                    },
-                    "exist_issue": True,
-                    "risk_info": {"risk_level": result.risk_level}
-                })
+
+            # 【兜底逻辑】检查替代标准是否和原始标准实质相同(规范化后比较)
+            if result.replacement_name and result.replacement_number:
+                original_combined = self._normalize_text(f"{result.standard_name}{result.standard_number}")
+                replacement_combined = self._normalize_text(f"{result.replacement_name}{result.replacement_number}")
+
+                if original_combined == replacement_combined:
+                    logger.info(f"[兜底过滤] 替代标准与原始标准实质相同,跳过: "
+                                f"{result.standard_name}({result.standard_number}) ~ "
+                                f"{result.replacement_name}({result.replacement_number})")
+                    continue  # 跳过这条问题,视为无风险
+
+            # 有问题
+            standardized_results.append({
+                "check_item": check_item,
+                "chapter_code": chapter_code,
+                "check_item_code": check_item_code,
+                "check_result": {
+                    "location": f"《{result.standard_name}》({result.standard_number})",
+                    "description": result.reason or result.final_result,
+                    "suggestion": result.suggestion,
+                    "issue_type": result.issue_type,
+                    "standard_name": result.standard_name,
+                    "standard_number": result.standard_number,
+                    "replacement_name": result.replacement_name,
+                    "replacement_number": result.replacement_number,
+                },
+                "exist_issue": True,
+                "risk_info": {"risk_level": result.risk_level}
+            })
 
 
         return standardized_results
         return standardized_results
 
 

+ 4 - 0
core/construction_review/component/reviewers/timeliness_basis_reviewer.py

@@ -388,6 +388,10 @@ class BasisReviewService:
 
 
         start_time = time.time()
         start_time = time.time()
         total_batches = (len(items) + 2) // 3  # 计算总批次数
         total_batches = (len(items) + 2) // 3  # 计算总批次数
+
+        # 绑定 callback_task_id 到时效性审查器,用于记录判定结果
+        if self._timeliness_reviewer and callback_task_id:
+            self._timeliness_reviewer.callback_task_id = callback_task_id
         
         
         # 发送开始审查的SSE推送(使用独立命名空间,避免与主流程进度冲突)
         # 发送开始审查的SSE推送(使用独立命名空间,避免与主流程进度冲突)
         if progress_manager and callback_task_id:
         if progress_manager and callback_task_id:

+ 3 - 0
core/construction_review/component/reviewers/timeliness_content_reviewer.py

@@ -276,6 +276,9 @@ class ContentTimelinessReviewer:
 
 
         try:
         try:
             async with self._semaphore:
             async with self._semaphore:
+                # 绑定 callback_task_id,用于记录判定结果
+                if callback_task_id:
+                    self._timeliness_reviewer.callback_task_id = callback_task_id
                 # 执行规则匹配审查
                 # 执行规则匹配审查
                 review_results = self._timeliness_reviewer.review_standards(standards_list)
                 review_results = self._timeliness_reviewer.review_standards(standards_list)
 
 

+ 2 - 1
core/construction_review/component/reviewers/utils/directory_extraction.py

@@ -43,7 +43,8 @@ SYSTEM = """
 1) 只抽取包含书名号《 》的条目。
 1) 只抽取包含书名号《 》的条目。
 2) 每条条目包括:title(《》内名称,去掉书名号)、suffix(《》后面的版本/日期/修订说明,可为空)、raw(该条目原文)。
 2) 每条条目包括:title(《》内名称,去掉书名号)、suffix(《》后面的版本/日期/修订说明,可为空)、raw(该条目原文)。
 3) 忽略标题行、段落说明、无《》的行。
 3) 忽略标题行、段落说明、无《》的行。
-4) 输出必须严格符合格式要求,不要输出任何额外文字。
+4) **重要:title 和 raw 必须保留原文的所有空格和格式,不要修改或去除任何空格。**
+5) 输出必须严格符合格式要求,不要输出任何额外文字。
 """
 """
 HUMAN ="""
 HUMAN ="""
 文本如下:
 文本如下:

+ 1 - 1
core/construction_review/component/standard_matching/standard_dao.py

@@ -12,7 +12,7 @@ class StandardDAO:
 
 
     def __init__(self, db_pool):
     def __init__(self, db_pool):
         self.db_pool = db_pool
         self.db_pool = db_pool
-        self.table_name = "t_samp_standard_base_info"
+        self.table_name = "t_samp_standard_base_info_status"
 
 
     async def load_all_standards(self) -> List[Dict]:
     async def load_all_standards(self) -> List[Dict]:
         """
         """

+ 265 - 137
core/construction_review/component/standard_matching/standard_service.py

@@ -15,6 +15,14 @@ from enum import Enum
 
 
 from foundation.observability.logger.loggering import review_logger as logger
 from foundation.observability.logger.loggering import review_logger as logger
 
 
+# 导入配置处理器
+try:
+    from foundation.infrastructure.config.config import config_handler
+    _CONFIG_AVAILABLE = True
+except ImportError:
+    _CONFIG_AVAILABLE = False
+    config_handler = None
+
 
 
 class ValidityStatus(Enum):
 class ValidityStatus(Enum):
     """时效性状态"""
     """时效性状态"""
@@ -36,8 +44,12 @@ class MatchResultCode(Enum):
 class StandardMatchResult:
 class StandardMatchResult:
     """标准匹配结果数据结构"""
     """标准匹配结果数据结构"""
     seq_no: int = 0                             # 序号
     seq_no: int = 0                             # 序号
-    original_name: str = ""                      # 原始标准名称
-    original_number: str = ""                    # 原始标准号
+    raw_name: str = ""                           # 原始输入名称(未修改,用于返回)
+    raw_number: str = ""                         # 原始输入标准号(未修改,用于返回)
+    normalized_name: str = ""                    # 规范化名称(用于匹配)
+    normalized_number: str = ""                  # 规范化标准号(用于匹配)
+    matched_name: str = ""                       # 匹配到的数据库原始名称
+    matched_number: str = ""                     # 匹配到的数据库原始标准号
     substitute_number: Optional[str] = None      # 替代标准号(如果有)
     substitute_number: Optional[str] = None      # 替代标准号(如果有)
     substitute_name: Optional[str] = None        # 替代标准名称(如果有)
     substitute_name: Optional[str] = None        # 替代标准名称(如果有)
     process_result: str = ""                     # 处理结果状态
     process_result: str = ""                     # 处理结果状态
@@ -49,8 +61,10 @@ class StandardMatchResult:
 class StandardRecord:
 class StandardRecord:
     """标准记录数据结构"""
     """标准记录数据结构"""
     id: int
     id: int
-    standard_name: str
-    standard_number: str
+    standard_name: str           # 原始名称(数据库中的值,用于返回)
+    standard_number: str         # 原始标准号(用于返回)
+    normalized_name: str         # 规范化名称(用于匹配)
+    normalized_number: str       # 规范化标准号(用于匹配)
     validity: str
     validity: str
 
 
 
 
@@ -64,14 +78,19 @@ class StandardRepository:
         # 原始数据列表
         # 原始数据列表
         self._records: List[StandardRecord] = []
         self._records: List[StandardRecord] = []
 
 
-        # 索引结构,加速查询
+        # 原始索引(用于返回数据)
         self._number_index: Dict[str, StandardRecord] = {}  # 标准号 -> 记录
         self._number_index: Dict[str, StandardRecord] = {}  # 标准号 -> 记录
         self._name_index: Dict[str, List[StandardRecord]] = {}  # 名称 -> 记录列表
         self._name_index: Dict[str, List[StandardRecord]] = {}  # 名称 -> 记录列表
         self._current_records: List[StandardRecord] = []  # 现行/试行标准列表
         self._current_records: List[StandardRecord] = []  # 现行/试行标准列表
 
 
+        # 规范化索引(用于匹配)
+        self._normalized_number_index: Dict[str, StandardRecord] = {}  # 规范化标准号 -> 记录
+        self._normalized_name_index: Dict[str, List[StandardRecord]] = {}  # 规范化名称 -> 记录列表
+
     def load_data(self, raw_data: List[Dict]):
     def load_data(self, raw_data: List[Dict]):
         """
         """
         加载原始数据到内存并建立索引
         加载原始数据到内存并建立索引
+        同时创建规范化索引用于匹配
 
 
         Args:
         Args:
             raw_data: 从数据库查询的原始标准数据列表
             raw_data: 从数据库查询的原始标准数据列表
@@ -80,6 +99,8 @@ class StandardRepository:
         self._number_index = {}
         self._number_index = {}
         self._name_index = {}
         self._name_index = {}
         self._current_records = []
         self._current_records = []
+        self._normalized_number_index = {}
+        self._normalized_name_index = {}
 
 
         for item in raw_data:
         for item in raw_data:
             # 跳过无效数据
             # 跳过无效数据
@@ -88,28 +109,37 @@ class StandardRepository:
             if not standard_number or not standard_name:
             if not standard_number or not standard_name:
                 continue
                 continue
 
 
+            # 创建规范化版本(用于匹配)
+            normalized_name = self._normalize_for_matching(standard_name)
+            normalized_number = self._normalize_for_matching(standard_number)
+
             record = StandardRecord(
             record = StandardRecord(
                 id=item.get("id", 0),
                 id=item.get("id", 0),
-                standard_name=standard_name,
-                standard_number=standard_number,
+                standard_name=standard_name,           # 原始名称(用于返回)
+                standard_number=standard_number,       # 原始标准号(用于返回)
+                normalized_name=normalized_name,       # 规范化名称(用于匹配)
+                normalized_number=normalized_number,   # 规范化标准号(用于匹配)
                 validity=item.get("validity", "")
                 validity=item.get("validity", "")
             )
             )
             self._records.append(record)
             self._records.append(record)
 
 
-            # 建立标准号索引
+            # 建立原始索引(用于返回数据)
             self._number_index[record.standard_number] = record
             self._number_index[record.standard_number] = record
-
-            # 建立名称索引(一个名称可能对应多个标准号)
             if record.standard_name not in self._name_index:
             if record.standard_name not in self._name_index:
                 self._name_index[record.standard_name] = []
                 self._name_index[record.standard_name] = []
             self._name_index[record.standard_name].append(record)
             self._name_index[record.standard_name].append(record)
 
 
+            # 建立规范化索引(用于匹配)
+            self._normalized_number_index[record.normalized_number] = record
+            if record.normalized_name not in self._normalized_name_index:
+                self._normalized_name_index[record.normalized_name] = []
+            self._normalized_name_index[record.normalized_name].append(record)
+
             # 收集现行/试行标准
             # 收集现行/试行标准
             if record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
             if record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
                 self._current_records.append(record)
                 self._current_records.append(record)
 
 
         # 对现行标准按标准号降序排序(用于找最新替代标准)
         # 对现行标准按标准号降序排序(用于找最新替代标准)
-        # 处理可能的 None 值
         self._current_records.sort(
         self._current_records.sort(
             key=lambda r: r.standard_number or "",
             key=lambda r: r.standard_number or "",
             reverse=True
             reverse=True
@@ -134,40 +164,98 @@ class StandardRepository:
         return results
         return results
 
 
     def find_by_number_fuzzy(self, standard_number: str) -> List[StandardRecord]:
     def find_by_number_fuzzy(self, standard_number: str) -> List[StandardRecord]:
-        """模糊匹配标准号"""
+        """模糊匹配标准号(使用规范化数据)"""
         results = []
         results = []
-        # 提取前缀(如 GB/T 5972)
-        parts = standard_number.split("-")
-        prefix = parts[0] if parts else standard_number
+        # 规范化输入的标准号
+        normalized_input = self._normalize_for_matching(standard_number)
 
 
-        for number, record in self._number_index.items():
-            # 前缀匹配
-            if number.startswith(prefix):
+        # 使用规范化索引进行前缀匹配
+        for normalized_number, record in self._normalized_number_index.items():
+            # 前缀匹配:检查是否以规范化后的输入开头,或包含关系
+            if normalized_number.startswith(normalized_input) or normalized_input in normalized_number:
                 results.append(record)
                 results.append(record)
         return results
         return results
 
 
-    def find_current_by_name(self, standard_name: str) -> List[StandardRecord]:
-        """查询指定名称的现行/试行标准(支持模糊匹配)"""
+    def find_current_by_name(self, normalized_standard_name: str) -> List[StandardRecord]:
+        """查询指定名称的现行/试行标准(使用规范化名称匹配)"""
         results = []
         results = []
         for record in self._current_records:
         for record in self._current_records:
-            # 精确匹配
-            if record.standard_name == standard_name:
-                results.append(record)
-            # 模糊匹配(忽略空格、书名号等)
-            elif self._is_name_fuzzy_match_for_repo(record.standard_name, standard_name):
+            # 使用规范化名称匹配
+            if record.normalized_name == normalized_standard_name:
                 results.append(record)
                 results.append(record)
         return results
         return results
 
 
-    def _is_name_fuzzy_match_for_repo(self, name1: str, name2: str) -> bool:
-        """判断两个标准名称是否模糊匹配"""
-        clean1 = name1.replace("《", "").replace("》", "").replace(" ", "").replace(" ", "")
-        clean2 = name2.replace("《", "").replace("》", "").replace(" ", "").replace(" ", "")
-        return clean1 == clean2
+    def _is_name_fuzzy_match_for_repo(self, normalized_name1: str, normalized_name2: str) -> bool:
+        """判断两个标准名称是否模糊匹配(使用规范化名称)"""
+        return normalized_name1 == normalized_name2
 
 
     def get_all_records(self) -> List[StandardRecord]:
     def get_all_records(self) -> List[StandardRecord]:
         """获取所有记录"""
         """获取所有记录"""
         return self._records.copy()
         return self._records.copy()
 
 
+    def _normalize_for_matching(self, text: str) -> str:
+        """
+        规范化文本用于匹配
+        第一轮:去除所有空白字符(包括空格、不间断空格、换行符等)、书名号、括号和 HTML 标签
+        第二轮:从配置读取并去除指定符号
+
+        Args:
+            text: 原始文本
+
+        Returns:
+            规范化后的字符串(去除所有空白、分隔符、HTML 标签和配置指定的符号)
+        """
+        if not text:
+            return ""
+
+        import re
+
+        # ========== 第一轮:基础规范化 ==========
+        # 去除 HTML 标签(如 <1680>)
+        text = re.sub(r'<[^>]+>', '', text)
+        # 去除所有 Unicode 空白字符(包括普通空格、不间断空格、换行等)
+        text = re.sub(r'\s+', '', text)
+        # 去除书名号和括号
+        text = text.replace('《', '').replace('》', '').replace('(', '').replace(')', '').replace('(', '').replace(')', '')
+
+        # ========== 第二轮:从配置读取并去除指定符号 ==========
+        # 读取配置中的符号列表,默认使用常见符号
+        # 包含各种连接符:半角连字符(-)、全角连接号(-)、全角破折号(—)
+        default_symbols = '),-,.,/,,:,[,],【,】,〔,〕,(,),-,—'
+
+        if _CONFIG_AVAILABLE and config_handler:
+            try:
+                symbols_str = config_handler.get('timeliness_review', 'REMOVE_SYMBOLS', default_symbols)
+            except Exception:
+                symbols_str = default_symbols
+        else:
+            symbols_str = default_symbols
+
+        # 解析符号列表(按逗号分割)
+        if symbols_str:
+            symbols_to_remove = [s.strip() for s in symbols_str.split(',') if s.strip()]
+            # 去除每个符号
+            for symbol in symbols_to_remove:
+                text = text.replace(symbol, '')
+
+        return text
+
+    def find_by_normalized_number(self, normalized_number: str) -> Optional[StandardRecord]:
+        """通过规范化标准号精确匹配"""
+        return self._normalized_number_index.get(normalized_number)
+
+    def find_by_normalized_name(self, normalized_name: str) -> List[StandardRecord]:
+        """通过规范化名称匹配"""
+        return self._normalized_name_index.get(normalized_name, [])
+
+    def find_current_by_normalized_name(self, normalized_name: str) -> List[StandardRecord]:
+        """查询指定规范化名称的现行/试行标准"""
+        results = []
+        for record in self._current_records:
+            if record.normalized_name == normalized_name:
+                results.append(record)
+        return results
+
 
 
 class StandardMatcher:
 class StandardMatcher:
     """
     """
@@ -178,52 +266,70 @@ class StandardMatcher:
     def __init__(self, repository: StandardRepository):
     def __init__(self, repository: StandardRepository):
         self.repo = repository
         self.repo = repository
 
 
-    def match(self, seq_no: int, input_name: str, input_number: str) -> StandardMatchResult:
+    def match(self, seq_no: int, input_name: str, input_number: str) -> Optional[StandardMatchResult]:
         """
         """
         执行标准匹配
         执行标准匹配
 
 
         匹配流程:
         匹配流程:
-        1. 标准号精确匹配
-        2. 根据匹配结果进入不同分支处理
+        1. 保存原始输入(用于返回)
+        2. 创建规范化版本(用于匹配)
+        3. 如果规范化后文件名为空,返回 None(跳过审查)
+        4. 使用规范化数据进行匹配
+        5. 返回结果中使用原始数据
+
+        Returns:
+            StandardMatchResult: 匹配结果
+            None: 当规范化文件名为空时返回 None,表示跳过审查
         """
         """
-        # 去除前后空格
-        input_name = input_name.strip() if input_name else input_name
-        input_number = input_number.strip() if input_number else input_number
+        # 1. 保存原始输入
+        raw_name = input_name.strip() if input_name else ""
+        raw_number = input_number.strip() if input_number else ""
+
+        # 2. 创建规范化版本(去除所有符号,只保留中文字符)
+        normalized_name = self.repo._normalize_for_matching(raw_name)
+        normalized_number = self.repo._normalize_for_matching(raw_number)
 
 
-        # 清洗书名号和括号
-        input_name = self._clean_brackets_and_booknames(input_name)
-        input_number = self._clean_brackets_and_booknames(input_number)
+        # 3. 如果规范化后文件名为空,跳过审查
+        if not normalized_name:
+            logger.info(f"文件名规范化后为空,跳过审查。原始名称: '{raw_name}'")
+            return None
 
 
+        # 4. 初始化结果(保存原始和规范化数据)
         result = StandardMatchResult(
         result = StandardMatchResult(
             seq_no=seq_no,
             seq_no=seq_no,
-            original_name=input_name,
-            original_number=input_number
+            raw_name=raw_name,
+            raw_number=raw_number,
+            normalized_name=normalized_name,
+            normalized_number=normalized_number
         )
         )
 
 
-        # 步骤1: 精确匹配标准号
-        match_by_number = self.repo.find_by_number_exact(input_number)
+        # 5. 使用规范化数据进行匹配
+        match_by_number = self.repo.find_by_normalized_number(normalized_number)
 
 
         if match_by_number:
         if match_by_number:
             # 分支A: 标准号匹配成功
             # 分支A: 标准号匹配成功
-            return self._handle_number_matched(result, match_by_number, input_name)
+            return self._handle_number_matched(result, match_by_number)
         else:
         else:
             # 分支B: 标准号未匹配
             # 分支B: 标准号未匹配
-            return self._handle_number_not_matched(result, input_name, input_number)
+            return self._handle_number_not_matched(result, normalized_name, normalized_number)
 
 
     def _handle_number_matched(
     def _handle_number_matched(
         self,
         self,
         result: StandardMatchResult,
         result: StandardMatchResult,
-        db_record: StandardRecord,
-        input_name: str
+        db_record: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
         """处理标准号匹配成功的情况"""
         """处理标准号匹配成功的情况"""
-        # 检查名称是否匹配
-        if db_record.standard_name == input_name:
+        # 保存匹配到的数据库原始数据
+        result.matched_name = db_record.standard_name
+        result.matched_number = db_record.standard_number
+
+        # 使用规范化名称进行比较
+        if db_record.normalized_name == result.normalized_name:
             # 名称也匹配
             # 名称也匹配
             return self._handle_full_match(result, db_record)
             return self._handle_full_match(result, db_record)
         else:
         else:
             # 名称不匹配
             # 名称不匹配
-            return self._handle_name_mismatch(result, db_record, input_name)
+            return self._handle_name_mismatch(result, db_record)
 
 
     def _handle_full_match(
     def _handle_full_match(
         self,
         self,
@@ -231,39 +337,45 @@ class StandardMatcher:
         db_record: StandardRecord
         db_record: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
         """处理名称和标准号都完全匹配的情况"""
         """处理名称和标准号都完全匹配的情况"""
+        # 【调试日志】
+        logger.info(f"[_handle_full_match] 匹配记录: name={db_record.standard_name}, "
+                    f"number={db_record.standard_number}, validity={db_record.validity} "
+                    f"(期望: {ValidityStatus.CURRENT.value}/{ValidityStatus.TRIAL.value}, "
+                    f"实际是否匹配: {db_record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]})")
+
         if db_record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
         if db_record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
             # 情况1: 现行或试行 - 状态正常
             # 情况1: 现行或试行 - 状态正常
             return self._set_ok_result(result)
             return self._set_ok_result(result)
         else:
         else:
             # 废止状态 - 查找替代标准
             # 废止状态 - 查找替代标准
+            logger.info(f"[_handle_full_match] 进入废止处理流程")
             return self._handle_abolished(result, db_record)
             return self._handle_abolished(result, db_record)
 
 
     def _handle_name_mismatch(
     def _handle_name_mismatch(
         self,
         self,
         result: StandardMatchResult,
         result: StandardMatchResult,
-        db_record: StandardRecord,
-        input_name: str
+        db_record: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
         """处理标准号匹配但名称不匹配的情况"""
         """处理标准号匹配但名称不匹配的情况"""
-        # 首先检查是否是名称模糊匹配(忽略空格、书名号等
-        if self._is_name_fuzzy_match(db_record.standard_name, input_name):
-            # 名称模糊匹配成功,按完全匹配处理
+        # 首先检查是否是名称模糊匹配(使用规范化名称
+        if self._is_name_fuzzy_match(db_record.normalized_name, result.normalized_name):
+            # 名称规范化后匹配成功,按完全匹配处理
             return self._handle_full_match(result, db_record)
             return self._handle_full_match(result, db_record)
 
 
-        # 尝试用输入的名称模糊匹配
-        name_matches = self.repo.find_by_name_fuzzy(input_name)
+        # 尝试用规范化名称模糊匹配
+        name_matches = self.repo.find_by_normalized_name(result.normalized_name)
 
 
-        # 查找精确名称匹配
-        exact_match = self._find_exact_name_match(name_matches, input_name)
+        # 查找精确名称匹配(使用规范化名称)
+        exact_match = self._find_exact_name_match(name_matches, result.normalized_name)
 
 
         if exact_match:
         if exact_match:
-            # 找到名称匹配的记录
-            return self._handle_fuzzy_name_match(result, exact_match)
-
-        # 尝试在模糊匹配结果中查找模糊名称匹配
-        for match in name_matches:
-            if self._is_name_fuzzy_match(match.standard_name, input_name):
-                return self._handle_fuzzy_name_match(result, match)
+            # 找到名称匹配的记录,检查标准号是否一致
+            if result.normalized_number == exact_match.normalized_number:
+                # 标准号实质一致,按完全匹配处理
+                return self._handle_full_match(result, exact_match)
+            else:
+                # 名称匹配但标准号不一致 = 标准号错误
+                return self._set_mismatch_result(result, exact_match)
 
 
         # 名称完全不匹配,但标准号已匹配成功
         # 名称完全不匹配,但标准号已匹配成功
         # 说明该标准存在于库中,应返回不匹配而非不存在
         # 说明该标准存在于库中,应返回不匹配而非不存在
@@ -277,42 +389,34 @@ class StandardMatcher:
     def _handle_number_not_matched(
     def _handle_number_not_matched(
         self,
         self,
         result: StandardMatchResult,
         result: StandardMatchResult,
-        input_name: str,
-        input_number: str
+        normalized_name: str,
+        normalized_number: str
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
         """处理标准号未匹配的情况"""
         """处理标准号未匹配的情况"""
-        # 尝试模糊匹配标准号
-        fuzzy_number_matches = self.repo.find_by_number_fuzzy(input_number)
+        # 尝试模糊匹配标准号(使用原始数据的方法,可能需要改进)
+        fuzzy_number_matches = self.repo.find_by_number_fuzzy(normalized_number)
 
 
         if fuzzy_number_matches:
         if fuzzy_number_matches:
             # 检查名称是否匹配
             # 检查名称是否匹配
-            return self._check_name_in_records(result, fuzzy_number_matches, input_name)
+            return self._check_name_in_records(result, fuzzy_number_matches, normalized_name)
         else:
         else:
             # 尝试直接按名称查询
             # 尝试直接按名称查询
-            return self._search_by_name_only(result, input_name)
+            return self._search_by_name_only(result, normalized_name)
 
 
     def _check_name_in_records(
     def _check_name_in_records(
         self,
         self,
         result: StandardMatchResult,
         result: StandardMatchResult,
         records: List[StandardRecord],
         records: List[StandardRecord],
-        input_name: str
+        normalized_name: str
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
-        """在一批记录中查找名称匹配"""
-        # 首先尝试精确匹配
+        """在一批记录中查找名称匹配(标准号已模糊匹配成功)"""
+        # 首先尝试精确匹配(使用规范化名称)
         for record in records:
         for record in records:
-            if record.standard_name == input_name:
+            if record.normalized_name == normalized_name:
                 # 名称匹配,检查状态
                 # 名称匹配,检查状态
                 if record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
                 if record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
-                    return self._set_mismatch_result(result, record)
-                elif record.validity == ValidityStatus.ABOLISHED.value:
-                    return self._handle_abolished(result, record)
-
-        # 尝试模糊名称匹配(忽略空格和书名号)
-        for record in records:
-            if self._is_name_fuzzy_match(record.standard_name, input_name):
-                # 名称模糊匹配成功
-                if record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
-                    return self._set_mismatch_result(result, record)
+                    # 标准号模糊匹配成功 + 名称匹配 + 现行/试行 = 正常
+                    return self._set_ok_result(result)
                 elif record.validity == ValidityStatus.ABOLISHED.value:
                 elif record.validity == ValidityStatus.ABOLISHED.value:
                     return self._handle_abolished(result, record)
                     return self._handle_abolished(result, record)
 
 
@@ -322,35 +426,22 @@ class StandardMatcher:
     def _search_by_name_only(
     def _search_by_name_only(
         self,
         self,
         result: StandardMatchResult,
         result: StandardMatchResult,
-        input_name: str
+        normalized_name: str
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
-        """仅通过名称查询"""
-        # 精确匹配名称
-        name_match = self.repo.find_by_name_exact(input_name)
+        """仅通过名称查询(标准号未匹配)"""
+        # 精确匹配规范化名称
+        name_matches = self.repo.find_by_normalized_name(normalized_name)
 
 
-        if name_match:
+        if name_matches:
+            # 取第一个匹配的记录
+            name_match = name_matches[0]
             if name_match.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
             if name_match.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
+                # 标准号不匹配但名称匹配 + 现行/试行 = 标准号错误(不匹配)
                 return self._set_mismatch_result(result, name_match)
                 return self._set_mismatch_result(result, name_match)
             elif name_match.validity == ValidityStatus.ABOLISHED.value:
             elif name_match.validity == ValidityStatus.ABOLISHED.value:
-                return self._set_not_found_result(result)
-
-        # 模糊匹配名称
-        fuzzy_matches = self.repo.find_by_name_fuzzy(input_name)
-
-        # 首先尝试精确匹配
-        exact_match = self._find_exact_name_match(fuzzy_matches, input_name)
-        if exact_match:
-            if exact_match.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
-                return self._set_mismatch_result(result, exact_match)
-
-        # 尝试模糊名称匹配(忽略空格、书名号等)
-        for match in fuzzy_matches:
-            if self._is_name_fuzzy_match(match.standard_name, input_name):
-                if match.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
-                    return self._set_mismatch_result(result, match)
-                elif match.validity == ValidityStatus.ABOLISHED.value:
-                    return self._handle_abolished(result, match)
+                return self._handle_abolished(result, name_match)
 
 
+        # 名称未找到
         return self._set_not_found_result(result)
         return self._set_not_found_result(result)
 
 
     def _handle_fuzzy_name_match(
     def _handle_fuzzy_name_match(
@@ -358,9 +449,10 @@ class StandardMatcher:
         result: StandardMatchResult,
         result: StandardMatchResult,
         match_record: StandardRecord
         match_record: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
-        """处理模糊名称匹配成功的情况"""
+        """处理模糊名称匹配成功的情况(标准号已匹配)"""
         if match_record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
         if match_record.validity in [ValidityStatus.CURRENT.value, ValidityStatus.TRIAL.value]:
-            return self._set_mismatch_result(result, match_record)
+            # 标准号匹配 + 名称模糊匹配 + 现行/试行 = 正常
+            return self._set_ok_result(result)
         elif match_record.validity == ValidityStatus.ABOLISHED.value:
         elif match_record.validity == ValidityStatus.ABOLISHED.value:
             return self._handle_abolished(result, match_record)
             return self._handle_abolished(result, match_record)
         return self._set_not_found_result(result)
         return self._set_not_found_result(result)
@@ -371,8 +463,8 @@ class StandardMatcher:
         abolished_record: StandardRecord
         abolished_record: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
         """处理已废止标准的情况"""
         """处理已废止标准的情况"""
-        # 查询同名现行标准作为替代
-        substitutes = self.repo.find_current_by_name(abolished_record.standard_name)
+        # 查询同名现行标准作为替代(使用规范化名称)
+        substitutes = self.repo.find_current_by_normalized_name(abolished_record.normalized_name)
 
 
         if substitutes:
         if substitutes:
             # 有替代标准,取最新的(已按标准号降序)
             # 有替代标准,取最新的(已按标准号降序)
@@ -422,26 +514,27 @@ class StandardMatcher:
         result: StandardMatchResult,
         result: StandardMatchResult,
         substitute: StandardRecord
         substitute: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
-        """设置被替代的结果"""
+        """设置被替代的结果 - 使用原始数据显示"""
         result.substitute_name = self._format_standard_name(substitute.standard_name)
         result.substitute_name = self._format_standard_name(substitute.standard_name)
         result.substitute_number = self._format_standard_number(substitute.standard_number)
         result.substitute_number = self._format_standard_number(substitute.standard_number)
         result.process_result = "被替代"
         result.process_result = "被替代"
         result.status_code = MatchResultCode.SUBSTITUTED.value
         result.status_code = MatchResultCode.SUBSTITUTED.value
+        # 使用 raw_name(原始输入)和 matched_name(数据库原始值)显示
         result.final_result = (
         result.final_result = (
-            f"{self._format_standard_name(result.original_name)}"
-            f"{self._format_standard_number(result.original_number)}已废止,"
+            f"{self._format_standard_name(result.raw_name)}"
+            f"{self._format_standard_number(result.raw_number)}已废止,"
             f"替代{self._format_standard_name(substitute.standard_name)}"
             f"替代{self._format_standard_name(substitute.standard_name)}"
             f"{self._format_standard_number(substitute.standard_number)}"
             f"{self._format_standard_number(substitute.standard_number)}"
         )
         )
         return result
         return result
 
 
     def _set_abolished_result(self, result: StandardMatchResult) -> StandardMatchResult:
     def _set_abolished_result(self, result: StandardMatchResult) -> StandardMatchResult:
-        """设置废止无替代的结果"""
+        """设置废止无替代的结果 - 使用原始数据显示"""
         result.process_result = "废止无现行"
         result.process_result = "废止无现行"
         result.status_code = MatchResultCode.ABOLISHED.value
         result.status_code = MatchResultCode.ABOLISHED.value
         result.final_result = (
         result.final_result = (
-            f"{self._format_standard_name(result.original_name)}"
-            f"{self._format_standard_number(result.original_number)}已废止,无现行状态"
+            f"{self._format_standard_name(result.raw_name)}"
+            f"{self._format_standard_number(result.raw_number)}已废止,无现行状态"
         )
         )
         return result
         return result
 
 
@@ -450,40 +543,37 @@ class StandardMatcher:
         result: StandardMatchResult,
         result: StandardMatchResult,
         actual: StandardRecord
         actual: StandardRecord
     ) -> StandardMatchResult:
     ) -> StandardMatchResult:
-        """设置不匹配的结果"""
+        """设置不匹配的结果 - 使用原始数据显示"""
         result.substitute_name = self._format_standard_name(actual.standard_name)
         result.substitute_name = self._format_standard_name(actual.standard_name)
         result.substitute_number = self._format_standard_number(actual.standard_number)
         result.substitute_number = self._format_standard_number(actual.standard_number)
         result.process_result = "不匹配"
         result.process_result = "不匹配"
         result.status_code = MatchResultCode.MISMATCH.value
         result.status_code = MatchResultCode.MISMATCH.value
         result.final_result = (
         result.final_result = (
-            f"{self._format_standard_name(result.original_name)}"
-            f"{self._format_standard_number(result.original_number)}"
+            f"{self._format_standard_name(result.raw_name)}"
+            f"{self._format_standard_number(result.raw_number)}"
             f"与实际{self._format_standard_name(actual.standard_name)}"
             f"与实际{self._format_standard_name(actual.standard_name)}"
             f"{self._format_standard_number(actual.standard_number)}不匹配"
             f"{self._format_standard_number(actual.standard_number)}不匹配"
         )
         )
         return result
         return result
 
 
     def _set_not_found_result(self, result: StandardMatchResult) -> StandardMatchResult:
     def _set_not_found_result(self, result: StandardMatchResult) -> StandardMatchResult:
-        """设置不存在的结果"""
+        """设置不存在的结果 - 使用原始数据显示"""
         result.process_result = "标准库不存在"
         result.process_result = "标准库不存在"
         result.status_code = MatchResultCode.NOT_FOUND.value
         result.status_code = MatchResultCode.NOT_FOUND.value
         result.final_result = (
         result.final_result = (
-            f"{self._format_standard_name(result.original_name)}"
-            f"{self._format_standard_number(result.original_number)}标准库不存在,请确认"
+            f"{self._format_standard_name(result.raw_name)}"
+            f"{self._format_standard_number(result.raw_number)}标准库不存在,请确认"
         )
         )
         return result
         return result
 
 
     # ========== 工具方法 ==========
     # ========== 工具方法 ==========
 
 
-    def _is_name_fuzzy_match(self, name1: str, name2: str) -> bool:
+    def _is_name_fuzzy_match(self, normalized_name1: str, normalized_name2: str) -> bool:
         """
         """
         判断两个标准名称是否模糊匹配
         判断两个标准名称是否模糊匹配
-        只去除书名号,保留中间空格(中间空格属于名称的一部分
+        使用规范化后的名称进行比较(已去除空格、括号、书名号等
         """
         """
-        # 清理书名号,但保留中间空格
-        clean1 = name1.replace("《", "").replace("》", "")
-        clean2 = name2.replace("《", "").replace("》", "")
-        return clean1 == clean2
+        return normalized_name1 == normalized_name2
 
 
     def _clean_brackets_and_booknames(self, text: str) -> str:
     def _clean_brackets_and_booknames(self, text: str) -> str:
         """
         """
@@ -545,14 +635,49 @@ class StandardMatcher:
 
 
         return text
         return text
 
 
+    def _extract_chinese_chars(self, text: str) -> str:
+        """
+        提取字符串中的中文字符和空格
+        保留:中文字符(\u4e00-\u9fa5)、中文标点、空格(无换行符时)
+        删除:英文、数字、特殊符号、换行符等
+        特殊处理:如果存在换行符,则去除所有空格
+        """
+        if not text:
+            return text
+
+        import re
+
+        # 检查是否存在换行符(在清洗前检查)
+        has_newline = '\n' in text or '\r' in text
+
+        # 首先去除换行符及其旁边的所有空格
+        text = re.sub(r'\s*[\n\r]+\s*', '', text)
+        # 去除制表符
+        text = text.replace('\t', '')
+
+        if has_newline:
+            # 有换行符时:提取中文字符,去除所有空格
+            chinese_pattern = re.compile(r'[\u4e00-\u9fa5\u3000-\u303F\uFF00-\uFFEF]+')
+            matches = chinese_pattern.findall(text)
+            result = ''.join(matches)
+            # 去除所有空格(包括全角空格)
+            result = result.replace(' ', '').replace(' ', '')
+            return result.strip()
+        else:
+            # 无换行符时:提取中文字符和空格,保留中间空格
+            chinese_pattern = re.compile(r'[\u4e00-\u9fa5\u3000-\u303F\uFF00-\uFFEF\s]+')
+            matches = chinese_pattern.findall(text)
+            result = ''.join(matches)
+            return result.strip()
+
     def _find_exact_name_match(
     def _find_exact_name_match(
         self,
         self,
         records: List[StandardRecord],
         records: List[StandardRecord],
-        target_name: str
+        target_normalized_name: str
     ) -> Optional[StandardRecord]:
     ) -> Optional[StandardRecord]:
-        """在记录列表中查找精确名称匹配"""
+        """在记录列表中查找规范化名称精确匹配"""
         for record in records:
         for record in records:
-            if record.standard_name == target_name:
+            if record.normalized_name == target_normalized_name:
                 return record
                 return record
         return None
         return None
 
 
@@ -632,7 +757,7 @@ class StandardMatchingService:
                 - standard_number: 标准号(原始)
                 - standard_number: 标准号(原始)
 
 
         Returns:
         Returns:
-            List[StandardMatchResult]: 匹配结果列表
+            List[StandardMatchResult]: 匹配结果列表(文件名为空的会被过滤掉)
         """
         """
         if not self._initialized:
         if not self._initialized:
             raise RuntimeError("服务未初始化,请先调用 initialize()")
             raise RuntimeError("服务未初始化,请先调用 initialize()")
@@ -644,7 +769,9 @@ class StandardMatchingService:
                 input_name=std.get("standard_name", ""),
                 input_name=std.get("standard_name", ""),
                 input_number=std.get("standard_number", "")
                 input_number=std.get("standard_number", "")
             )
             )
-            results.append(result)
+            # 跳过文件名为空的情况(match 返回 None)
+            if result is not None:
+                results.append(result)
         return results
         return results
 
 
     def check_single(
     def check_single(
@@ -652,7 +779,7 @@ class StandardMatchingService:
         seq_no: int,
         seq_no: int,
         standard_name: str,
         standard_name: str,
         standard_number: str
         standard_number: str
-    ) -> StandardMatchResult:
+    ) -> Optional[StandardMatchResult]:
         """
         """
         检查单个标准
         检查单个标准
 
 
@@ -663,6 +790,7 @@ class StandardMatchingService:
 
 
         Returns:
         Returns:
             StandardMatchResult: 匹配结果
             StandardMatchResult: 匹配结果
+            None: 当文件名为空时返回 None,表示跳过审查
         """
         """
         if not self._initialized:
         if not self._initialized:
             raise RuntimeError("服务未初始化,请先调用 initialize()")
             raise RuntimeError("服务未初始化,请先调用 initialize()")