3 天之前 · 1fb3abc958
--- a/core/construction_review/component/reviewers/timeliness_content_reviewer.py
+++ b/core/construction_review/component/reviewers/timeliness_content_reviewer.py
@@ -114,6 +114,13 @@ class StandardExtractor:
 
															                     location_info=location_info or {}
														
 
															                 )
														
 
															                 references.append(ref)
														
 
															+            else:
														
 
															+                logger.info(
														
 
															+                    "[三级内容提取跳过] "
														
 
															+                    f"括号内容不符合标准号格式，standard_name={name.strip()}, "
														
 
															+                    f"bracket_content={number.strip()}, "
														
 
															+                    f"location_info={location_info or {}}"
														
 
															+                )
														
 
															         # 2. 提取孤立的规范编号（用于补充）
														
 
															         number_matches = self.STANDARD_NUMBER_ONLY_PATTERN.findall(content)
														
--- a/core/construction_review/component/standard_matching/standard_service.py
+++ b/core/construction_review/component/standard_matching/standard_service.py
@@ -9,6 +9,7 @@
 
															 - StandardMatcher: 匹配规则逻辑
														
 
															 - StandardMatchingService: 对外服务接口
														
 
															 """
														
 
															+import re
														
 
															 from typing import List, Dict, Optional
														
 
															 from dataclasses import dataclass, field
														
 
															 from enum import Enum
														
@@ -85,6 +86,7 @@ class StandardRepository:
 
															         # 规范化索引（用于匹配）
														
 
															         self._normalized_number_index: Dict[str, StandardRecord] = {}  # 规范化标准号 -> 记录
														
 
															+        self._normalized_number_records_index: Dict[str, List[StandardRecord]] = {}  # 规范化标准号 -> 多条记录
														
 
															         self._normalized_name_index: Dict[str, List[StandardRecord]] = {}  # 规范化名称 -> 记录列表
														
 
															     def load_data(self, raw_data: List[Dict]):
														
@@ -100,6 +102,7 @@ class StandardRepository:
 
															         self._name_index = {}
														
 
															         self._current_records = []
														
 
															         self._normalized_number_index = {}
														
 
															+        self._normalized_number_records_index = {}
														
 
															         self._normalized_name_index = {}
														
 
															         for item in raw_data:
														
@@ -131,6 +134,9 @@ class StandardRepository:
 
															             # 建立规范化索引（用于匹配）
														
 
															             self._normalized_number_index[record.normalized_number] = record
														
 
															+            if record.normalized_number not in self._normalized_number_records_index:
														
 
															+                self._normalized_number_records_index[record.normalized_number] = []
														
 
															+            self._normalized_number_records_index[record.normalized_number].append(record)
														
 
															             if record.normalized_name not in self._normalized_name_index:
														
 
															                 self._normalized_name_index[record.normalized_name] = []
														
 
															             self._normalized_name_index[record.normalized_name].append(record)
														
@@ -170,10 +176,10 @@ class StandardRepository:
 
															         normalized_input = self._normalize_for_matching(standard_number)
														
 
															         # 使用规范化索引进行前缀匹配
														
 
															-        for normalized_number, record in self._normalized_number_index.items():
														
 
															+        for normalized_number, records in self._normalized_number_records_index.items():
														
 
															             # 前缀匹配：检查是否以规范化后的输入开头，或包含关系
														
 
															             if normalized_number.startswith(normalized_input) or normalized_input in normalized_number:
														
 
															-                results.append(record)
														
 
															+                results.extend(records)
														
 
															         return results
														
 
															     def find_current_by_name(self, normalized_standard_name: str) -> List[StandardRecord]:
														
@@ -246,6 +252,10 @@ class StandardRepository:
 
															         """通过规范化标准号精确匹配"""
														
 
															         return self._normalized_number_index.get(normalized_number)
														
 
															+    def find_all_by_normalized_number(self, normalized_number: str) -> List[StandardRecord]:
															+        """Return every record indexed under the given normalized standard number.
															+
															+        A fresh list is returned on every call so that callers can sort or
															+        mutate the result without corrupting the repository's internal
															+        index. An empty list is returned when nothing is indexed under the key.
															+        """
															+        return list(self._normalized_number_records_index.get(normalized_number, ()))
														
 
															+
														
 
															     def find_by_normalized_name(self, normalized_name: str) -> List[StandardRecord]:
														
 
															         """Return the records indexed under the given normalized name, or an empty list if none."""
														
 
															         return self._normalized_name_index.get(normalized_name, [])
														
@@ -267,6 +277,10 @@ class StandardMatcher:
 
															     def __init__(self, repository: StandardRepository):
														
 
															+        """Store the repository and pre-compile the year-noise patterns used by the skip rules."""
														
 
															         self.repo = repository
														
 
															+        # Matches text made up solely of one or more years in 2000-2026,
														
 
															+        # each optionally followed by one of the markers 年版 / 年 / 版.
														
 
															+        self._year_version_only_name_pattern = re.compile(
														
 
															+            r"^(?:(?:20(?:0\d|1\d|2[0-6]))(?:年版|年|版)?)+$"
														
 
															+        )
														
 
															+        # Matches a bare four-digit year in 2000-2026 and nothing else.
														
 
															+        self._year_only_number_pattern = re.compile(r"^20(?:0\d|1\d|2[0-6])$")
														
 
															     def match(self, seq_no: int, input_name: str, input_number: str) -> Optional[StandardMatchResult]:
														
 
															         """
														
@@ -287,6 +301,13 @@ class StandardMatcher:
 
															         raw_name = input_name.strip() if input_name else ""
														
 
															         raw_number = input_number.strip() if input_number else ""
														
 
															+        if self._is_year_only_number(raw_number):
														
 
															+            logger.info(
														
 
															+                "[skip_year_only_number] "
														
 
															+                f"raw_name={raw_name}, raw_number={raw_number}"
														
 
															+            )
														
 
															+            return None
														
 
															+
														
 
															         # 2. 创建规范化版本（去除所有符号，只保留中文字符）
														
 
															         normalized_name = self.repo._normalize_for_matching(raw_name)
														
 
															         normalized_number = self.repo._normalize_for_matching(raw_number)
														
@@ -306,7 +327,11 @@ class StandardMatcher:
 
															         )
														
 
															         # 5. 使用规范化数据进行匹配
														
 
															-        match_by_number = self.repo.find_by_normalized_number(normalized_number)
														
 
															+        exact_number_matches = self.repo.find_all_by_normalized_number(normalized_number)
														
 
															+        if self._should_skip_ambiguous_numeric_version_name(raw_name, raw_number, exact_number_matches):
														
 
															+            return None
														
 
															+
														
 
															+        match_by_number = exact_number_matches[0] if exact_number_matches else None
														
 
															         if match_by_number:
														
 
															             logger.info(
														
 
															                 "[standard_number_exact_match] "
														
@@ -584,6 +609,49 @@ class StandardMatcher:
 
															         """
														
 
															         return normalized_name1 == normalized_name2
														
 
															+    def _should_skip_ambiguous_numeric_version_name(
															+        self,
															+        raw_name: str,
															+        raw_number: str,
															+        records: List[StandardRecord]
															+    ) -> bool:
															+        """Tell whether the timeliness check should be skipped for this reference.
															+
															+        Returns True only when all of the following hold:
															+        - ``records`` contains at least two entries,
															+        - those entries carry at least two distinct (stripped, non-empty) names,
															+        - ``raw_name`` consists solely of year/edition noise.
															+        The decision to skip is logged before returning True.
															+        """
															+        # A single record (or none) can never be ambiguous.
															+        if len(records) <= 1:
															+            return False
															+
															+        candidate_names = set()
															+        for rec in records:
															+            if rec.standard_name:
															+                candidate_names.add(rec.standard_name.strip())
															+
															+        # Same guard order as before: distinct-name count first, then the name check.
															+        if len(candidate_names) <= 1 or not self._is_year_version_only_name(raw_name):
															+            return False
															+
															+        logger.info(
															+            "[skip_ambiguous_numeric_version_name] "
															+            f"raw_name={raw_name}, raw_number={raw_number}, "
															+            f"candidate_names={sorted(candidate_names)}"
															+        )
															+        return True
														
 
															+
														
 
															+    def _is_year_version_only_name(self, raw_name: str) -> bool:
															+        """Return True when the extracted name is only 2000-2026 year / edition noise.
															+
															+        All whitespace is removed from ``raw_name`` and the remainder must
															+        fully match ``self._year_version_only_name_pattern``. That pattern
															+        admits only year digits and the 年 / 版 markers, so names containing
															+        Latin letters or any other text are rejected by the match itself;
															+        no separate letter check is needed.
															+
															+        An empty or missing name is never treated as noise.
															+        """
															+        if not raw_name:
															+            return False
															+
															+        compact_name = re.sub(r"\s+", "", raw_name)
															+        return bool(self._year_version_only_name_pattern.fullmatch(compact_name))
														
 
															+
														
 
															+    def _is_year_only_number(self, raw_number: str) -> bool:
															+        """Return True when the extracted standard number is nothing but a year in 2000-2026.
															+
															+        Whitespace inside ``raw_number`` is ignored; an empty or missing
															+        value yields False.
															+        """
															+        if raw_number:
															+            squeezed = re.sub(r"\s+", "", raw_number)
															+            if self._year_only_number_pattern.fullmatch(squeezed):
															+                return True
															+        return False
														
 
															+
														
 
															     def _clean_brackets_and_booknames(self, text: str) -> str:
														
 
															         """
														
 
															         清洗字符串前后的书名号和括号
														
--- a/utils_test/Timeliness_Test/test_timeliness_basis_extraction.py
+++ b/utils_test/Timeliness_Test/test_timeliness_basis_extraction.py
@@ -0,0 +1,67 @@
 
															+import importlib.util
														
 
															+import pathlib
														
 
															+import sys
														
 
															+import types
														
 
															+
														
 
															+import pytest
														
 
															+
														
 
															+
														
 
															+# Directory that contains this test file.
														
 
															+CURRENT_DIR = pathlib.Path(__file__).resolve().parent
														
 
															+# Two directory levels above CURRENT_DIR.
														
 
															+PROJECT_ROOT = CURRENT_DIR.parent.parent
														
 
															+# Source file of the module under test; the basis_module fixture loads it directly from this path.
														
 
															+MODULE_PATH = PROJECT_ROOT / "core" / "construction_review" / "component" / "reviewers" / "timeliness_basis_reviewer.py"
														
 
															+
														
 
															+
														
 
															+@pytest.fixture
															+def basis_module(monkeypatch):
															+    """Load the module at MODULE_PATH with its project-level imports replaced by stubs.
															+
															+    Every dotted module name below is registered in ``sys.modules`` through
															+    ``monkeypatch`` (so it is undone after the test), then the file is
															+    executed under a private module name and the resulting module returned.
															+    """
															+    # Dotted module name -> attributes the stub must expose (insertion order is kept).
															+    stub_attrs = {
															+        "foundation": {},
															+        "foundation.observability": {},
															+        "foundation.observability.logger": {},
															+        "foundation.observability.logger.loggering": {
															+            "review_logger": types.SimpleNamespace(
															+                info=lambda *args, **kwargs: None,
															+                warning=lambda *args, **kwargs: None,
															+                error=lambda *args, **kwargs: None,
															+            ),
															+        },
															+        "core": {},
															+        "core.construction_review": {},
															+        "core.construction_review.component": {},
															+        "core.construction_review.component.reviewers": {},
															+        "core.construction_review.component.reviewers.utils": {},
															+        "core.construction_review.component.reviewers.utils.inter_tool": {
															+            "InterTool": type("InterTool", (), {}),
															+        },
															+        "core.construction_review.component.reviewers.utils.directory_extraction": {
															+            "BasisItems": type("BasisItems", (), {}),
															+            "BasisItem": type("BasisItem", (), {}),
															+        },
															+        "core.construction_review.component.reviewers.standard_timeliness_reviewer": {
															+            "StandardTimelinessReviewer": type("StandardTimelinessReviewer", (), {}),
															+            "review_standard_timeliness_with_standardized_output": lambda *args, **kwargs: None,
															+        },
															+    }
															+
															+    for dotted_name, attrs in stub_attrs.items():
															+        stub = types.ModuleType(dotted_name)
															+        for attr_name, attr_value in attrs.items():
															+            setattr(stub, attr_name, attr_value)
															+        monkeypatch.setitem(sys.modules, dotted_name, stub)
															+
															+    spec = importlib.util.spec_from_file_location("test_timeliness_basis_reviewer_module", MODULE_PATH)
															+    loaded = importlib.util.module_from_spec(spec)
															+    assert spec.loader is not None
															+    spec.loader.exec_module(loaded)
															+    return loaded
														
 
															+
														
 
															+
														
 
															+@pytest.fixture
														
 
															+def basis_service(basis_module):
														
 
															+    """Provide a BasisReviewService instance created from the module supplied by basis_module."""
														
 
															+    return basis_module.BasisReviewService()
														
 
															+
														
 
															+def test_debug_single_input(basis_service):
															+    """A 《name》(number) entry is split into its standard name and bracketed number."""
															+    text = "《okk》(主席令第29号)"
															+    result = basis_service._extract_standard_from_basis(text)
															+    # The debugging print that used to follow was dropped: the assertion
															+    # already reports the actual value on failure.
															+    assert result == {
															+        "standard_name": "okk",
															+        "standard_number": "主席令第29号",
															+    }
														
--- a/utils_test/standard_new_Test/test_core_standard_matching_skip_rule.py
+++ b/utils_test/standard_new_Test/test_core_standard_matching_skip_rule.py
@@ -0,0 +1,81 @@
 
															+import pytest
														
 
															+
														
 
															+from core.construction_review.component.standard_matching.standard_service import (
														
 
															+    MatchResultCode,
														
 
															+    StandardMatcher,
														
 
															+    StandardRepository,
														
 
															+)
														
 
															+
														
 
															+
														
 
															+@pytest.fixture
															+def repository_with_duplicate_numbers():
															+    """Build a repository whose two records share one standard number but differ in name."""
															+    shared_number = "GB 50010-2010"
															+    names = ("混凝土结构设计规范", "混凝土结构设计标准")
															+    rows = [
															+        {
															+            "id": row_id,
															+            "standard_name": name,
															+            "standard_number": shared_number,
															+            "validity": "XH",
															+        }
															+        for row_id, name in enumerate(names, start=1)
															+    ]
															+
															+    repo = StandardRepository()
															+    repo.load_data(rows)
															+    return repo
														
 
															+
														
 
															+
														
 
															+def test_skip_when_name_is_year_version_only_and_number_maps_to_multiple_names(
															+    repository_with_duplicate_numbers,
															+):
															+    """A year/edition-only name plus a number shared by several names yields no result."""
															+    outcome = StandardMatcher(repository_with_duplicate_numbers).match(
															+        seq_no=1,
															+        input_name="2024年版",
															+        input_number="GB 50010-2010",
															+    )
															+
															+    assert outcome is None
														
 
															+
														
 
															+
														
 
															+def test_do_not_skip_when_name_is_real_standard_name(repository_with_duplicate_numbers):
															+    """A genuine standard name is still matched even though the number is shared."""
															+    outcome = StandardMatcher(repository_with_duplicate_numbers).match(
															+        seq_no=1,
															+        input_name="混凝土结构设计规范",
															+        input_number="GB 50010-2010",
															+    )
															+
															+    assert outcome is not None
															+    assert outcome.status_code == MatchResultCode.OK.value
														
 
															+
														
 
															+
														
 
															+def test_do_not_skip_when_name_contains_extra_chinese_text(repository_with_duplicate_numbers):
															+    """A name with text beyond year/edition markers is matched and reported as a mismatch."""
															+    outcome = StandardMatcher(repository_with_duplicate_numbers).match(
															+        seq_no=1,
															+        input_name="2024年修订版",
															+        input_number="GB 50010-2010",
															+    )
															+
															+    assert outcome is not None
															+    assert outcome.status_code == MatchResultCode.MISMATCH.value
														
 
															+
														
 
															+
														
 
															+def test_skip_when_raw_number_is_year_only(repository_with_duplicate_numbers):
															+    """A standard number that is just a year produces no match result."""
															+    outcome = StandardMatcher(repository_with_duplicate_numbers).match(
															+        seq_no=1,
															+        input_name="四川省安全生产条例",
															+        input_number="2023",
															+    )
															+
															+    assert outcome is None