1 сар өмнө · 1fb3abc958
--- a/core/construction_review/component/reviewers/timeliness_content_reviewer.py
+++ b/core/construction_review/component/reviewers/timeliness_content_reviewer.py
@@ -114,6 +114,13 @@ class StandardExtractor:
 
				                     location_info=location_info or {}
			
 
				                 )
			
 
				                 references.append(ref)
			
 
				+            else:
			
 
				+                logger.info(
			
 
				+                    "[三级内容提取跳过] "
			
 
				+                    f"括号内容不符合标准号格式，standard_name={name.strip()}, "
			
 
				+                    f"bracket_content={number.strip()}, "
			
 
				+                    f"location_info={location_info or {}}"
			
 
				+                )
			
 
				 
			
 
				         # 2. 提取孤立的规范编号（用于补充）
			
 
				         number_matches = self.STANDARD_NUMBER_ONLY_PATTERN.findall(content)
			
--- a/core/construction_review/component/standard_matching/standard_service.py
+++ b/core/construction_review/component/standard_matching/standard_service.py
@@ -9,6 +9,7 @@
 
				 - StandardMatcher: 匹配规则逻辑
			
 
				 - StandardMatchingService: 对外服务接口
			
 
				 """
			
 
				+import re
			
 
				 from typing import List, Dict, Optional
			
 
				 from dataclasses import dataclass, field
			
 
				 from enum import Enum
			
@@ -85,6 +86,7 @@ class StandardRepository:
 
				 
			
 
				         # 规范化索引（用于匹配）
			
 
				         self._normalized_number_index: Dict[str, StandardRecord] = {}  # 规范化标准号 -> 记录
			
 
				+        self._normalized_number_records_index: Dict[str, List[StandardRecord]] = {}  # 规范化标准号 -> 多条记录
			
 
				         self._normalized_name_index: Dict[str, List[StandardRecord]] = {}  # 规范化名称 -> 记录列表
			
 
				 
			
 
				     def load_data(self, raw_data: List[Dict]):
			
@@ -100,6 +102,7 @@ class StandardRepository:
 
				         self._name_index = {}
			
 
				         self._current_records = []
			
 
				         self._normalized_number_index = {}
			
 
				+        self._normalized_number_records_index = {}
			
 
				         self._normalized_name_index = {}
			
 
				 
			
 
				         for item in raw_data:
			
@@ -131,6 +134,9 @@ class StandardRepository:
 
				 
			
 
				             # 建立规范化索引（用于匹配）
			
 
				             self._normalized_number_index[record.normalized_number] = record
			
 
				+            if record.normalized_number not in self._normalized_number_records_index:
			
 
				+                self._normalized_number_records_index[record.normalized_number] = []
			
 
				+            self._normalized_number_records_index[record.normalized_number].append(record)
			
 
				             if record.normalized_name not in self._normalized_name_index:
			
 
				                 self._normalized_name_index[record.normalized_name] = []
			
 
				             self._normalized_name_index[record.normalized_name].append(record)
			
@@ -170,10 +176,10 @@ class StandardRepository:
 
				         normalized_input = self._normalize_for_matching(standard_number)
			
 
				 
			
 
				         # 使用规范化索引进行前缀匹配
			
 
				-        for normalized_number, record in self._normalized_number_index.items():
			
 
				+        for normalized_number, records in self._normalized_number_records_index.items():
			
 
				             # 前缀匹配：检查是否以规范化后的输入开头，或包含关系
			
 
				             if normalized_number.startswith(normalized_input) or normalized_input in normalized_number:
			
 
				-                results.append(record)
			
 
				+                results.extend(records)
			
 
				         return results
			
 
				 
			
 
				     def find_current_by_name(self, normalized_standard_name: str) -> List[StandardRecord]:
			
@@ -246,6 +252,10 @@ class StandardRepository:
 
				         """通过规范化标准号精确匹配"""
			
 
				         return self._normalized_number_index.get(normalized_number)
			
 
				 
			
 
				+    def find_all_by_normalized_number(self, normalized_number: str) -> List[StandardRecord]:
			
 
				+        """Return every record indexed under this normalized standard number, or an empty list."""
			
 
				+        return self._normalized_number_records_index.get(normalized_number, [])
			
 
				+
			
 
				     def find_by_normalized_name(self, normalized_name: str) -> List[StandardRecord]:
			
 
				         """通过规范化名称匹配"""
			
 
				         return self._normalized_name_index.get(normalized_name, [])
			
@@ -267,6 +277,10 @@ class StandardMatcher:
 
				 
			
 
				     def __init__(self, repository: StandardRepository):
			
 
				         self.repo = repository
			
 
				+        self._year_version_only_name_pattern = re.compile(  # name made only of 2000-2026 years, each optionally followed by 年版/年/版
			
 
				+            r"^(?:(?:20(?:0\d|1\d|2[0-6]))(?:年版|年|版)?)+$"
			
 
				+        )
			
 
				+        self._year_only_number_pattern = re.compile(r"^20(?:0\d|1\d|2[0-6])$")  # bare year 2000-2026; NOTE(review): the 2026 upper bound is hard-coded
			
 
				 
			
 
				     def match(self, seq_no: int, input_name: str, input_number: str) -> Optional[StandardMatchResult]:
			
 
				         """
			
@@ -287,6 +301,13 @@ class StandardMatcher:
 
				         raw_name = input_name.strip() if input_name else ""
			
 
				         raw_number = input_number.strip() if input_number else ""
			
 
				 
			
 
				+        if self._is_year_only_number(raw_number):
			
 
				+            logger.info(
			
 
				+                "[skip_year_only_number] "
			
 
				+                f"raw_name={raw_name}, raw_number={raw_number}"
			
 
				+            )
			
 
				+            return None
			
 
				+
			
 
				         # 2. 创建规范化版本（去除所有符号，只保留中文字符）
			
 
				         normalized_name = self.repo._normalize_for_matching(raw_name)
			
 
				         normalized_number = self.repo._normalize_for_matching(raw_number)
			
@@ -306,7 +327,11 @@ class StandardMatcher:
 
				         )
			
 
				 
			
 
				         # 5. 使用规范化数据进行匹配
			
 
				-        match_by_number = self.repo.find_by_normalized_number(normalized_number)
			
 
				+        exact_number_matches = self.repo.find_all_by_normalized_number(normalized_number)
			
 
				+        if self._should_skip_ambiguous_numeric_version_name(raw_name, raw_number, exact_number_matches):
			
 
				+            return None
			
 
				+
			
 
				+        match_by_number = exact_number_matches[0] if exact_number_matches else None
			
 
				         if match_by_number:
			
 
				             logger.info(
			
 
				                 "[standard_number_exact_match] "
			
@@ -584,6 +609,49 @@ class StandardMatcher:
 
				         """
			
 
				         return normalized_name1 == normalized_name2
			
 
				 
			
 
				+    def _should_skip_ambiguous_numeric_version_name(
			
 
				+        self,
			
 
				+        raw_name: str,
			
 
				+        raw_number: str,
			
 
				+        records: List[StandardRecord]
			
 
				+    ) -> bool:
			
 
				+        """Return True when records carry two or more distinct names and raw_name is only a year/edition token; raw_number is used for logging only."""
			
 
				+        if len(records) < 2:  # fewer than two records cannot be ambiguous
			
 
				+            return False
			
 
				+
			
 
				+        unique_names = {record.standard_name.strip() for record in records if record.standard_name}  # distinct non-empty names, stripped
			
 
				+        if len(unique_names) < 2:  # all records share one name, so nothing to disambiguate
			
 
				+            return False
			
 
				+
			
 
				+        if not self._is_year_version_only_name(raw_name):  # any other extracted name is left to the regular matching path
			
 
				+            return False
			
 
				+
			
 
				+        logger.info(  # record the skipped input together with the competing candidate names
			
 
				+            "[skip_ambiguous_numeric_version_name] "
			
 
				+            f"raw_name={raw_name}, raw_number={raw_number}, "
			
 
				+            f"candidate_names={sorted(unique_names)}"
			
 
				+        )
			
 
				+        return True
			
 
				+
			
 
				+    def _is_year_version_only_name(self, raw_name: str) -> bool:
			
 
				+        """Return True if raw_name, ignoring whitespace, is only 2000-2026 year tokens with optional 年版/年/版 suffixes."""
			
 
				+        if not raw_name:  # empty or None name is never treated as year-only
			
 
				+            return False
			
 
				+
			
 
				+        compact_name = re.sub(r"\s+", "", raw_name)  # drop all whitespace before matching
			
 
				+        if re.search(r"[A-Za-z]", compact_name):  # NOTE(review): looks redundant, the pattern below admits no Latin letters
			
 
				+            return False
			
 
				+
			
 
				+        return bool(self._year_version_only_name_pattern.fullmatch(compact_name))
			
 
				+
			
 
				+    def _is_year_only_number(self, raw_number: str) -> bool:
			
 
				+        """Return True if raw_number, ignoring whitespace, is just a four-digit year in 2000-2026."""
			
 
				+        if not raw_number:  # empty or None number is never treated as a bare year
			
 
				+            return False
			
 
				+
			
 
				+        compact_number = re.sub(r"\s+", "", raw_number)  # drop all whitespace before matching
			
 
				+        return bool(self._year_only_number_pattern.fullmatch(compact_number))
			
 
				+
			
 
				     def _clean_brackets_and_booknames(self, text: str) -> str:
			
 
				         """
			
 
				         清洗字符串前后的书名号和括号
			
--- a/utils_test/Timeliness_Test/test_timeliness_basis_extraction.py
+++ b/utils_test/Timeliness_Test/test_timeliness_basis_extraction.py
@@ -0,0 +1,67 @@
 
				+import importlib.util
			
 
				+import pathlib
			
 
				+import sys
			
 
				+import types
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+
			
 
				+CURRENT_DIR = pathlib.Path(__file__).resolve().parent
			
 
				+PROJECT_ROOT = CURRENT_DIR.parent.parent
			
 
				+MODULE_PATH = PROJECT_ROOT / "core" / "construction_review" / "component" / "reviewers" / "timeliness_basis_reviewer.py"
			
 
				+
			
 
				+
			
 
				+@pytest.fixture
			
 
				+def basis_module(monkeypatch):  # loads the file at MODULE_PATH after placing stub modules in sys.modules
			
 
				+    logger_module = types.ModuleType("foundation.observability.logger.loggering")  # stub logger module
			
 
				+    logger_module.review_logger = types.SimpleNamespace(  # review_logger whose info/warning/error do nothing
			
 
				+        info=lambda *args, **kwargs: None,
			
 
				+        warning=lambda *args, **kwargs: None,
			
 
				+        error=lambda *args, **kwargs: None,
			
 
				+    )
			
 
				+
			
 
				+    inter_tool_module = types.ModuleType("core.construction_review.component.reviewers.utils.inter_tool")
			
 
				+    inter_tool_module.InterTool = type("InterTool", (), {})  # empty placeholder class
			
 
				+
			
 
				+    directory_module = types.ModuleType("core.construction_review.component.reviewers.utils.directory_extraction")
			
 
				+    directory_module.BasisItems = type("BasisItems", (), {})  # empty placeholder class
			
 
				+    directory_module.BasisItem = type("BasisItem", (), {})  # empty placeholder class
			
 
				+
			
 
				+    reviewer_module = types.ModuleType("core.construction_review.component.reviewers.standard_timeliness_reviewer")
			
 
				+    reviewer_module.StandardTimelinessReviewer = type("StandardTimelinessReviewer", (), {})  # empty placeholder class
			
 
				+    reviewer_module.review_standard_timeliness_with_standardized_output = lambda *args, **kwargs: None  # no-op stand-in
			
 
				+
			
 
				+    monkeypatch.setitem(sys.modules, "foundation", types.ModuleType("foundation"))  # empty parent packages for the logger stub
			
 
				+    monkeypatch.setitem(sys.modules, "foundation.observability", types.ModuleType("foundation.observability"))
			
 
				+    monkeypatch.setitem(sys.modules, "foundation.observability.logger", types.ModuleType("foundation.observability.logger"))
			
 
				+    monkeypatch.setitem(sys.modules, "foundation.observability.logger.loggering", logger_module)
			
 
				+
			
 
				+    monkeypatch.setitem(sys.modules, "core", types.ModuleType("core"))  # empty parent packages for the core.* stubs
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review", types.ModuleType("core.construction_review"))
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component", types.ModuleType("core.construction_review.component"))
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component.reviewers", types.ModuleType("core.construction_review.component.reviewers"))
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component.reviewers.utils", types.ModuleType("core.construction_review.component.reviewers.utils"))
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component.reviewers.utils.inter_tool", inter_tool_module)
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component.reviewers.utils.directory_extraction", directory_module)
			
 
				+    monkeypatch.setitem(sys.modules, "core.construction_review.component.reviewers.standard_timeliness_reviewer", reviewer_module)
			
 
				+
			
 
				+    spec = importlib.util.spec_from_file_location("test_timeliness_basis_reviewer_module", MODULE_PATH)  # load by file path under a private module name
			
 
				+    module = importlib.util.module_from_spec(spec)
			
 
				+    assert spec.loader is not None
			
 
				+    spec.loader.exec_module(module)  # run the reviewer source against the stubs registered above
			
 
				+    return module
			
 
				+
			
 
				+
			
 
				+@pytest.fixture
			
 
				+def basis_service(basis_module):  # fresh BasisReviewService taken from the module loaded by basis_module
			
 
				+    return basis_module.BasisReviewService()
			
 
				+
			
 
				+def test_debug_single_input(basis_service):  # checks name/number extraction from one 《name》(number) basis string
			
 
				+    text = "《okk》(主席令第29号)"
			
 
				+    result = basis_service._extract_standard_from_basis(text)
			
 
				+    assert result == {
			
 
				+        "standard_name": "okk",
			
 
				+        "standard_number": "主席令第29号",
			
 
				+    }
			
 
				+
			
 
				+    print(result)  # NOTE(review): leftover debug output, runs only after the assertion has passed
			
--- a/utils_test/standard_new_Test/test_core_standard_matching_skip_rule.py
+++ b/utils_test/standard_new_Test/test_core_standard_matching_skip_rule.py
@@ -0,0 +1,81 @@
 
				+import pytest
			
 
				+
			
 
				+from core.construction_review.component.standard_matching.standard_service import (
			
 
				+    MatchResultCode,
			
 
				+    StandardMatcher,
			
 
				+    StandardRepository,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+@pytest.fixture
			
 
				+def repository_with_duplicate_numbers():  # repository loaded with two records sharing "GB 50010-2010" under different names
			
 
				+    repository = StandardRepository()
			
 
				+    repository.load_data(
			
 
				+        [
			
 
				+            {
			
 
				+                "id": 1,
			
 
				+                "standard_name": "混凝土结构设计规范",
			
 
				+                "standard_number": "GB 50010-2010",
			
 
				+                "validity": "XH",
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 2,
			
 
				+                "standard_name": "混凝土结构设计标准",
			
 
				+                "standard_number": "GB 50010-2010",  # same number as record 1, different name
			
 
				+                "validity": "XH",
			
 
				+            },
			
 
				+        ]
			
 
				+    )
			
 
				+    return repository
			
 
				+
			
 
				+
			
 
				+def test_skip_when_name_is_year_version_only_and_number_maps_to_multiple_names(
			
 
				+    repository_with_duplicate_numbers,
			
 
				+):
			
 
				+    matcher = StandardMatcher(repository_with_duplicate_numbers)
			
 
				+
			
 
				+    result = matcher.match(
			
 
				+        seq_no=1,
			
 
				+        input_name="2024年版",  # year/edition-only name
			
 
				+        input_number="GB 50010-2010",  # number shared by both fixture records
			
 
				+    )
			
 
				+
			
 
				+    assert result is None  # match() is expected to return None for this ambiguous input
			
 
				+
			
 
				+
			
 
				+def test_do_not_skip_when_name_is_real_standard_name(repository_with_duplicate_numbers):
			
 
				+    matcher = StandardMatcher(repository_with_duplicate_numbers)
			
 
				+
			
 
				+    result = matcher.match(
			
 
				+        seq_no=1,
			
 
				+        input_name="混凝土结构设计规范",  # same name as fixture record 1
			
 
				+        input_number="GB 50010-2010",
			
 
				+    )
			
 
				+
			
 
				+    assert result is not None  # a real standard name must not trigger the skip rule
			
 
				+    assert result.status_code == MatchResultCode.OK.value
			
 
				+
			
 
				+
			
 
				+def test_do_not_skip_when_name_contains_extra_chinese_text(repository_with_duplicate_numbers):
			
 
				+    matcher = StandardMatcher(repository_with_duplicate_numbers)
			
 
				+
			
 
				+    result = matcher.match(
			
 
				+        seq_no=1,
			
 
				+        input_name="2024年修订版",  # contains 修订, which is not one of the year suffixes 年版/年/版
			
 
				+        input_number="GB 50010-2010",
			
 
				+    )
			
 
				+
			
 
				+    assert result is not None  # the skip rule must not fire for this name
			
 
				+    assert result.status_code == MatchResultCode.MISMATCH.value
			
 
				+
			
 
				+
			
 
				+def test_skip_when_raw_number_is_year_only(repository_with_duplicate_numbers):
			
 
				+    matcher = StandardMatcher(repository_with_duplicate_numbers)
			
 
				+
			
 
				+    result = matcher.match(
			
 
				+        seq_no=1,
			
 
				+        input_name="四川省安全生产条例",
			
 
				+        input_number="2023",  # bare year instead of a standard number
			
 
				+    )
			
 
				+
			
 
				+    assert result is None  # match() is expected to return None when the number is only a year