Ver código fonte

fix: 过滤词句语法审查中的解析失败和无效结果

- inter_tool.py: JSON解析失败时标记exist_issue=false,不输出到最终结果
- inter_tool.py: 增加check_result类型检查,过滤字符串格式的无效结果
- 新增测试:切分逻辑测试、质量分析脚本、prompt修复验证测试
- 新增工具:全量扫描脚本用于验证38个chunks的审查质量

解决的问题:
- 模型生成的malformed JSON不再作为有效issue输出
- exist_issue=false的项被彻底过滤
- check_result为字符串(解析失败)的项被过滤

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
WangXuMing 1 dia atrás
pai
commit
fd211873b7

+ 13 - 4
core/construction_review/component/reviewers/utils/inter_tool.py

@@ -504,7 +504,11 @@ class InterTool:
                 logger.warning(f"check_result内容: {check_result}")
 
         # 过滤掉 exist_issue=false 的审查项(无问题的项不返回到最终结果)
-        review_lists = [item for item in review_lists if item.get("exist_issue", False)]
+        # 同时过滤掉 check_result 为字符串的项(解析失败的 malformed JSON)
+        review_lists = [
+            item for item in review_lists
+            if item.get("exist_issue", False) and not isinstance(item.get("check_result"), str)
+        ]
 
         # 统计风险等级
         for issue in review_lists:
@@ -581,19 +585,24 @@ class InterTool:
 
             # 3. 如果JSON解析失败,回退到文本解析
             if not review_lists:
-                # 🔧 修复:检查响应是否为空或只包含空白字符
                 response_stripped = response.strip() if isinstance(response, str) else ""
                 is_empty_response = not response_stripped or response_stripped in ["", "null", "None", "undefined"]
 
+                # 检测是否为格式错误的 JSON(包含 JSON 标记但解析失败)
+                # 这类情况不应作为有效 issue 输出
+                looks_like_broken_json = any(marker in response_stripped for marker in ['```json', '{', '[', '"issue_point"'])
+
                 risk_level = self._determine_risk_level(response)
 
-                # 如果响应为空,则设置 exist_issue=False
+                # 空响应或格式错误的 JSON → exist_issue=False(不输出到最终结果)
+                should_exist = not is_empty_response and not looks_like_broken_json
+
                 review_lists.append({
                     "check_item": check_name,
                     "chapter_code": chapter_code,
                     "check_item_code": check_item_code,
                     "check_result": response,
-                    "exist_issue": not is_empty_response,  # 🔧 修复:空响应不存在问题
+                    "exist_issue": should_exist,
                     "risk_info": {"risk_level": risk_level}
                 })
 

+ 230 - 0
utils_test/Grammar_Check_Test/analyze_grammar_quality.py

@@ -0,0 +1,230 @@
+"""
+分析最新审查结果中词句语法审查的质量
+
+检查项:
+1. "将A改为A" 模式(修正前后相同)
+2. suggestion/reason 中的自我辩论(犹豫措辞)
+3. risk_level 为空
+4. 技术操作规程越界审查
+5. 重复问题
+6. JSON 解析失败
+7. suggestion 过长(>200字,可能包含推理过程)
+"""
+
+import json
+import re
+import sys
+import os
+
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, project_root)
+
+
+def extract_correction_pairs(suggestion: str):
+    """Extract every ``将X改为Y`` correction in *suggestion* as ``(X, Y)`` tuples.
+
+    X and Y must each be wrapped in quotation marks. Straight quotes, curly
+    quotes and CJK corner brackets are accepted; opening and closing marks are
+    not required to pair up.
+
+    Args:
+        suggestion: Suggestion text to scan.
+
+    Returns:
+        A list of ``(before, after)`` string tuples in order of appearance;
+        an empty list when the text contains no such correction.
+    """
+    # The curly quotes are written as \u escapes (resolved by ``re``) so that a
+    # tool normalising "smart" quotes cannot silently turn them into duplicates
+    # of the straight quotes.
+    quote_chars = r"""['"\u201c\u201d\u2018\u2019「」]"""
+    pattern = rf"将{quote_chars}(.*?){quote_chars}\s*改为\s*{quote_chars}(.*?){quote_chars}"
+    return re.findall(pattern, suggestion)
+
+
+def check_hesitation_words(text: str):
+    """Return the hesitation phrases that occur in *text*.
+
+    The result follows the order of the phrase list below, not the order in
+    which the phrases appear in *text*; it is empty when none is present.
+    """
+    lexicon = (
+        '可能', '暂定', '不确定', '重新审视', '然而', '不过', '似乎',
+        '但是', '其实', '实际上', '再细看', '再想想', '仔细想想',
+        '反过来', '另一方面', '换个角度',
+    )
+    hits = []
+    for phrase in lexicon:
+        if phrase in text:
+            hits.append(phrase)
+    return hits
+
+
+def is_technical_procedure(issue_point: str, reason: str):
+    """Return the technical-procedure keywords found in the two fields.
+
+    *issue_point* and *reason* are concatenated before searching, and the
+    matches are returned in the order of the keyword list below. An empty list
+    means no keyword was found.
+    """
+    haystack = issue_point + reason
+    matched = []
+    for keyword in (
+        '操作步骤', '工艺参数', '施工顺序', '操作规程',
+        '技术规范', '施工方案', '工艺流程',
+    ):
+        if keyword in haystack:
+            matched.append(keyword)
+    return matched
+
+
+def analyze_grammar_check_results(result_file: str):
+    """Print a quality report for the grammar-check items in a result file.
+
+    The file is read as UTF-8 JSON. Every ``review_lists`` entry whose
+    ``check_item`` is ``sensitive_word_check`` or ``grammar_check`` is
+    collected and checked for: empty ``risk_level``, corrections whose two
+    sides are identical, hesitation wording in ``suggestion`` / ``reason``,
+    technical-procedure keywords, suggestions longer than 200 characters,
+    repeated corrections, and unparsed ``check_result`` values.
+
+    Args:
+        result_file: Path of the JSON result file to analyse.
+
+    Returns:
+        None. The report is written to stdout.
+    """
+    with open(result_file, encoding='utf-8') as f:
+        data = json.load(f)
+
+    def _text(value):
+        # JSON fields may be null or non-string; the slicing and len() calls
+        # below need a real str.
+        return '' if value is None else str(value)
+
+    grammar_items = []
+    for issue_wrapper in data.get('issues', []):
+        for issue_id, issue_detail in issue_wrapper.items():
+            metadata = issue_detail.get('metadata', {})
+            for item in issue_detail.get('review_lists', []):
+                if item.get('check_item', '') in ['sensitive_word_check', 'grammar_check']:
+                    grammar_items.append({
+                        'item': item,
+                        'issue_id': issue_id,
+                        'location_label': metadata.get('review_location_label', '')
+                    })
+
+    print(f"Total grammar_check items: {len(grammar_items)}")
+    print()
+
+    # One bucket per quality check.
+    a_to_a_issues = []
+    hesitation_issues = []
+    empty_risk_issues = []
+    technical_issues = []
+    duplicate_issues = []
+    parse_failures = []
+    long_suggestion_issues = []
+
+    # correction key -> 1-based index of the first item that produced it
+    seen_corrections = {}
+
+    for index, entry in enumerate(grammar_items, start=1):
+        check_result = entry['item'].get('check_result', {})
+
+        # Anything that is not a dict (raw string, null, list, ...) could not
+        # be parsed into the expected structure; report it and move on
+        # instead of crashing on ``.get``.
+        if not isinstance(check_result, dict):
+            parse_failures.append({
+                'index': index,
+                'raw': _text(check_result)[:200],
+                'location_label': entry['location_label']
+            })
+            continue
+
+        issue_point = _text(check_result.get('issue_point', ''))
+        suggestion = _text(check_result.get('suggestion', ''))
+        reason = _text(check_result.get('reason', ''))
+        risk_level = _text(check_result.get('risk_level', ''))
+
+        # Check 1: Empty risk_level
+        if not risk_level.strip():
+            empty_risk_issues.append({
+                'index': index,
+                'issue_point': issue_point,
+                'suggestion': suggestion[:80]
+            })
+
+        # Check 2: A→A pattern
+        pairs = extract_correction_pairs(suggestion)
+        for before, after in pairs:
+            if before.strip() == after.strip():
+                a_to_a_issues.append({
+                    'index': index,
+                    'issue_point': issue_point,
+                    'before': before,
+                    'after': after
+                })
+
+        # Checks 3 and 4: Hesitation words in suggestion, then in reason
+        for field, text in (('suggestion', suggestion), ('reason', reason)):
+            words = check_hesitation_words(text)
+            if words:
+                hesitation_issues.append({
+                    'index': index,
+                    'field': field,
+                    'words': words,
+                    'text': text[:100]
+                })
+
+        # Check 5: Technical procedure
+        tech_kws = is_technical_procedure(issue_point, reason)
+        if tech_kws:
+            technical_issues.append({
+                'index': index,
+                'issue_point': issue_point,
+                'keywords': tech_kws
+            })
+
+        # Check 6: Long suggestion (>200 chars)
+        if len(suggestion) > 200:
+            long_suggestion_issues.append({
+                'index': index,
+                'issue_point': issue_point,
+                'length': len(suggestion),
+                'text': suggestion[:100]
+            })
+
+        # Check 7: Duplicates (same correction key). Pairs are sorted so the
+        # key does not depend on the order they appear in the suggestion.
+        if pairs:
+            correction_key = ",".join(f"{a}→{b}" for a, b in sorted(pairs))
+        else:
+            correction_key = suggestion.strip()
+
+        if correction_key in seen_corrections:
+            duplicate_issues.append({
+                'index': index,
+                'first_index': seen_corrections[correction_key],
+                'correction_key': correction_key,
+                'issue_point': issue_point
+            })
+        else:
+            seen_corrections[correction_key] = index
+
+    # Print results
+    print("=" * 60)
+    print("QUALITY ANALYSIS RESULTS")
+    print("=" * 60)
+
+    sections = [
+        ("A->A Pattern (will A change to A)", a_to_a_issues),
+        ("Self-debate / Hesitation words", hesitation_issues),
+        ("Empty risk_level", empty_risk_issues),
+        ("Technical procedure (out of scope)", technical_issues),
+        ("Duplicate corrections", duplicate_issues),
+        ("JSON parse failures", parse_failures),
+        ("Long suggestions (>200 chars)", long_suggestion_issues),
+    ]
+
+    total_problems = 0
+    flagged_indexes = set()
+    for title, items in sections:
+        count = len(items)
+        total_problems += count
+        status = "PASS" if count == 0 else "FAIL"
+        print(f"\n[{status}] {title}: {count}")
+        for item in items:
+            flagged_indexes.add(item['index'])
+            print(f"  - #{item.get('index', '?')}: {json.dumps(item, ensure_ascii=False)[:150]}")
+
+    print(f"\n{'=' * 60}")
+    print(f"TOTAL: {len(grammar_items)} items, {total_problems} quality issues")
+    if grammar_items:
+        # One item can trip several checks, so the rate is based on distinct
+        # flagged items; subtracting the raw finding count could go negative.
+        clean_count = len(grammar_items) - len(flagged_indexes)
+        print(f"Quality rate: {clean_count / len(grammar_items) * 100:.1f}%")
+    else:
+        print("Quality rate: N/A (no items)")
+
+    # Print valid items summary
+    print(f"\n{'=' * 60}")
+    print("VALID ITEMS SUMMARY")
+    print("=" * 60)
+    for index, entry in enumerate(grammar_items, start=1):
+        check_result = entry['item'].get('check_result', {})
+        if not isinstance(check_result, dict):
+            print(f"  [{index}] [PARSE_FAIL] {_text(check_result)[:60]}...")
+            continue
+        issue_point = _text(check_result.get('issue_point', ''))
+        suggestion = _text(check_result.get('suggestion', ''))
+        risk_level = _text(check_result.get('risk_level', ''))
+        print(f"  [{index}] [{risk_level}] {issue_point}: {suggestion[:60]}...")
+
+
+if __name__ == "__main__":
+    # The file below is only the default; pass another result file as the
+    # first command-line argument to analyse a different run.
+    default_result_file = os.path.join(
+        project_root,
+        "temp", "construction_review", "final_result",
+        "67d45692fb97aeef8f896e78475ce539-1779785718.json"
+    )
+    result_file = sys.argv[1] if len(sys.argv) > 1 else default_result_file
+    analyze_grammar_check_results(result_file)

+ 93 - 0
utils_test/Grammar_Check_Test/run_full_scan.py

@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""全量 chunk 词句语法审查 — 保存所有原始响应用于人工分析"""
+
+import sys, os, json, asyncio, time
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)).split('utils_test')[0])
+
+# Resolve the project root by walking two directories up from this script's
+# folder rather than splitting the path on the substring 'utils_test', which
+# yields the wrong prefix when an ancestor directory name contains that text.
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.dirname(os.path.dirname(_SCRIPT_DIR))
+
+RESULT_JSON = os.path.join(
+    _PROJECT_ROOT,
+    "temp", "construction_review", "final_result",
+    "67d45692fb97aeef8f896e78475ce539-1779781589.json"
+)
+OUTPUT_DIR = os.path.join(_SCRIPT_DIR, "full_scan_results")
+
+async def main():
+    """Run the grammar check on every chunk of RESULT_JSON and save the raw responses.
+
+    Chunks are reviewed one after another. Per chunk, the response text,
+    success flag, error message and wall time are recorded. All records go to
+    ``all_results.json`` in OUTPUT_DIR, plus one JSON file per chunk, and a
+    short tally is printed at the end.
+    """
+    from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
+
+    def _is_no_issue(text):
+        # A short reply containing the marker phrase counts as "nothing found".
+        return '无明显问题' in text and len(text) < 50
+
+    with open(RESULT_JSON, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    chunks = data['document_result']['structured_content']['chunks']
+
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    reviewer = GrammarCheckReviewer()
+
+    all_results = []
+
+    for i, chunk in enumerate(chunks):
+        content = chunk['content']
+        section = chunk.get('section_label', f'chunk_{i}')
+        chapter = chunk.get('chapter_classification', 'unknown')
+        trace_id = f"full_scan_{i}_{int(time.time())}"
+
+        print(f"[{i:02d}/{len(chunks)}] {chapter}/{section[:40]}... (len={len(content)})")
+
+        start = time.time()
+        try:
+            result = await reviewer.check_grammar(
+                trace_id=trace_id,
+                review_content=content,
+                state=None, stage_name=None,
+                enable_thinking=False,
+            )
+            wall_time = time.time() - start
+            # A missing or null response must not abort the scan: the len()
+            # calls below would raise and nothing would have been saved yet.
+            response_text = (result.details or {}).get('response') or ''
+            success = result.success
+            error = result.error_message
+        except Exception as e:
+            # Best effort by design: record the failure and keep scanning.
+            wall_time = time.time() - start
+            response_text = ""
+            success = False
+            error = str(e)
+            print(f"      ERROR: {e}")
+
+        all_results.append({
+            "chunk_index": i,
+            "chapter": chapter,
+            "section": section,
+            "content_length": len(content),
+            "content_preview": content[:200],
+            "success": success,
+            "error": error,
+            "wall_time": round(wall_time, 2),
+            "response_length": len(response_text),
+            "raw_response": response_text,
+        })
+
+        status = "NO_ISSUE" if _is_no_issue(response_text) else f"ISSUES(response_len={len(response_text)})"
+        print(f"      {wall_time:.2f}s | {status}")
+
+    # Save the combined summary.
+    summary_path = os.path.join(OUTPUT_DIR, "all_results.json")
+    with open(summary_path, 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, ensure_ascii=False, indent=2)
+    print(f"\nSaved {len(all_results)} results to {summary_path}")
+
+    # Save one file per chunk so each record can be read on its own.
+    for record in all_results:
+        idx = record["chunk_index"]
+        # The chapter label comes from the data file; keep path separators
+        # out of the file name.
+        safe_chapter = str(record['chapter']).replace('/', '_').replace('\\', '_')
+        chunk_path = os.path.join(OUTPUT_DIR, f"chunk_{idx:02d}_{safe_chapter}.json")
+        with open(chunk_path, 'w', encoding='utf-8') as f:
+            json.dump(record, f, ensure_ascii=False, indent=2)
+
+    print(f"Saved individual files to {OUTPUT_DIR}/")
+
+    # Print the tally.
+    no_issue_count = sum(1 for r in all_results if _is_no_issue(r['raw_response']))
+    issue_count = len(all_results) - no_issue_count
+    error_count = sum(1 for r in all_results if not r['success'])
+    print(f"\nStats: {no_issue_count} no-issue, {issue_count} has-issues, {error_count} errors")
+
+# Start the scan only when executed as a script, so that importing this
+# module does not trigger a full review run.
+if __name__ == "__main__":
+    asyncio.run(main())

+ 341 - 0
utils_test/Grammar_Check_Test/test_grammar_check_prompt_fix.py

@@ -0,0 +1,341 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+词句语法审查 — Prompt 修复验证测试
+
+验证目标:修复 "将A改为A" 的离谱错误
+- 旧 prompt 包含否定示例(如"禁止输出将'设'改为'设'"),反而给模型植入了错误模式
+- 新 prompt 使用肯定式规则("犹豫时输出无明显问题")
+
+测试数据:temp/construction_review/final_result/67d45692fb97aeef8f896e78475ce539-1779781589.json
+其中 chunk[8] 包含触发 bug 的原文:"必须采取充分的安全保证措施"
+
+运行方式:
+    $env:PYTHONPATH = (Get-Location)
+    pytest utils_test/Grammar_Check_Test/test_grammar_check_prompt_fix.py -v -s
+"""
+
+import sys
+import os
+import json
+import re
+import time
+import asyncio
+from pathlib import Path
+
+# 项目根目录注入
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
+if PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, PROJECT_ROOT)
+
+import pytest
+
+# ============================================================
+# 测试数据
+# ============================================================
+# Location of the saved review result used as test input, relative to the
+# project root.
+_RESULT_JSON_PARTS = (
+    "temp", "construction_review", "final_result",
+    "67d45692fb97aeef8f896e78475ce539-1779781589.json",
+)
+RESULT_JSON = os.path.join(PROJECT_ROOT, *_RESULT_JSON_PARTS)
+
+
+def _load_chunks():
+    """Read RESULT_JSON and return its list of document chunks."""
+    with open(RESULT_JSON, 'r', encoding='utf-8') as handle:
+        document = json.load(handle)
+    structured = document['document_result']['structured_content']
+    return structured['chunks']
+
+
+# ============================================================
+# Bug 检测工具函数
+# ============================================================
+def detect_a_to_a_pattern(response_text: str) -> list:
+    """Find "change X to X" corrections and heavy self-debate in a model response.
+
+    Two heuristics are applied:
+
+    1. A quoted correction introduced by 将 or 把 and joined by 改为 / 修改为 /
+       替换为 / 换成 whose two quoted parts (1-10 characters each) are equal
+       after stripping whitespace. Straight quotes, curly quotes and CJK
+       corner brackets are accepted. Unquoted corrections are not detected.
+    2. Three or more distinct debate keywords anywhere in the response.
+
+    Args:
+        response_text: Raw model response.
+
+    Returns:
+        list: The matched correction snippets, followed by one summary string
+        when the debate threshold is reached. Empty when nothing is found or
+        the response is empty.
+    """
+    if not response_text:
+        return []
+
+    issues = []
+
+    # " ' “ ” ‘ ’ 「 」 -- spelled as escapes so they survive quote-normalising tools.
+    quotes = '"\'\u201c\u201d\u2018\u2019\u300c\u300d'
+    # The quoted parts exclude quote characters so that one match cannot run
+    # across the closing quote into the following text.
+    pattern_quoted = re.compile(
+        rf'(?:将|把)[{quotes}]([^{quotes}]{{1,10}})[{quotes}]'
+        rf'\s*(?:改为|修改为|替换为|换成)\s*'
+        rf'[{quotes}]([^{quotes}]{{1,10}})[{quotes}]'
+    )
+    for m in pattern_quoted.finditer(response_text):
+        original, replacement = m.group(1).strip(), m.group(2).strip()
+        if original == replacement:
+            issues.append(m.group(0))
+
+    # Heuristic 2: count how many distinct debate keywords occur.
+    debate_keywords = ['然而', '再细看', '重新审视', '其实', '但', '不过', '似乎', '略显生硬']
+    found_keywords = [kw for kw in debate_keywords if kw in response_text]
+    if len(found_keywords) >= 3:
+        issues.append(f"[自我辩论] 响应中包含 {len(found_keywords)} 个犹豫/反驳关键词: "
+                      f"{found_keywords}")
+
+    return issues
+
+
+def parse_json_from_response(response_text: str) -> list:
+    """Extract the JSON result list from a model response.
+
+    Three strategies are tried in order; the first that succeeds wins:
+
+    1. Parse the whole response as JSON.
+    2. Parse the contents of each Markdown code fence.
+    3. Decode the first JSON value that starts at a ``[`` or ``{`` anywhere in
+       the text, scanning left to right.
+
+    Args:
+        response_text: Raw model response.
+
+    Returns:
+        list: A parsed JSON array is returned as is and a parsed JSON object is
+        wrapped in a one-element list. Empty when nothing usable is found.
+    """
+    if not response_text:
+        return []
+
+    def _load(text):
+        """Parse *text*; return a list of results, or None when unusable."""
+        try:
+            data = json.loads(text)
+        except (json.JSONDecodeError, TypeError):
+            return None
+        if isinstance(data, list):
+            return data
+        if isinstance(data, dict):
+            return [data]
+        return None
+
+    # 1. The whole response is JSON.
+    parsed = _load(response_text)
+    if parsed is not None:
+        return parsed
+
+    # 2. JSON wrapped in a Markdown code fence.
+    for block in re.findall(r'```(?:json)?\s*\n?(.*?)\n?```', response_text, re.DOTALL):
+        parsed = _load(block.strip())
+        if parsed is not None:
+            return parsed
+
+    # 3. JSON embedded in surrounding prose. Openers are visited in text order
+    #    so an enclosing object is decoded before an array nested inside it,
+    #    and raw_decode stops at the end of the value, so no end position has
+    #    to be guessed.
+    decoder = json.JSONDecoder()
+    for opener in re.finditer(r'[\[{]', response_text):
+        try:
+            data, _ = decoder.raw_decode(response_text, opener.start())
+        except json.JSONDecodeError:
+            continue
+        if isinstance(data, dict):
+            return [data]
+        # Bracketed prose such as "[1]" decodes as a list of scalars; skip
+        # lists that are not made of result objects.
+        if all(isinstance(entry, dict) for entry in data):
+            return data
+
+    return []
+
+
+# ============================================================
+# 测试类
+# ============================================================
+class TestGrammarCheckPromptFix:
+    """Integration tests for the grammar-check prompt fix.
+
+    Each test sends chunk text through ``GrammarCheckReviewer.check_grammar``
+    and inspects the raw response text.
+    """
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        """Load the chunks and select chunk[8]; skip when the data is unavailable."""
+        # Missing or truncated test data is an environment problem, not a
+        # regression, so skip instead of erroring in the fixture.
+        if not os.path.isfile(RESULT_JSON):
+            pytest.skip(f"test data not found: {RESULT_JSON}")
+        self.chunks = _load_chunks()
+        if len(self.chunks) <= 8:
+            pytest.skip(f"test data has only {len(self.chunks)} chunks; chunk[8] is required")
+        # chunk[8] is the chunk the original bug was reproduced on.
+        self.bug_chunk = self.chunks[8]
+        assert '充分' in self.bug_chunk['content'], "chunk[8] 应包含 '充分' 文本"
+
+    async def _review(self, content, trace_prefix, reviewer=None):
+        """Run one grammar check.
+
+        Args:
+            content: Text to review.
+            trace_prefix: Prefix of the trace id; the current Unix time is appended.
+            reviewer: Reviewer to reuse; a new one is created when omitted.
+
+        Returns:
+            ``(result, response_text, wall_time)`` where ``response_text`` is
+            always a string (empty when the result carries no response).
+        """
+        if reviewer is None:
+            from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
+            reviewer = GrammarCheckReviewer()
+
+        start = time.time()
+        result = await reviewer.check_grammar(
+            trace_id=f"{trace_prefix}_{int(time.time())}",
+            review_content=content,
+            state=None,
+            stage_name=None,
+            enable_thinking=False,
+        )
+        wall_time = time.time() - start
+        # Normalise a missing / null response so len() and ``in`` are safe.
+        response_text = (result.details or {}).get('response') or ''
+        return result, response_text, wall_time
+
+    @staticmethod
+    def _is_no_issue(response_text):
+        """Return True for a short reply that contains the no-issue marker phrase."""
+        return '无明显问题' in response_text and len(response_text) < 50
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_bug_chunk_no_a_to_a(self):
+        """Core test: the original bug chunk no longer yields a "change A to A" result."""
+        print(f"\n{'='*70}")
+        print(f"  测试原 bug chunk: {self.bug_chunk['section_label']}")
+        print(f"  内容长度: {len(self.bug_chunk['content'])} 字符")
+        print(f"{'='*70}")
+
+        result, response_text, wall_time = await self._review(
+            self.bug_chunk['content'], "grammar_fix_test_bug"
+        )
+
+        print(f"\n  审查耗时: {wall_time:.2f}s")
+        print(f"  success: {result.success}")
+        print(f"  响应长度: {len(response_text)} 字符")
+
+        is_no_issue = self._is_no_issue(response_text)
+        print(f"  是否无明显问题: {is_no_issue}")
+
+        if not is_no_issue:
+            issues = parse_json_from_response(response_text)
+            print(f"  发现 {len(issues)} 个问题")
+            for idx, issue in enumerate(issues):
+                print(f"\n  --- 问题 {idx + 1} ---")
+                print(f"  issue_point: {issue.get('issue_point', 'N/A')}")
+                print(f"  location: {issue.get('location', 'N/A')[:80]}...")
+                print(f"  suggestion: {issue.get('suggestion', 'N/A')[:120]}")
+                print(f"  reason: {issue.get('reason', 'N/A')[:120]}")
+                print(f"  risk_level: {issue.get('risk_level', 'N/A')}")
+
+            # Dump the raw response for manual inspection.
+            print(f"\n  --- 原始响应 ---")
+            print(response_text[:2000])
+        else:
+            print(f"  原始响应: {response_text}")
+
+        assert result.success, f"审查应成功,实际错误: {result.error_message}"
+
+        # Core assertion: no "change A to A" pattern may remain.
+        a_to_a_issues = detect_a_to_a_pattern(response_text)
+        assert not a_to_a_issues, (
+            f"检测到 '将A改为A' 模式仍存在!\n"
+            f"问题片段: {a_to_a_issues}\n"
+            f"完整响应:\n{response_text}"
+        )
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_multiple_overview_chunks(self):
+        """Extended test: no chunk classified as ``overview`` yields a "change A to A" result."""
+        from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
+
+        # One reviewer is shared by all chunks of this test.
+        reviewer = GrammarCheckReviewer()
+
+        overview_chunks = [
+            c for c in self.chunks
+            if c.get('chapter_classification') == 'overview'
+        ]
+        print(f"\n{'='*70}")
+        print(f"  扩展测试: {len(overview_chunks)} 个 overview chunks")
+        print(f"{'='*70}")
+
+        all_a_to_a_issues = []
+
+        for idx, chunk in enumerate(overview_chunks):
+            section = chunk.get('section_label', f'chunk_{idx}')
+            content = chunk['content']
+
+            print(f"\n  [{idx}] {section} (len={len(content)})")
+
+            _, response_text, wall_time = await self._review(
+                content, f"grammar_fix_test_overview_{idx}", reviewer=reviewer
+            )
+            is_no_issue = self._is_no_issue(response_text)
+            issues = [] if is_no_issue else parse_json_from_response(response_text)
+
+            a_to_a = detect_a_to_a_pattern(response_text)
+            status = "[OK] 无明显问题" if is_no_issue else f"[!!] 有 {len(issues)} 个问题"
+            if a_to_a:
+                status += f" [FAIL] 检测到A->A模式: {a_to_a}"
+                all_a_to_a_issues.extend([(section, issue) for issue in a_to_a])
+
+            print(f"      耗时: {wall_time:.2f}s | {status}")
+
+            if not is_no_issue and not a_to_a:
+                # Print a one-line summary of each reported issue.
+                for issue in issues:
+                    ip = issue.get('issue_point', '')[:60]
+                    sg = issue.get('suggestion', '')[:80]
+                    print(f"      -> {ip} | 建议: {sg}")
+
+        print(f"\n{'='*70}")
+        print(f"  扩展测试完成: {len(overview_chunks)} 个 chunks")
+        print(f"  A->A 问题数: {len(all_a_to_a_issues)}")
+        print(f"{'='*70}")
+
+        assert not all_a_to_a_issues, (
+            f"检测到 {len(all_a_to_a_issues)} 个 '将A改为A' 模式!\n"
+            + "\n".join(f"  {sec}: {issue}" for sec, issue in all_a_to_a_issues)
+        )
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_suggestion_field_concise(self):
+        """Format check: every ``suggestion`` is short and free of debate wording."""
+        _, response_text, _ = await self._review(
+            self.bug_chunk['content'], "grammar_fix_test_concise"
+        )
+        issues = parse_json_from_response(response_text)
+
+        if not issues:
+            print("\n  模型输出'无明显问题',无需验证 suggestion 格式")
+            return
+
+        print(f"\n  发现 {len(issues)} 个问题,验证 suggestion 格式:")
+
+        for idx, issue in enumerate(issues):
+            suggestion = issue.get('suggestion', '')
+            reason = issue.get('reason', '')
+            print(f"\n  --- 问题 {idx + 1} ---")
+            print(f"  suggestion ({len(suggestion)}字): {suggestion[:150]}")
+            print(f"  reason ({len(reason)}字): {reason[:150]}")
+
+            # The suggestion must not contain reasoning / debate keywords.
+            debate_keywords = ['然而', '再细看', '重新审视', '让我们', '再审视']
+            found_debate = [kw for kw in debate_keywords if kw in suggestion]
+            assert not found_debate, (
+                f"suggestion 字段包含推理过程!\n"
+                f"检测到辩论关键词: {found_debate}\n"
+                f"suggestion 内容: {suggestion}"
+            )
+
+            # A suggestion of 200 characters or more is treated as likely
+            # containing reasoning.
+            assert len(suggestion) < 200, (
+                f"suggestion 字段过长({len(suggestion)}字),可能包含推理过程:\n{suggestion}"
+            )

+ 197 - 0
utils_test/Grammar_Check_Test/test_grammar_check_split.py

@@ -0,0 +1,197 @@
+"""
+测试词句语法审查的长文本切分逻辑
+
+测试内容:
+1. 切分触发条件(>5000字)
+2. 切分后并行审查
+3. 结果合并去重
+4. JSON 解析鲁棒性
+"""
+
+import sys
+import os
+import json
+import asyncio
+import time
+
+# 注入项目根目录
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, project_root)
+
+from core.construction_review.component.reviewers.grammar_check_reviewer import (
+    GrammarCheckReviewer,
+    SPLIT_THRESHOLD,
+    SEGMENT_MIN_LENGTH,
+    SEGMENT_TARGET_LENGTH,
+    SEGMENT_OVERLAP,
+)
+from core.construction_review.component.reviewers.utils.text_split import split_text_with_overlap
+
+
+def load_test_chunks():
+    """Return the chunk list stored in the saved review result under ``temp/``."""
+    path_parts = (
+        "temp", "construction_review", "final_result",
+        "67d45692fb97aeef8f896e78475ce539-1779781589.json",
+    )
+    with open(os.path.join(project_root, *path_parts), encoding="utf-8") as handle:
+        document = json.load(handle)
+    structured = document["document_result"]["structured_content"]
+    return structured["chunks"]
+
+
+def test_parse_segment_response():
+    """Check ``_parse_segment_response`` against the expected response shapes."""
+    array_response = '''```json
+[
+  {"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原因", "risk_level": "中风险"},
+  {"issue_point": "重复字词", "location": "位置2", "suggestion": "删除重复", "reason": "原因", "risk_level": "低风险"}
+]
+```'''
+    object_response = '''```json
+{"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原因", "risk_level": "中风险"}
+```'''
+    # The no-issue phrase appears inside the reason field of a real issue.
+    keyword_in_reason_response = '''```json
+[{"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原文无明显问题但实际有错", "risk_level": "中风险"}]
+```'''
+
+    # (response, expected count, wording for the failure message, success note)
+    scenarios = [
+        (array_response, 2, "2 issues", "JSON array parsed correctly"),
+        (object_response, 1, "1 issue", "JSON object parsed correctly"),
+        ("无明显问题", 0, "0 issues", "no-issue response handled correctly"),
+        ("", 0, "0 issues", "empty response handled correctly"),
+        (keyword_in_reason_response, 1, "1 issue",
+         "JSON with 'no-issue' keyword in reason parsed correctly"),
+    ]
+
+    reviewer = GrammarCheckReviewer()
+    for response, expected, wording, note in scenarios:
+        found = reviewer._parse_segment_response(response)
+        assert len(found) == expected, f"Expected {wording}, got {len(found)}"
+        print(f"[PASS] test_parse_segment_response: {note}")
+
+
+def test_deduplicate_issues():
+    """Check that ``_deduplicate_issues`` drops repeats and invalid entries."""
+
+    def make_issue(location, suggestion, reason="原因", risk_level="中风险", issue_point="错别字"):
+        # Keys are created in the same order as a hand-written literal.
+        return {
+            "issue_point": issue_point,
+            "location": location,
+            "suggestion": suggestion,
+            "reason": reason,
+            "risk_level": risk_level,
+        }
+
+    issues = [
+        make_issue("位置1", "将'混泥土'改为'混凝土'"),
+        make_issue("位置1", "将'混泥土'改为'混凝土'", reason="原因重复"),  # exact repeat
+        make_issue("位置2", "将'珩架梁'改为'桁架梁'"),
+        make_issue("位置3", "将'卷拨'改为'卷扬'"),
+        make_issue("位置4", "无明显问题", risk_level="低风险", issue_point="无明显问题"),  # invalid entry
+        make_issue("位置5", "将'不和'改为'不得'", risk_level=""),  # empty risk_level
+        make_issue("位置6", "将'千斤项'改为'千斤顶'", risk_level="高风险"),  # valid
+    ]
+
+    reviewer = GrammarCheckReviewer()
+    unique = reviewer._deduplicate_issues(issues)
+
+    # Expected survivors: the four distinct corrections with a risk level.
+    assert len(unique) == 4, f"Expected 4 unique issues, got {len(unique)}: {[i['suggestion'] for i in unique]}"
+    print(f"[PASS] test_deduplicate_issues: {len(issues)} -> {len(unique)} issues")
+
+    # The invalid entries must be gone.
+    kept_suggestions = [entry["suggestion"] for entry in unique]
+    assert "无明显问题" not in kept_suggestions, "Should filter out 'no-issue' suggestions"
+    assert all(i["risk_level"] for i in unique), "Should filter out empty risk_level"
+    print("[PASS] test_deduplicate_issues: invalid entries filtered correctly")
+
+
+def test_split_trigger():
+    """Report which test chunks exceed SPLIT_THRESHOLD and how they are segmented.
+
+    This function only prints; it makes no assertions.
+    """
+    split_count = 0
+    kept_whole_count = 0
+
+    for position, chunk in enumerate(load_test_chunks()):
+        text = chunk.get("content", "")
+        if len(text) <= SPLIT_THRESHOLD:
+            kept_whole_count += 1
+            continue
+
+        split_count += 1
+        pieces = split_text_with_overlap(
+            text,
+            min_length=SEGMENT_MIN_LENGTH,
+            target_length=SEGMENT_TARGET_LENGTH,
+            overlap=SEGMENT_OVERLAP,
+        )
+        print(f"  Chunk[{position}] len={len(text)} -> {len(pieces)} segments")
+
+    print(f"[PASS] test_split_trigger: {split_count} chunks will be split, {kept_whole_count} chunks will not")
+
+
+async def test_full_split_review():
+    """Run the split review on chunk 24, print the outcome and save the response.
+
+    The response is written as is to
+    ``utils_test/Grammar_Check_Test/full_scan_results/chunk24_split_review_new.json``.
+    An unparsable response is reported with a ``[FAIL]`` line, not an exception.
+    """
+    chunk24 = load_test_chunks()[24]["content"]
+
+    print(f"\nChunk 24 length: {len(chunk24)}")
+    print(f"Split threshold: {SPLIT_THRESHOLD}")
+
+    reviewer = GrammarCheckReviewer()
+
+    started_at = time.time()
+    response = await reviewer._check_grammar_with_split(
+        trace_id="test_split_chunk24",
+        review_content=chunk24,
+        enable_thinking=False,
+    )
+    elapsed = time.time() - started_at
+
+    print(f"\nSplit review completed in {elapsed:.2f}s")
+    print(f"Response length: {len(response)}")
+
+    # Interpret the response: either the no-issue phrase or a JSON list.
+    if response == "无明显问题":
+        print("[INFO] No issues found after split review")
+    else:
+        try:
+            issues = json.loads(response)
+            print(f"[PASS] test_full_split_review: {len(issues)} unique issues found")
+            for number, issue in enumerate(issues, start=1):
+                print(f"  [{number}] {issue.get('issue_point', '')}: {issue.get('suggestion', '')[:50]}...")
+        except json.JSONDecodeError:
+            print(f"[FAIL] Response is not valid JSON: {response[:200]}...")
+
+    # Persist the raw response.
+    output_dir = os.path.join(project_root, "utils_test", "Grammar_Check_Test", "full_scan_results")
+    output_file = os.path.join(output_dir, "chunk24_split_review_new.json")
+    os.makedirs(output_dir, exist_ok=True)
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(response)
+    print(f"Results saved to: {output_file}")
+
+
+if __name__ == "__main__":
+    banner = "=" * 60
+
+    print(banner)
+    print("Testing grammar_check split logic")
+    print(banner)
+
+    # Synchronous tests, in order.
+    for sync_test in (test_parse_segment_response, test_deduplicate_issues, test_split_trigger):
+        sync_test()
+
+    # Asynchronous test.
+    print("\n" + banner)
+    print("Running full split review test (async)...")
+    print(banner)
+    asyncio.run(test_full_split_review())