vor 1 Tag · fd211873b7
--- a/core/construction_review/component/reviewers/utils/inter_tool.py
+++ b/core/construction_review/component/reviewers/utils/inter_tool.py
@@ -504,7 +504,11 @@ class InterTool:
 
				                 logger.warning(f"check_result内容: {check_result}")
			
 
				 
			
 
				         # 过滤掉 exist_issue=false 的审查项（无问题的项不返回到最终结果）
			
 
				-        review_lists = [item for item in review_lists if item.get("exist_issue", False)]
			
 
				+        # 同时过滤掉 check_result 为字符串的项（解析失败的 malformed JSON）
			
 
				+        review_lists = [
			
 
				+            item for item in review_lists
			
 
				+            if item.get("exist_issue", False) and not isinstance(item.get("check_result"), str)
			
 
				+        ]
			
 
				 
			
 
				         # 统计风险等级
			
 
				         for issue in review_lists:
			
@@ -581,19 +585,24 @@ class InterTool:
 
				 
			
 
				             # 3. 如果JSON解析失败，回退到文本解析
			
 
				             if not review_lists:
			
 
				-                # 🔧 修复：检查响应是否为空或只包含空白字符
			
 
				                 response_stripped = response.strip() if isinstance(response, str) else ""
			
 
				                 is_empty_response = not response_stripped or response_stripped in ["", "null", "None", "undefined"]
			
 
				 
			
 
				+                # 检测是否为格式错误的 JSON（包含 JSON 标记但解析失败）
			
 
				+                # 这类情况不应作为有效 issue 输出
			
 
				+                looks_like_broken_json = any(marker in response_stripped for marker in ['```json', '{', '[', '"issue_point"'])
			
 
				+
			
 
				                 risk_level = self._determine_risk_level(response)
			
 
				 
			
 
				-                # 如果响应为空，则设置 exist_issue=False
			
 
				+                # 空响应或格式错误的 JSON → exist_issue=False（不输出到最终结果）
			
 
				+                should_exist = not is_empty_response and not looks_like_broken_json
			
 
				+
			
 
				                 review_lists.append({
			
 
				                     "check_item": check_name,
			
 
				                     "chapter_code": chapter_code,
			
 
				                     "check_item_code": check_item_code,
			
 
				                     "check_result": response,
			
 
				-                    "exist_issue": not is_empty_response,  # 🔧 修复：空响应不存在问题
			
 
				+                    "exist_issue": should_exist,
			
 
				                     "risk_info": {"risk_level": risk_level}
			
 
				                 })
			
 
				 
			
--- a/utils_test/Grammar_Check_Test/analyze_grammar_quality.py
+++ b/utils_test/Grammar_Check_Test/analyze_grammar_quality.py
@@ -0,0 +1,230 @@
 
				+"""
			
 
				+分析最新审查结果中词句语法审查的质量
			
 
				+
			
 
				+检查项：
			
 
				+1. "将A改为A" 模式（修正前后相同）
			
 
				+2. suggestion/reason 中的自我辩论（犹豫措辞）
			
 
				+3. risk_level 为空
			
 
				+4. 技术操作规程越界审查
			
 
				+5. 重复问题
			
 
				+6. JSON 解析失败
			
 
				+7. suggestion 过长（>200字，可能包含推理过程）
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+import sys
			
 
				+import os
			
 
				+
			
 
				+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				+sys.path.insert(0, project_root)
			
 
				+
			
 
				+
			
 
				+def extract_correction_pairs(suggestion: str):
			
 
				+    """从 suggestion 中提取所有 '将X改为Y' 的 (X, Y) 对"""
			
 
				+    quote_chars = r"""['""''「」]"""
			
 
				+    pattern = rf"将{quote_chars}(.*?){quote_chars}\s*改为\s*{quote_chars}(.*?){quote_chars}"
			
 
				+    return re.findall(pattern, suggestion)
			
 
				+
			
 
				+
			
 
				+def check_hesitation_words(text: str):
			
 
				+    """检查文本中是否包含犹豫措辞"""
			
 
				+    hesitation_words = [
			
 
				+        '可能', '暂定', '不确定', '重新审视', '然而', '不过', '似乎',
			
 
				+        '但是', '其实', '实际上', '再细看', '再想想', '仔细想想',
			
 
				+        '反过来', '另一方面', '换个角度'
			
 
				+    ]
			
 
				+    found = [w for w in hesitation_words if w in text]
			
 
				+    return found
			
 
				+
			
 
				+
			
 
				+def is_technical_procedure(issue_point: str, reason: str):
			
 
				+    """检查是否为技术操作规程越界审查"""
			
 
				+    technical_keywords = [
			
 
				+        '操作步骤', '工艺参数', '施工顺序', '操作规程',
			
 
				+        '技术规范', '施工方案', '工艺流程'
			
 
				+    ]
			
 
				+    combined = issue_point + reason
			
 
				+    return [kw for kw in technical_keywords if kw in combined]
			
 
				+
			
 
				+
			
 
				+def analyze_grammar_check_results(result_file: str):
			
 
				+    """分析词句语法审查结果质量"""
			
 
				+    with open(result_file, encoding='utf-8') as f:
			
 
				+        data = json.load(f)
			
 
				+
			
 
				+    issues = data.get('issues', [])
			
 
				+    grammar_items = []
			
 
				+
			
 
				+    for issue_wrapper in issues:
			
 
				+        for issue_id, issue_detail in issue_wrapper.items():
			
 
				+            review_lists = issue_detail.get('review_lists', [])
			
 
				+            metadata = issue_detail.get('metadata', {})
			
 
				+            for item in review_lists:
			
 
				+                check_item = item.get('check_item', '')
			
 
				+                if check_item in ['sensitive_word_check', 'grammar_check']:
			
 
				+                    grammar_items.append({
			
 
				+                        'item': item,
			
 
				+                        'issue_id': issue_id,
			
 
				+                        'location_label': metadata.get('review_location_label', '')
			
 
				+                    })
			
 
				+
			
 
				+    print(f"Total grammar_check items: {len(grammar_items)}")
			
 
				+    print()
			
 
				+
			
 
				+    # Quality checks
			
 
				+    a_to_a_issues = []
			
 
				+    hesitation_issues = []
			
 
				+    empty_risk_issues = []
			
 
				+    technical_issues = []
			
 
				+    duplicate_issues = []
			
 
				+    parse_failures = []
			
 
				+    long_suggestion_issues = []
			
 
				+
			
 
				+    seen_corrections = {}
			
 
				+
			
 
				+    for i, entry in enumerate(grammar_items):
			
 
				+        item = entry['item']
			
 
				+        check_result = item.get('check_result', {})
			
 
				+
			
 
				+        # STRING format = parse failure
			
 
				+        if isinstance(check_result, str):
			
 
				+            parse_failures.append({
			
 
				+                'index': i + 1,
			
 
				+                'raw': check_result[:200],
			
 
				+                'location_label': entry['location_label']
			
 
				+            })
			
 
				+            continue
			
 
				+
			
 
				+        issue_point = check_result.get('issue_point', '')
			
 
				+        location = check_result.get('location', '')
			
 
				+        suggestion = check_result.get('suggestion', '')
			
 
				+        reason = check_result.get('reason', '')
			
 
				+        risk_level = check_result.get('risk_level', '')
			
 
				+
			
 
				+        # Check 1: Empty risk_level
			
 
				+        if not risk_level or risk_level.strip() == '':
			
 
				+            empty_risk_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'issue_point': issue_point,
			
 
				+                'suggestion': suggestion[:80]
			
 
				+            })
			
 
				+
			
 
				+        # Check 2: A→A pattern
			
 
				+        pairs = extract_correction_pairs(suggestion)
			
 
				+        for before, after in pairs:
			
 
				+            if before.strip() == after.strip():
			
 
				+                a_to_a_issues.append({
			
 
				+                    'index': i + 1,
			
 
				+                    'issue_point': issue_point,
			
 
				+                    'before': before,
			
 
				+                    'after': after
			
 
				+                })
			
 
				+
			
 
				+        # Check 3: Hesitation words in suggestion
			
 
				+        sug_hesitation = check_hesitation_words(suggestion)
			
 
				+        if sug_hesitation:
			
 
				+            hesitation_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'field': 'suggestion',
			
 
				+                'words': sug_hesitation,
			
 
				+                'text': suggestion[:100]
			
 
				+            })
			
 
				+
			
 
				+        # Check 4: Hesitation words in reason
			
 
				+        reason_hesitation = check_hesitation_words(reason)
			
 
				+        if reason_hesitation:
			
 
				+            hesitation_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'field': 'reason',
			
 
				+                'words': reason_hesitation,
			
 
				+                'text': reason[:100]
			
 
				+            })
			
 
				+
			
 
				+        # Check 5: Technical procedure
			
 
				+        tech_kws = is_technical_procedure(issue_point, reason)
			
 
				+        if tech_kws:
			
 
				+            technical_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'issue_point': issue_point,
			
 
				+                'keywords': tech_kws
			
 
				+            })
			
 
				+
			
 
				+        # Check 6: Long suggestion (>200 chars)
			
 
				+        if len(suggestion) > 200:
			
 
				+            long_suggestion_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'issue_point': issue_point,
			
 
				+                'length': len(suggestion),
			
 
				+                'text': suggestion[:100]
			
 
				+            })
			
 
				+
			
 
				+        # Check 7: Duplicates (same correction key)
			
 
				+        if pairs:
			
 
				+            sorted_pairs = sorted(pairs)
			
 
				+            correction_key = ",".join(f"{a}→{b}" for a, b in sorted_pairs)
			
 
				+        else:
			
 
				+            correction_key = suggestion.strip()
			
 
				+
			
 
				+        if correction_key in seen_corrections:
			
 
				+            duplicate_issues.append({
			
 
				+                'index': i + 1,
			
 
				+                'first_index': seen_corrections[correction_key],
			
 
				+                'correction_key': correction_key,
			
 
				+                'issue_point': issue_point
			
 
				+            })
			
 
				+        else:
			
 
				+            seen_corrections[correction_key] = i + 1
			
 
				+
			
 
				+    # Print results
			
 
				+    print("=" * 60)
			
 
				+    print("QUALITY ANALYSIS RESULTS")
			
 
				+    print("=" * 60)
			
 
				+
			
 
				+    sections = [
			
 
				+        ("A->A Pattern (will A change to A)", a_to_a_issues),
			
 
				+        ("Self-debate / Hesitation words", hesitation_issues),
			
 
				+        ("Empty risk_level", empty_risk_issues),
			
 
				+        ("Technical procedure (out of scope)", technical_issues),
			
 
				+        ("Duplicate corrections", duplicate_issues),
			
 
				+        ("JSON parse failures", parse_failures),
			
 
				+        ("Long suggestions (>200 chars)", long_suggestion_issues),
			
 
				+    ]
			
 
				+
			
 
				+    total_problems = 0
			
 
				+    for title, items in sections:
			
 
				+        count = len(items)
			
 
				+        total_problems += count
			
 
				+        status = "PASS" if count == 0 else "FAIL"
			
 
				+        print(f"\n[{status}] {title}: {count}")
			
 
				+        if items:
			
 
				+            for item in items:
			
 
				+                print(f"  - #{item.get('index', '?')}: {json.dumps(item, ensure_ascii=False)[:150]}")
			
 
				+
			
 
				+    print(f"\n{'=' * 60}")
			
 
				+    print(f"TOTAL: {len(grammar_items)} items, {total_problems} quality issues")
			
 
				+    print(f"Quality rate: {(len(grammar_items) - total_problems) / len(grammar_items) * 100:.1f}%")
			
 
				+
			
 
				+    # Print valid items summary
			
 
				+    print(f"\n{'=' * 60}")
			
 
				+    print("VALID ITEMS SUMMARY")
			
 
				+    print("=" * 60)
			
 
				+    for i, entry in enumerate(grammar_items):
			
 
				+        item = entry['item']
			
 
				+        check_result = item.get('check_result', {})
			
 
				+        if isinstance(check_result, str):
			
 
				+            print(f"  [{i+1}] [PARSE_FAIL] {check_result[:60]}...")
			
 
				+            continue
			
 
				+        issue_point = check_result.get('issue_point', '')
			
 
				+        suggestion = check_result.get('suggestion', '')
			
 
				+        risk_level = check_result.get('risk_level', '')
			
 
				+        print(f"  [{i+1}] [{risk_level}] {issue_point}: {suggestion[:60]}...")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    result_file = os.path.join(
			
 
				+        project_root,
			
 
				+        "temp", "construction_review", "final_result",
			
 
				+        "67d45692fb97aeef8f896e78475ce539-1779785718.json"
			
 
				+    )
			
 
				+    analyze_grammar_check_results(result_file)
			
--- a/utils_test/Grammar_Check_Test/run_full_scan.py
+++ b/utils_test/Grammar_Check_Test/run_full_scan.py
@@ -0,0 +1,93 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""全量 chunk 词句语法审查 — 保存所有原始响应用于人工分析"""
			
 
				+
			
 
				+import sys, os, json, asyncio, time
			
 
				+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)).split('utils_test')[0])
			
 
				+
			
 
				+RESULT_JSON = os.path.join(
			
 
				+    os.path.dirname(os.path.abspath(__file__)).split('utils_test')[0],
			
 
				+    "temp", "construction_review", "final_result",
			
 
				+    "67d45692fb97aeef8f896e78475ce539-1779781589.json"
			
 
				+)
			
 
				+OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "full_scan_results")
			
 
				+
			
 
				+async def main():
			
 
				+    from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
			
 
				+
			
 
				+    with open(RESULT_JSON, 'r', encoding='utf-8') as f:
			
 
				+        data = json.load(f)
			
 
				+    chunks = data['document_result']['structured_content']['chunks']
			
 
				+
			
 
				+    os.makedirs(OUTPUT_DIR, exist_ok=True)
			
 
				+    reviewer = GrammarCheckReviewer()
			
 
				+
			
 
				+    all_results = []
			
 
				+
			
 
				+    for i, chunk in enumerate(chunks):
			
 
				+        content = chunk['content']
			
 
				+        section = chunk.get('section_label', f'chunk_{i}')
			
 
				+        chapter = chunk.get('chapter_classification', 'unknown')
			
 
				+        trace_id = f"full_scan_{i}_{int(time.time())}"
			
 
				+
			
 
				+        print(f"[{i:02d}/{len(chunks)}] {chapter}/{section[:40]}... (len={len(content)})")
			
 
				+
			
 
				+        start = time.time()
			
 
				+        try:
			
 
				+            result = await reviewer.check_grammar(
			
 
				+                trace_id=trace_id,
			
 
				+                review_content=content,
			
 
				+                state=None, stage_name=None,
			
 
				+                enable_thinking=False,
			
 
				+            )
			
 
				+            wall_time = time.time() - start
			
 
				+            response_text = result.details.get('response', '')
			
 
				+            success = result.success
			
 
				+            error = result.error_message
			
 
				+        except Exception as e:
			
 
				+            wall_time = time.time() - start
			
 
				+            response_text = ""
			
 
				+            success = False
			
 
				+            error = str(e)
			
 
				+            print(f"      ERROR: {e}")
			
 
				+
			
 
				+        record = {
			
 
				+            "chunk_index": i,
			
 
				+            "chapter": chapter,
			
 
				+            "section": section,
			
 
				+            "content_length": len(content),
			
 
				+            "content_preview": content[:200],
			
 
				+            "success": success,
			
 
				+            "error": error,
			
 
				+            "wall_time": round(wall_time, 2),
			
 
				+            "response_length": len(response_text),
			
 
				+            "raw_response": response_text,
			
 
				+        }
			
 
				+        all_results.append(record)
			
 
				+
			
 
				+        is_no_issue = '无明显问题' in response_text and len(response_text) < 50
			
 
				+        status = "NO_ISSUE" if is_no_issue else f"ISSUES(response_len={len(response_text)})"
			
 
				+        print(f"      {wall_time:.2f}s | {status}")
			
 
				+
			
 
				+    # 保存汇总
			
 
				+    summary_path = os.path.join(OUTPUT_DIR, "all_results.json")
			
 
				+    with open(summary_path, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(all_results, f, ensure_ascii=False, indent=2)
			
 
				+    print(f"\nSaved {len(all_results)} results to {summary_path}")
			
 
				+
			
 
				+    # 保存每个 chunk 的独立文件（方便逐条阅读）
			
 
				+    for record in all_results:
			
 
				+        idx = record["chunk_index"]
			
 
				+        chunk_path = os.path.join(OUTPUT_DIR, f"chunk_{idx:02d}_{record['chapter']}.json")
			
 
				+        with open(chunk_path, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(record, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print(f"Saved individual files to {OUTPUT_DIR}/")
			
 
				+
			
 
				+    # 打印统计
			
 
				+    no_issue_count = sum(1 for r in all_results if '无明显问题' in r['raw_response'] and len(r['raw_response']) < 50)
			
 
				+    issue_count = len(all_results) - no_issue_count
			
 
				+    error_count = sum(1 for r in all_results if not r['success'])
			
 
				+    print(f"\nStats: {no_issue_count} no-issue, {issue_count} has-issues, {error_count} errors")
			
 
				+
			
 
				+asyncio.run(main())
			
--- a/utils_test/Grammar_Check_Test/test_grammar_check_prompt_fix.py
+++ b/utils_test/Grammar_Check_Test/test_grammar_check_prompt_fix.py
@@ -0,0 +1,341 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+词句语法审查 — Prompt 修复验证测试
			
 
				+
			
 
				+验证目标：修复 "将A改为A" 的离谱错误
			
 
				+- 旧 prompt 包含否定示例（如"禁止输出将'设'改为'设'"），反而给模型植入了错误模式
			
 
				+- 新 prompt 使用肯定式规则（"犹豫时输出无明显问题"）
			
 
				+
			
 
				+测试数据：temp/construction_review/final_result/67d45692fb97aeef8f896e78475ce539-1779781589.json
			
 
				+其中 chunk[8] 包含触发 bug 的原文："必须采取充分的安全保证措施"
			
 
				+
			
 
				+运行方式：
			
 
				+    $env:PYTHONPATH = (Get-Location)
			
 
				+    pytest utils_test/Grammar_Check_Test/test_grammar_check_prompt_fix.py -v -s
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import os
			
 
				+import json
			
 
				+import re
			
 
				+import time
			
 
				+import asyncio
			
 
				+from pathlib import Path
			
 
				+
			
 
				+# 项目根目录注入
			
 
				+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
			
 
				+if PROJECT_ROOT not in sys.path:
			
 
				+    sys.path.insert(0, PROJECT_ROOT)
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+# ============================================================
			
 
				+# 测试数据
			
 
				+# ============================================================
			
 
				+RESULT_JSON = os.path.join(
			
 
				+    PROJECT_ROOT,
			
 
				+    "temp", "construction_review", "final_result",
			
 
				+    "67d45692fb97aeef8f896e78475ce539-1779781589.json"
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def _load_chunks():
			
 
				+    """加载文档 chunks"""
			
 
				+    with open(RESULT_JSON, 'r', encoding='utf-8') as f:
			
 
				+        data = json.load(f)
			
 
				+    return data['document_result']['structured_content']['chunks']
			
 
				+
			
 
				+
			
 
				+# ============================================================
			
 
				+# Bug 检测工具函数
			
 
				+# ============================================================
			
 
				+def detect_a_to_a_pattern(response_text: str) -> list:
			
 
				+    """
			
 
				+    检测模型输出中是否包含 "将X改为X" 模式（X相同）
			
 
				+
			
 
				+    匹配模式：
			
 
				+    - 将"充分"改为"充分"
			
 
				+    - 将'设'改为'设'
			
 
				+    - 把"X"修改为"X"
			
 
				+    - 建议将X改为X
			
 
				+
			
 
				+    Returns:
			
 
				+        list: 匹配到的问题片段列表
			
 
				+    """
			
 
				+    if not response_text:
			
 
				+        return []
			
 
				+
			
 
				+    issues = []
			
 
				+
			
 
				+    # 模式1: 将"X"改为"X" / 将'X'改为'X' / 把"X"改为"X"
			
 
				+    pattern_quoted = re.compile(
			
 
				+        r'(?:将|把)["“\'](.{1,10})["”\']\s*(?:改为|修改为|替换为|换成)\s*["“\'](.{1,10})["”\']'
			
 
				+    )
			
 
				+    for m in pattern_quoted.finditer(response_text):
			
 
				+        original, replacement = m.group(1).strip(), m.group(2).strip()
			
 
				+        if original == replacement:
			
 
				+            issues.append(m.group(0))
			
 
				+
			
 
				+    # 模式2: suggestion 字段过长且包含自我辩论关键词
			
 
				+    debate_keywords = ['然而', '再细看', '重新审视', '其实', '但', '不过', '似乎', '略显生硬']
			
 
				+    debate_count = sum(1 for kw in debate_keywords if kw in response_text)
			
 
				+    if debate_count >= 3:
			
 
				+        issues.append(f"[自我辩论] 响应中包含 {debate_count} 个犹豫/反驳关键词: "
			
 
				+                      f"{[kw for kw in debate_keywords if kw in response_text]}")
			
 
				+
			
 
				+    return issues
			
 
				+
			
 
				+
			
 
				+def parse_json_from_response(response_text: str) -> list:
			
 
				+    """从模型响应中提取 JSON 结果"""
			
 
				+    if not response_text:
			
 
				+        return []
			
 
				+
			
 
				+    # 先尝试直接解析
			
 
				+    try:
			
 
				+        data = json.loads(response_text)
			
 
				+        if isinstance(data, list):
			
 
				+            return data
			
 
				+        elif isinstance(data, dict):
			
 
				+            return [data]
			
 
				+    except (json.JSONDecodeError, TypeError):
			
 
				+        pass
			
 
				+
			
 
				+    # 尝试从 markdown 代码块中提取
			
 
				+    json_blocks = re.findall(r'```(?:json)?\s*\n?(.*?)\n?```', response_text, re.DOTALL)
			
 
				+    for block in json_blocks:
			
 
				+        try:
			
 
				+            data = json.loads(block.strip())
			
 
				+            if isinstance(data, list):
			
 
				+                return data
			
 
				+            elif isinstance(data, dict):
			
 
				+                return [data]
			
 
				+        except (json.JSONDecodeError, TypeError):
			
 
				+            continue
			
 
				+
			
 
				+    # 尝试找到第一个 [ 或 { 开始解析
			
 
				+    for start_char, end_char in [('[', ']'), ('{', '}')]:
			
 
				+        start = response_text.find(start_char)
			
 
				+        if start >= 0:
			
 
				+            # 从后往前找匹配的结束符
			
 
				+            for end in range(len(response_text) - 1, start, -1):
			
 
				+                if response_text[end] == end_char:
			
 
				+                    try:
			
 
				+                        data = json.loads(response_text[start:end + 1])
			
 
				+                        if isinstance(data, list):
			
 
				+                            return data
			
 
				+                        elif isinstance(data, dict):
			
 
				+                            return [data]
			
 
				+                    except (json.JSONDecodeError, TypeError):
			
 
				+                        continue
			
 
				+
			
 
				+    return []
			
 
				+
			
 
				+
			
 
				+# ============================================================
			
 
				+# 测试类
			
 
				+# ============================================================
			
 
				+class TestGrammarCheckPromptFix:
			
 
				+    """词句语法审查 Prompt 修复验证"""
			
 
				+
			
 
				+    @pytest.fixture(autouse=True)
			
 
				+    def setup(self):
			
 
				+        """初始化"""
			
 
				+        self.chunks = _load_chunks()
			
 
				+        # bug 复现的 chunk: [8] 包含 "采取充分的安全保证措施"
			
 
				+        self.bug_chunk = self.chunks[8]
			
 
				+        assert '充分' in self.bug_chunk['content'], "chunk[8] 应包含 '充分' 文本"
			
 
				+
			
 
				+    @pytest.mark.asyncio
			
 
				+    @pytest.mark.integration
			
 
				+    async def test_bug_chunk_no_a_to_a(self):
			
 
				+        """
			
 
				+        【核心测试】原 bug chunk 不再产生 "将A改为A" 的结果
			
 
				+
			
 
				+        这是触发原始 bug 的具体文本（chunk[8]: 施工要求和技术保证条件），
			
 
				+        模型曾对"充分"一词产生自我辩论，输出"将'充分'改为'充分'"。
			
 
				+        """
			
 
				+        from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
			
 
				+
			
 
				+        reviewer = GrammarCheckReviewer()
			
 
				+        trace_id = f"grammar_fix_test_bug_{int(time.time())}"
			
 
				+
			
 
				+        print(f"\n{'='*70}")
			
 
				+        print(f"  测试原 bug chunk: {self.bug_chunk['section_label']}")
			
 
				+        print(f"  内容长度: {len(self.bug_chunk['content'])} 字符")
			
 
				+        print(f"{'='*70}")
			
 
				+
			
 
				+        start = time.time()
			
 
				+        result = await reviewer.check_grammar(
			
 
				+            trace_id=trace_id,
			
 
				+            review_content=self.bug_chunk['content'],
			
 
				+            state=None,
			
 
				+            stage_name=None,
			
 
				+            enable_thinking=False,
			
 
				+        )
			
 
				+        wall_time = time.time() - start
			
 
				+
			
 
				+        print(f"\n  审查耗时: {wall_time:.2f}s")
			
 
				+        print(f"  success: {result.success}")
			
 
				+
			
 
				+        response_text = result.details.get('response', '')
			
 
				+        print(f"  响应长度: {len(response_text)} 字符")
			
 
				+
			
 
				+        # 判断是否输出"无明显问题"
			
 
				+        is_no_issue = '无明显问题' in response_text and len(response_text) < 50
			
 
				+        print(f"  是否无明显问题: {is_no_issue}")
			
 
				+
			
 
				+        if not is_no_issue:
			
 
				+            # 解析 JSON 结果
			
 
				+            issues = parse_json_from_response(response_text)
			
 
				+            print(f"  发现 {len(issues)} 个问题")
			
 
				+            for idx, issue in enumerate(issues):
			
 
				+                print(f"\n  --- 问题 {idx + 1} ---")
			
 
				+                print(f"  issue_point: {issue.get('issue_point', 'N/A')}")
			
 
				+                print(f"  location: {issue.get('location', 'N/A')[:80]}...")
			
 
				+                print(f"  suggestion: {issue.get('suggestion', 'N/A')[:120]}")
			
 
				+                print(f"  reason: {issue.get('reason', 'N/A')[:120]}")
			
 
				+                print(f"  risk_level: {issue.get('risk_level', 'N/A')}")
			
 
				+
			
 
				+            # 打印原始响应供人工检查
			
 
				+            print(f"\n  --- 原始响应 ---")
			
 
				+            print(response_text[:2000])
			
 
				+        else:
			
 
				+            print(f"  原始响应: {response_text}")
			
 
				+
			
 
				+        # ===== 断言 =====
			
 
				+        assert result.success, f"审查应成功，实际错误: {result.error_message}"
			
 
				+
			
 
				+        # 核心断言：不应出现 "将A改为A" 模式
			
 
				+        a_to_a_issues = detect_a_to_a_pattern(response_text)
			
 
				+        assert not a_to_a_issues, (
			
 
				+            f"检测到 '将A改为A' 模式仍存在！\n"
			
 
				+            f"问题片段: {a_to_a_issues}\n"
			
 
				+            f"完整响应:\n{response_text}"
			
 
				+        )
			
 
				+
			
 
				+    @pytest.mark.asyncio
			
 
				+    @pytest.mark.integration
			
 
				+    async def test_multiple_overview_chunks(self):
			
 
				+        """
			
 
				+        【扩展测试】多个 overview chunk 均不产生 "将A改为A" 结果
			
 
				+
			
 
				+        测试所有 overview 章节的 chunk，确保修复具有泛化性。
			
 
				+        """
			
 
				+        from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
			
 
				+
			
 
				+        reviewer = GrammarCheckReviewer()
			
 
				+
			
 
				+        # 筛选 overview chunks
			
 
				+        overview_chunks = [
			
 
				+            c for c in self.chunks
			
 
				+            if c.get('chapter_classification') == 'overview'
			
 
				+        ]
			
 
				+        print(f"\n{'='*70}")
			
 
				+        print(f"  扩展测试: {len(overview_chunks)} 个 overview chunks")
			
 
				+        print(f"{'='*70}")
			
 
				+
			
 
				+        all_a_to_a_issues = []
			
 
				+
			
 
				+        for idx, chunk in enumerate(overview_chunks):
			
 
				+            trace_id = f"grammar_fix_test_overview_{idx}_{int(time.time())}"
			
 
				+            section = chunk.get('section_label', f'chunk_{idx}')
			
 
				+            content = chunk['content']
			
 
				+
			
 
				+            print(f"\n  [{idx}] {section} (len={len(content)})")
			
 
				+
			
 
				+            start = time.time()
			
 
				+            result = await reviewer.check_grammar(
			
 
				+                trace_id=trace_id,
			
 
				+                review_content=content,
			
 
				+                state=None,
			
 
				+                stage_name=None,
			
 
				+                enable_thinking=False,
			
 
				+            )
			
 
				+            wall_time = time.time() - start
			
 
				+
			
 
				+            response_text = result.details.get('response', '')
			
 
				+            is_no_issue = '无明显问题' in response_text and len(response_text) < 50
			
 
				+
			
 
				+            # 检测 A→A 模式
			
 
				+            a_to_a = detect_a_to_a_pattern(response_text)
			
 
				+            status = "[OK] 无明显问题" if is_no_issue else (
			
 
				+                f"[!!] 有 {len(parse_json_from_response(response_text))} 个问题"
			
 
				+            )
			
 
				+            if a_to_a:
			
 
				+                status += f" [FAIL] 检测到A->A模式: {a_to_a}"
			
 
				+                all_a_to_a_issues.extend([(section, issue) for issue in a_to_a])
			
 
				+
			
 
				+            print(f"      耗时: {wall_time:.2f}s | {status}")
			
 
				+
			
 
				+            if not is_no_issue and not a_to_a:
			
 
				+                # 打印发现的问题摘要
			
 
				+                issues = parse_json_from_response(response_text)
			
 
				+                for issue in issues:
			
 
				+                    ip = issue.get('issue_point', '')[:60]
			
 
				+                    sg = issue.get('suggestion', '')[:80]
			
 
				+                    print(f"      -> {ip} | 建议: {sg}")
			
 
				+
			
 
				+        print(f"\n{'='*70}")
			
 
				+        print(f"  扩展测试完成: {len(overview_chunks)} 个 chunks")
			
 
				+        print(f"  A->A 问题数: {len(all_a_to_a_issues)}")
			
 
				+        print(f"{'='*70}")
			
 
				+
			
 
				+        # 核心断言
			
 
				+        assert not all_a_to_a_issues, (
			
 
				+            f"检测到 {len(all_a_to_a_issues)} 个 '将A改为A' 模式！\n"
			
 
				+            + "\n".join(f"  {sec}: {issue}" for sec, issue in all_a_to_a_issues)
			
 
				+        )
			
 
				+
			
 
				+    @pytest.mark.asyncio
			
 
				+    @pytest.mark.integration
			
 
				+    async def test_suggestion_field_concise(self):
			
 
				+        """
			
 
				+        【格式验证】suggestion 字段应简洁，不包含推理过程
			
 
				+
			
 
				+        新 prompt 要求 suggestion 只写最终结论，禁止自我辩论。
			
 
				+        """
			
 
				+        from core.construction_review.component.reviewers.grammar_check_reviewer import GrammarCheckReviewer
			
 
				+
			
 
				+        reviewer = GrammarCheckReviewer()
			
 
				+        trace_id = f"grammar_fix_test_concise_{int(time.time())}"
			
 
				+
			
 
				+        # 使用 bug chunk
			
 
				+        result = await reviewer.check_grammar(
			
 
				+            trace_id=trace_id,
			
 
				+            review_content=self.bug_chunk['content'],
			
 
				+            state=None,
			
 
				+            stage_name=None,
			
 
				+            enable_thinking=False,
			
 
				+        )
			
 
				+
			
 
				+        response_text = result.details.get('response', '')
			
 
				+        issues = parse_json_from_response(response_text)
			
 
				+
			
 
				+        if not issues:
			
 
				+            print("\n  模型输出'无明显问题'，无需验证 suggestion 格式")
			
 
				+            return
			
 
				+
			
 
				+        print(f"\n  发现 {len(issues)} 个问题，验证 suggestion 格式:")
			
 
				+
			
 
				+        for idx, issue in enumerate(issues):
			
 
				+            suggestion = issue.get('suggestion', '')
			
 
				+            reason = issue.get('reason', '')
			
 
				+            print(f"\n  --- 问题 {idx + 1} ---")
			
 
				+            print(f"  suggestion ({len(suggestion)}字): {suggestion[:150]}")
			
 
				+            print(f"  reason ({len(reason)}字): {reason[:150]}")
			
 
				+
			
 
				+            # suggestion 不应包含推理/辩论关键词
			
 
				+            debate_keywords = ['然而', '再细看', '重新审视', '让我们', '再审视']
			
 
				+            found_debate = [kw for kw in debate_keywords if kw in suggestion]
			
 
				+            assert not found_debate, (
			
 
				+                f"suggestion 字段包含推理过程！\n"
			
 
				+                f"检测到辩论关键词: {found_debate}\n"
			
 
				+                f"suggestion 内容: {suggestion}"
			
 
				+            )
			
 
				+
			
 
				+            # suggestion 不应过长（超过 200 字大概率包含推理）
			
 
				+            assert len(suggestion) < 200, (
			
 
				+                f"suggestion 字段过长（{len(suggestion)}字），可能包含推理过程:\n{suggestion}"
			
 
				+            )
			
--- a/utils_test/Grammar_Check_Test/test_grammar_check_split.py
+++ b/utils_test/Grammar_Check_Test/test_grammar_check_split.py
@@ -0,0 +1,197 @@
 
				+"""
			
 
				+测试词句语法审查的长文本切分逻辑
			
 
				+
			
 
				+测试内容：
			
 
				+1. 切分触发条件（>5000字）
			
 
				+2. 切分后并行审查
			
 
				+3. 结果合并去重
			
 
				+4. JSON 解析鲁棒性
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+import os
			
 
				+import json
			
 
				+import asyncio
			
 
				+import time
			
 
				+
			
 
				+# 注入项目根目录
			
 
				+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				+sys.path.insert(0, project_root)
			
 
				+
			
 
				+from core.construction_review.component.reviewers.grammar_check_reviewer import (
			
 
				+    GrammarCheckReviewer,
			
 
				+    SPLIT_THRESHOLD,
			
 
				+    SEGMENT_MIN_LENGTH,
			
 
				+    SEGMENT_TARGET_LENGTH,
			
 
				+    SEGMENT_OVERLAP,
			
 
				+)
			
 
				+from core.construction_review.component.reviewers.utils.text_split import split_text_with_overlap
			
 
				+
			
 
				+
			
 
				+def load_test_chunks():
			
 
				+    """加载测试数据"""
			
 
				+    test_file = os.path.join(
			
 
				+        project_root,
			
 
				+        "temp", "construction_review", "final_result",
			
 
				+        "67d45692fb97aeef8f896e78475ce539-1779781589.json"
			
 
				+    )
			
 
				+    with open(test_file, encoding="utf-8") as f:
			
 
				+        data = json.load(f)
			
 
				+    return data["document_result"]["structured_content"]["chunks"]
			
 
				+
			
 
				+
			
 
				+def test_parse_segment_response():
			
 
				+    """测试 JSON 解析鲁棒性"""
			
 
				+    reviewer = GrammarCheckReviewer()
			
 
				+
			
 
				+    # 测试1: 标准 JSON 数组
			
 
				+    response1 = '''```json
			
 
				+[
			
 
				+  {"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原因", "risk_level": "中风险"},
			
 
				+  {"issue_point": "重复字词", "location": "位置2", "suggestion": "删除重复", "reason": "原因", "risk_level": "低风险"}
			
 
				+]
			
 
				+```'''
			
 
				+    issues1 = reviewer._parse_segment_response(response1)
			
 
				+    assert len(issues1) == 2, f"Expected 2 issues, got {len(issues1)}"
			
 
				+    print("[PASS] test_parse_segment_response: JSON array parsed correctly")
			
 
				+
			
 
				+    # 测试2: 单个 JSON 对象
			
 
				+    response2 = '''```json
			
 
				+{"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原因", "risk_level": "中风险"}
			
 
				+```'''
			
 
				+    issues2 = reviewer._parse_segment_response(response2)
			
 
				+    assert len(issues2) == 1, f"Expected 1 issue, got {len(issues2)}"
			
 
				+    print("[PASS] test_parse_segment_response: JSON object parsed correctly")
			
 
				+
			
 
				+    # 测试3: 无明显问题
			
 
				+    response3 = "无明显问题"
			
 
				+    issues3 = reviewer._parse_segment_response(response3)
			
 
				+    assert len(issues3) == 0, f"Expected 0 issues, got {len(issues3)}"
			
 
				+    print("[PASS] test_parse_segment_response: no-issue response handled correctly")
			
 
				+
			
 
				+    # 测试4: 空响应
			
 
				+    response4 = ""
			
 
				+    issues4 = reviewer._parse_segment_response(response4)
			
 
				+    assert len(issues4) == 0, f"Expected 0 issues, got {len(issues4)}"
			
 
				+    print("[PASS] test_parse_segment_response: empty response handled correctly")
			
 
				+
			
 
				+    # 测试5: JSON 中嵌套"无明显问题"（reason 字段中）
			
 
				+    response5 = '''```json
			
 
				+[{"issue_point": "错别字", "location": "位置1", "suggestion": "将A改为B", "reason": "原文无明显问题但实际有错", "risk_level": "中风险"}]
			
 
				+```'''
			
 
				+    issues5 = reviewer._parse_segment_response(response5)
			
 
				+    assert len(issues5) == 1, f"Expected 1 issue, got {len(issues5)}"
			
 
				+    print("[PASS] test_parse_segment_response: JSON with 'no-issue' keyword in reason parsed correctly")
			
 
				+
			
 
				+
			
 
				+def test_deduplicate_issues():
			
 
				+    """测试去重逻辑"""
			
 
				+    reviewer = GrammarCheckReviewer()
			
 
				+
			
 
				+    issues = [
			
 
				+        {"issue_point": "错别字", "location": "位置1", "suggestion": "将'混泥土'改为'混凝土'", "reason": "原因", "risk_level": "中风险"},
			
 
				+        {"issue_point": "错别字", "location": "位置1", "suggestion": "将'混泥土'改为'混凝土'", "reason": "原因重复", "risk_level": "中风险"},  # 精确重复
			
 
				+        {"issue_point": "错别字", "location": "位置2", "suggestion": "将'珩架梁'改为'桁架梁'", "reason": "原因", "risk_level": "中风险"},
			
 
				+        {"issue_point": "错别字", "location": "位置3", "suggestion": "将'卷拨'改为'卷扬'", "reason": "原因", "risk_level": "中风险"},
			
 
				+        {"issue_point": "无明显问题", "location": "位置4", "suggestion": "无明显问题", "reason": "原因", "risk_level": "低风险"},  # 无效条目
			
 
				+        {"issue_point": "错别字", "location": "位置5", "suggestion": "将'不和'改为'不得'", "reason": "原因", "risk_level": ""},  # risk_level 为空
			
 
				+        {"issue_point": "错别字", "location": "位置6", "suggestion": "将'千斤项'改为'千斤顶'", "reason": "原因", "risk_level": "高风险"},  # 有效
			
 
				+    ]
			
 
				+
			
 
				+    unique = reviewer._deduplicate_issues(issues)
			
 
				+
			
 
				+    # 应该保留: 混泥土→混凝土, 珩架梁→桁架梁, 卷拨→卷扬, 千斤项→千斤顶 = 4个
			
 
				+    assert len(unique) == 4, f"Expected 4 unique issues, got {len(unique)}: {[i['suggestion'] for i in unique]}"
			
 
				+    print(f"[PASS] test_deduplicate_issues: {len(issues)} -> {len(unique)} issues")
			
 
				+
			
 
				+    # 验证过滤了无效条目
			
 
				+    suggestions = [i["suggestion"] for i in unique]
			
 
				+    assert "无明显问题" not in suggestions, "Should filter out 'no-issue' suggestions"
			
 
				+    assert all(i["risk_level"] for i in unique), "Should filter out empty risk_level"
			
 
				+    print("[PASS] test_deduplicate_issues: invalid entries filtered correctly")
			
 
				+
			
 
				+
			
 
				+def test_split_trigger():
			
 
				+    """测试切分触发条件"""
			
 
				+    chunks = load_test_chunks()
			
 
				+
			
 
				+    # 统计哪些 chunk 会触发切分
			
 
				+    trigger_count = 0
			
 
				+    no_trigger_count = 0
			
 
				+    for i, chunk in enumerate(chunks):
			
 
				+        content = chunk.get("content", "")
			
 
				+        if len(content) > SPLIT_THRESHOLD:
			
 
				+            trigger_count += 1
			
 
				+            segments = split_text_with_overlap(
			
 
				+                content,
			
 
				+                min_length=SEGMENT_MIN_LENGTH,
			
 
				+                target_length=SEGMENT_TARGET_LENGTH,
			
 
				+                overlap=SEGMENT_OVERLAP,
			
 
				+            )
			
 
				+            print(f"  Chunk[{i}] len={len(content)} -> {len(segments)} segments")
			
 
				+        else:
			
 
				+            no_trigger_count += 1
			
 
				+
			
 
				+    print(f"[PASS] test_split_trigger: {trigger_count} chunks will be split, {no_trigger_count} chunks will not")
			
 
				+
			
 
				+
			
 
				+async def test_full_split_review():
			
 
				+    """完整测试：对 Chunk 24 进行切分审查"""
			
 
				+    chunks = load_test_chunks()
			
 
				+    chunk24 = chunks[24]["content"]
			
 
				+
			
 
				+    print(f"\nChunk 24 length: {len(chunk24)}")
			
 
				+    print(f"Split threshold: {SPLIT_THRESHOLD}")
			
 
				+
			
 
				+    reviewer = GrammarCheckReviewer()
			
 
				+
			
 
				+    start_time = time.time()
			
 
				+    response = await reviewer._check_grammar_with_split(
			
 
				+        trace_id="test_split_chunk24",
			
 
				+        review_content=chunk24,
			
 
				+        enable_thinking=False,
			
 
				+    )
			
 
				+    elapsed = time.time() - start_time
			
 
				+
			
 
				+    print(f"\nSplit review completed in {elapsed:.2f}s")
			
 
				+    print(f"Response length: {len(response)}")
			
 
				+
			
 
				+    # 解析响应验证
			
 
				+    if response == "无明显问题":
			
 
				+        print("[INFO] No issues found after split review")
			
 
				+    else:
			
 
				+        try:
			
 
				+            issues = json.loads(response)
			
 
				+            print(f"[PASS] test_full_split_review: {len(issues)} unique issues found")
			
 
				+            for i, issue in enumerate(issues):
			
 
				+                print(f"  [{i+1}] {issue.get('issue_point', '')}: {issue.get('suggestion', '')[:50]}...")
			
 
				+        except json.JSONDecodeError:
			
 
				+            print(f"[FAIL] Response is not valid JSON: {response[:200]}...")
			
 
				+
			
 
				+    # 保存结果
			
 
				+    output_file = os.path.join(
			
 
				+        project_root,
			
 
				+        "utils_test", "Grammar_Check_Test", "full_scan_results",
			
 
				+        "chunk24_split_review_new.json"
			
 
				+    )
			
 
				+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        f.write(response)
			
 
				+    print(f"Results saved to: {output_file}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    print("=" * 60)
			
 
				+    print("Testing grammar_check split logic")
			
 
				+    print("=" * 60)
			
 
				+
			
 
				+    # 同步测试
			
 
				+    test_parse_segment_response()
			
 
				+    test_deduplicate_issues()
			
 
				+    test_split_trigger()
			
 
				+
			
 
				+    # 异步测试
			
 
				+    print("\n" + "=" * 60)
			
 
				+    print("Running full split review test (async)...")
			
 
				+    print("=" * 60)
			
 
				+    asyncio.run(test_full_split_review())