Kaynağa Gözat

fix: AC初筛纯数字敏感词直接过滤,不走LLM二审

施工方案中大量数字(桩号、里程、日期等)会被数字敏感词库误命中,
纯数字不构成政治敏感风险,直接跳过,无需消耗 LLM 调用。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
WangXuMing 4 hafta önce
ebeveyn
işleme
780c5d7fe3

+ 13 - 1
core/construction_review/component/reviewers/sensitive_check_reviewer.py

@@ -40,8 +40,20 @@ class SensitiveCheckReviewer(BaseReviewer):
 
             first_results = await check_sensitive_words_async(review_content)
 
+            # 过滤纯数字敏感词:施工方案中大量数字(桩号、里程、日期等)会被 AC 误命中,
+            # 纯数字词不构成政治敏感风险,直接跳过,不走 LLM 二审
             if first_results:
-                logger.info(f"[敏感词] AC检测到 {len(first_results)} 个词, trace: {trace_id}")
+                digit_words = [r for r in first_results if r['word'].isdigit()]
+                non_digit_results = [r for r in first_results if not r['word'].isdigit()]
+                if digit_words:
+                    logger.info(
+                        f"[敏感词] 过滤纯数字敏感词 {len(digit_words)} 个: "
+                        f"{[r['word'] for r in digit_words[:10]]}, trace: {trace_id}"
+                    )
+                first_results = non_digit_results
+
+            if first_results:
+                logger.info(f"[敏感词] AC检测到 {len(first_results)} 个词(已过滤纯数字), trace: {trace_id}")
                 # 诊断日志:打印前 5 个命中的敏感词
                 sample_words = [f"{r['word']}({r['source']})" for r in first_results[:5]]
                 logger.info(f"[敏感词] 命中样本(前5): {', '.join(sample_words)}")