|
|
@@ -0,0 +1,325 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+
|
|
|
"""
Unit tests for the sensitive-word check.

Exercises a simplified, test-only variant of AIReviewEngine.check_sensitive
(keyword matching only) against several input scenarios.
"""
|
|
|
+
|
|
|
import asyncio
import os
import sys
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
|
|
|
+
|
|
|
# Make the project root importable and use it as the working directory.
# The root is taken to be two levels above the directory holding this file.
current_dir = Path(__file__).absolute().parent
project_root = current_dir.parent.parent
sys.path[:0] = [str(project_root)]
os.chdir(project_root)
|
|
|
+
|
|
|
+from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
|
+from foundation.observability.logger.loggering import server_logger as logger
|
|
|
+
|
|
|
+
|
|
|
class TestSensitiveCheck:
    """Stand-alone runner for the sensitive-word check scenarios.

    The class is driven by ``main()`` / ``run_tests_sync()`` rather than by a
    test framework: it builds a fixed list of test cases, runs each one
    through ``check_sensitive_simplified`` and logs a pass/fail summary.
    """

    # The class name starts with "Test" but it defines __init__, so pytest
    # cannot collect it and would only emit a collection warning. Opt out
    # explicitly.
    __test__ = False

    def __init__(self) -> None:
        """Create the runner and log an initialization banner."""
        # Per-case outcomes of the most recent run_all_tests() call, as
        # {"name": str, "passed": bool} dicts.
        self.test_results: List[Dict[str, Any]] = []
        logger.info("=" * 80)
        logger.info("初始化敏感词检查测试类")
        logger.info("=" * 80)

    async def check_sensitive_simplified(
        self,
        trace_id_idx: str,
        review_content: str,
        review_references: str = "",
        review_location_label: str = "",
        state: Optional[Dict] = None,
        stage_name: str = "测试阶段",
    ) -> "ReviewResult":
        """Run the keyword-based sensitive-word check on ``review_content``.

        This is a simplified, test-only variant of the production check. Per
        the original author's notes it omits the message push and the
        LLM-based second review, and only performs keyword matching via
        ``check_sensitive_words_async``.

        Args:
            trace_id_idx: Suffix appended to ``"sensitive_check"`` to form the
                trace id shown in the logs.
            review_content: Text to scan.
            review_references: Accepted for signature compatibility; unused.
            review_location_label: Label of the reviewed location (logged only).
            state: Accepted for signature compatibility; unused.
            stage_name: Stage name (logged only).

        Returns:
            A ``ReviewResult``. When the detector reports hits, ``success`` is
            False and ``details`` carries the raw hits (``sensitive_words``)
            plus a newline-joined summary (``formatted_info``). Otherwise
            ``success`` is True and ``details`` holds only ``name`` and
            ``response``.
        """
        # Imported lazily so the project module is only needed when a check
        # actually runs.
        from core.construction_review.component.reviewers.utils import (
            check_sensitive_words_async,
        )

        # Formatting (rather than "+") keeps this from raising on a non-str
        # index; for str input the result is identical.
        trace_id = f"sensitive_check{trace_id_idx}"

        logger.info("=" * 80)
        logger.info(f"开始执行敏感词检查 - trace_id: {trace_id}")
        logger.info(f"阶段名称: {stage_name}")
        logger.info(f"审查位置: {review_location_label}")
        logger.info(f"审查内容长度: {len(review_content)} 字符")
        if len(review_content) > 100:
            # Long content is truncated to a 100-character preview in the log.
            logger.info(f"审查内容预览: {review_content[:100]}...")
        else:
            logger.info(f"审查内容: {review_content}")
        logger.info("=" * 80)

        start_time = time.time()

        # Step 1: keyword-matching detection.
        logger.info("步骤1: 开始关键词匹配检测...")
        first_results = await check_sensitive_words_async(review_content)
        detection_time = time.time() - start_time
        logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")

        if not first_results:
            # No hits: build the success result.
            logger.info("步骤2: 未检测到敏感词")

            execution_time = time.time() - start_time
            result = ReviewResult(
                success=True,
                details={"name": "sensitive_check", "response": "无明显问题"},
                error_message=None,
                execution_time=execution_time,
            )

            logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
            logger.info("=" * 80)
            return result

        logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词")
        logger.warning("-" * 80)

        # Each hit is indexed by 'word', 'position', 'end_position', 'source'.
        sensitive_words_info = []
        for idx, item in enumerate(first_results, 1):
            word_info = (
                f"敏感词: {item['word']}, "
                f"位置: {item['position']}-{item['end_position']}, "
                f"来源: {item['source']}"
            )
            sensitive_words_info.append(word_info)
            logger.warning(f" 敏感词 #{idx}: {word_info}")

        formatted_sensitive_words = "\n".join(sensitive_words_info)
        logger.warning("-" * 80)

        # Simplified flow: report the hits directly, no LLM second review.
        execution_time = time.time() - start_time
        result = ReviewResult(
            success=False,
            details={
                "name": "sensitive_check",
                "response": f"检测到{len(first_results)}个敏感词",
                "sensitive_words": first_results,
                "formatted_info": formatted_sensitive_words,
            },
            error_message=None,
            execution_time=execution_time,
        )

        logger.warning(f"步骤3: 敏感词检查完成(检测到敏感词),总耗时: {execution_time:.4f}s")
        logger.info("=" * 80)
        return result

    def get_test_cases(self) -> List[Dict[str, Any]]:
        """Return the fixed list of test-case dicts.

        Each case has ``name``, ``trace_id_idx``, ``review_content``,
        ``review_location_label``, ``expected_success`` and ``description``.
        """
        return [
            {
                "name": "正常文本-无敏感词",
                "trace_id_idx": "_test_001",
                "review_content": "本工程为住宅楼建设项目,采用框架结构,建筑面积约5000平方米。施工过程中应严格按照国家规范执行,确保工程质量。",
                "review_location_label": "第一章 工程概况",
                "expected_success": True,
                "description": "测试正常的施工方案文本,不包含任何敏感词",
            },
            {
                "name": "空文本",
                "trace_id_idx": "_test_002",
                "review_content": "",
                "review_location_label": "空内容测试",
                "expected_success": True,
                "description": "测试空文本的处理",
            },
            {
                "name": "短文本-无敏感词",
                "trace_id_idx": "_test_003",
                "review_content": "施工安全第一",
                "review_location_label": "安全标语",
                "expected_success": True,
                "description": "测试短文本的处理",
            },
            {
                "name": "长文本-无敏感词",
                "trace_id_idx": "_test_004",
                "review_content": """
                本施工方案编制依据包括:
                1. 《建筑工程施工质量验收统一标准》GB50300-2013
                2. 《混凝土结构工程施工质量验收规范》GB50204-2015
                3. 《建筑地基基础工程施工质量验收规范》GB50202-2018
                4. 施工图纸及相关设计文件
                5. 现场实际情况及勘察报告

                工程概况:
                本工程位于某市某区,为高层住宅建筑,地上30层,地下2层。
                建筑高度99.8米,总建筑面积约28000平方米。
                结构形式为框架剪力墙结构,抗震设防烈度为7度。
                """,
                "review_location_label": "第一章 编制依据与工程概况",
                "expected_success": True,
                "description": "测试包含多段落的长文本",
            },
            {
                "name": "包含特殊字符",
                "trace_id_idx": "_test_005",
                "review_content": "施工现场温度:-5℃~35℃,相对湿度:≤85%,风力:≤5级。混凝土强度等级:C30、C35。钢筋规格:Φ12、Φ16、Φ20。",
                "review_location_label": "第三章 施工条件",
                "expected_success": True,
                "description": "测试包含特殊符号和技术参数的文本",
            },
        ]

    async def run_single_test(self, test_case: Dict[str, Any]) -> bool:
        """Run one test case and report whether it matched its expectation.

        Args:
            test_case: A dict shaped like the entries of ``get_test_cases()``.

        Returns:
            True when the result's ``success`` equals the case's
            ``expected_success``; False on a mismatch or if any exception is
            raised while running the case (the exception is logged, not
            propagated).
        """
        logger.info("\n" + "█" * 80)
        logger.info(f"测试用例: {test_case['name']}")
        logger.info(f"描述: {test_case['description']}")
        logger.info("█" * 80)

        try:
            result = await self.check_sensitive_simplified(
                trace_id_idx=test_case['trace_id_idx'],
                review_content=test_case['review_content'],
                review_location_label=test_case['review_location_label'],
                stage_name="单元测试阶段",
            )

            # Log what came back next to what was expected.
            logger.info("\n" + "-" * 80)
            logger.info("测试结果验证:")
            logger.info(f" 返回类型: {type(result)}")
            logger.info(f" 是否成功: {result.success}")
            logger.info(f" 预期成功: {test_case['expected_success']}")
            logger.info(f" 执行时间: {result.execution_time:.4f}s")
            logger.info(f" 详细信息: {result.details}")

            if result.error_message:
                logger.error(f" 错误信息: {result.error_message}")

            test_passed = result.success == test_case['expected_success']

            if test_passed:
                logger.info("✓ 测试通过")
            else:
                logger.error("✗ 测试失败")
                logger.error(f" 预期 success={test_case['expected_success']}, 实际 success={result.success}")

            logger.info("-" * 80)

            return test_passed

        except Exception as e:
            # Runner boundary: a crashing case counts as a failure so the
            # remaining cases still run.
            logger.error(f"✗ 测试异常: {str(e)}")
            logger.exception("详细异常信息:")
            return False

    async def run_all_tests(self) -> bool:
        """Initialize the detector, run every test case and log a summary.

        The per-case outcomes are also stored on ``self.test_results``.

        Returns:
            True when every case passed; False when any case failed or the
            sensitive-word detector could not be initialized.
        """
        logger.info("\n" + "█" * 80)
        logger.info("开始执行敏感词检查单元测试")
        logger.info("█" * 80)

        # Initialize the sensitive-word detector before running any case.
        try:
            from core.construction_review.component.reviewers.utils import SensitiveWordChecker
            logger.info("\n正在初始化敏感词检测器...")
            stats = SensitiveWordChecker.initialize()
            logger.info(f"敏感词检测器初始化成功: {stats}")
        except Exception as e:
            logger.error(f"敏感词检测器初始化失败: {str(e)}")
            # Keep the traceback, matching run_single_test's error reporting.
            logger.exception("详细异常信息:")
            return False

        test_cases = self.get_test_cases()
        logger.info(f"\n共有 {len(test_cases)} 个测试用例")

        results = []
        for idx, test_case in enumerate(test_cases, 1):
            logger.info(f"\n{'=' * 80}")
            logger.info(f"执行测试 {idx}/{len(test_cases)}")
            logger.info(f"{'=' * 80}")

            passed = await self.run_single_test(test_case)
            results.append({
                'name': test_case['name'],
                'passed': passed,
            })

        self.test_results = results

        # Summary.
        logger.info("\n" + "█" * 80)
        logger.info("测试结果汇总")
        logger.info("█" * 80)

        passed_count = sum(1 for r in results if r['passed'])
        total_count = len(results)
        # Guard the division so an empty case list cannot raise
        # ZeroDivisionError.
        pass_rate = passed_count / total_count * 100 if total_count else 0.0

        logger.info(f"\n总测试数: {total_count}")
        logger.info(f"通过数量: {passed_count}")
        logger.info(f"失败数量: {total_count - passed_count}")
        logger.info(f"通过率: {pass_rate:.2f}%")

        logger.info("\n详细结果:")
        for idx, result in enumerate(results, 1):
            status = "✓ 通过" if result['passed'] else "✗ 失败"
            logger.info(f" {idx}. {result['name']}: {status}")

        logger.info("\n" + "█" * 80)

        if passed_count == total_count:
            logger.info("✓ 所有测试通过!")
            logger.info("█" * 80)
            return True

        logger.error(f"✗ 有 {total_count - passed_count} 个测试失败")
        logger.info("█" * 80)
        return False

    def run_tests_sync(self) -> bool:
        """Run ``run_all_tests`` to completion via ``asyncio.run``."""
        return asyncio.run(self.run_all_tests())
|
|
|
+
|
|
|
+
|
|
|
def main():
    """Script entry point: run the suite and exit 0 on success, 1 otherwise."""
    banner = "=" * 80
    logger.info(banner)
    logger.info("敏感词检查功能单元测试")
    logger.info("测试文件: test_sensitive_check.py")
    logger.info(banner)

    all_passed = TestSensitiveCheck().run_tests_sync()
    if all_passed:
        exit_code = 0
    else:
        exit_code = 1
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|
|
|
+
|