| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- 敏感词检查功能单元测试
- 测试 AIReviewEngine.check_sensitive 方法的各种场景
- """
- import asyncio
- import sys
- import os
- import time
- from pathlib import Path
- from typing import Dict, Any, List
- from dataclasses import dataclass
# Make the project root importable and use it as the working directory.
# The root is taken to be two levels above the directory holding this file.
current_dir = Path(__file__).parent.absolute()
project_root = current_dir.parent.parent
_project_root_str = str(project_root)
os.chdir(_project_root_str)
sys.path.insert(0, _project_root_str)
- from core.construction_review.component.reviewers.base_reviewer import ReviewResult
- from foundation.observability.logger.loggering import review_logger as logger
class TestSensitiveCheck:
    """Script-style harness that exercises the sensitive-word check.

    A keyword-only variant of the sensitive-word review is run against a fixed
    table of sample texts; every step and the final summary are written to the
    review logger.
    """

    # The class name matches pytest's ``Test*`` pattern but the class defines
    # ``__init__`` and is driven through run_all_tests()/main(); opt out of
    # pytest collection so no collection warning is emitted for it.
    __test__ = False

    def __init__(self):
        # Per-case outcomes ({'name': ..., 'passed': ...}) of the most recent
        # run_all_tests() call.
        self.test_results = []
        logger.info("=" * 80)
        logger.info("初始化敏感词检查测试类")
        logger.info("=" * 80)

    async def check_sensitive_simplified(
        self,
        trace_id_idx: str,
        review_content: str,
        review_references: str = "",
        review_location_label: str = "",
        state: Dict = None,
        stage_name: str = "测试阶段"
    ) -> "ReviewResult":
        """Simplified sensitive-information check used by the tests.

        Only keyword matching is performed (via ``check_sensitive_words_async``).
        When matches are found they are formatted and returned in a failed
        result; otherwise a successful result is returned. No message pushing
        and no LLM second-pass review happen here.

        Args:
            trace_id_idx: Suffix appended to ``"sensitive_check"`` to form the
                trace id that is logged.
            review_content: Text to scan.
            review_references: Accepted for signature compatibility; unused.
            review_location_label: Label of the reviewed location (logged only).
            state: Accepted for signature compatibility; unused.
            stage_name: Stage name (logged only).

        Returns:
            ReviewResult: ``success`` is False when at least one sensitive word
            was detected (``details`` then carries the raw matches under
            ``"sensitive_words"`` and a newline-joined summary under
            ``"formatted_info"``), True otherwise.
        """
        from core.construction_review.component.reviewers.utils import (
            check_sensitive_words_async,
        )

        trace_id = "sensitive_check" + trace_id_idx

        logger.info("=" * 80)
        logger.info(f"开始执行敏感词检查 - trace_id: {trace_id}")
        logger.info(f"阶段名称: {stage_name}")
        logger.info(f"审查位置: {review_location_label}")
        logger.info(f"审查内容长度: {len(review_content)} 字符")
        # Long inputs are truncated to a 100-character preview in the log.
        logger.info(f"审查内容预览: {review_content[:100]}..." if len(review_content) > 100 else f"审查内容: {review_content}")
        logger.info("=" * 80)

        start_time = time.time()

        # Step 1: keyword-matching detection.
        logger.info("步骤1: 开始关键词匹配检测...")
        first_results = await check_sensitive_words_async(review_content)
        detection_time = time.time() - start_time
        logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")

        if not first_results:
            # Nothing detected: build the success result.
            logger.info("步骤2: 未检测到敏感词")

            execution_time = time.time() - start_time
            result = ReviewResult(
                success=True,
                details={"name": "sensitive_check", "response": "无明显问题"},
                error_message=None,
                execution_time=execution_time
            )

            logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
            logger.info("=" * 80)
            return result

        logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词")
        logger.warning("-" * 80)

        # One human-readable line per match; each match is expected to expose
        # the 'word', 'position', 'end_position' and 'source' keys.
        sensitive_words_info = []
        for idx, item in enumerate(first_results, 1):
            word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
            sensitive_words_info.append(word_info)
            logger.warning(f" 敏感词 #{idx}: {word_info}")

        formatted_sensitive_words = "\n".join(sensitive_words_info)
        logger.warning("-" * 80)

        # Simplified flow: return the keyword hits directly.
        execution_time = time.time() - start_time
        result = ReviewResult(
            success=False,
            details={
                "name": "sensitive_check",
                "response": f"检测到{len(first_results)}个敏感词",
                "sensitive_words": first_results,
                "formatted_info": formatted_sensitive_words
            },
            error_message=None,
            execution_time=execution_time
        )

        logger.warning(f"步骤3: 敏感词检查完成(检测到敏感词),总耗时: {execution_time:.4f}s")
        logger.info("=" * 80)
        return result

    def get_test_cases(self) -> List[Dict[str, Any]]:
        """Return the fixed table of test cases.

        Each case is a dict with the keys ``name``, ``trace_id_idx``,
        ``review_content``, ``review_location_label``, ``expected_success``
        and ``description``.
        """
        return [
            {
                "name": "正常文本-无敏感词",
                "trace_id_idx": "_test_001",
                "review_content": "本工程为住宅楼建设项目,采用框架结构,建筑面积约5000平方米。施工过程中应严格按照国家规范执行,确保工程质量。",
                "review_location_label": "第一章 工程概况",
                "expected_success": True,
                "description": "测试正常的施工方案文本,不包含任何敏感词"
            },
            {
                "name": "空文本",
                "trace_id_idx": "_test_002",
                "review_content": "",
                "review_location_label": "空内容测试",
                "expected_success": True,
                "description": "测试空文本的处理"
            },
            {
                "name": "短文本-无敏感词",
                "trace_id_idx": "_test_003",
                "review_content": "施工安全第一",
                "review_location_label": "安全标语",
                "expected_success": True,
                "description": "测试短文本的处理"
            },
            {
                "name": "长文本-无敏感词",
                "trace_id_idx": "_test_004",
                "review_content": (
                    "\n"
                    "本施工方案编制依据包括:\n"
                    "1. 《建筑工程施工质量验收统一标准》GB50300-2013\n"
                    "2. 《混凝土结构工程施工质量验收规范》GB50204-2015\n"
                    "3. 《建筑地基基础工程施工质量验收规范》GB50202-2018\n"
                    "4. 施工图纸及相关设计文件\n"
                    "5. 现场实际情况及勘察报告\n"
                    "\n"
                    "工程概况:\n"
                    "本工程位于某市某区,为高层住宅建筑,地上30层,地下2层。\n"
                    "建筑高度99.8米,总建筑面积约28000平方米。\n"
                    "结构形式为框架剪力墙结构,抗震设防烈度为7度。\n"
                ),
                "review_location_label": "第一章 编制依据与工程概况",
                "expected_success": True,
                "description": "测试包含多段落的长文本"
            },
            {
                "name": "包含特殊字符",
                "trace_id_idx": "_test_005",
                "review_content": "施工现场温度:-5℃~35℃,相对湿度:≤85%,风力:≤5级。混凝土强度等级:C30、C35。钢筋规格:Φ12、Φ16、Φ20。",
                "review_location_label": "第三章 施工条件",
                "expected_success": True,
                "description": "测试包含特殊符号和技术参数的文本"
            }
        ]

    async def run_single_test(self, test_case: Dict[str, Any]) -> bool:
        """Run one test case and report whether it passed.

        A case passes when the ``success`` flag of the returned result equals
        the case's ``expected_success``. Any exception raised while running the
        case is logged and counted as a failure.
        """
        logger.info("\n" + "█" * 80)
        logger.info(f"测试用例: {test_case['name']}")
        logger.info(f"描述: {test_case['description']}")
        logger.info("█" * 80)

        try:
            result = await self.check_sensitive_simplified(
                trace_id_idx=test_case['trace_id_idx'],
                review_content=test_case['review_content'],
                review_location_label=test_case['review_location_label'],
                stage_name="单元测试阶段"
            )

            # Log what came back next to what was expected.
            logger.info("\n" + "-" * 80)
            logger.info("测试结果验证:")
            logger.info(f" 返回类型: {type(result)}")
            logger.info(f" 是否成功: {result.success}")
            logger.info(f" 预期成功: {test_case['expected_success']}")
            logger.info(f" 执行时间: {result.execution_time:.4f}s")
            logger.info(f" 详细信息: {result.details}")

            if result.error_message:
                logger.error(f" 错误信息: {result.error_message}")

            test_passed = result.success == test_case['expected_success']

            if test_passed:
                logger.info("✓ 测试通过")
            else:
                logger.error("✗ 测试失败")
                logger.error(f" 预期 success={test_case['expected_success']}, 实际 success={result.success}")

            logger.info("-" * 80)

            return test_passed

        except Exception as e:
            # A crashing case must not abort the whole run; record it as failed.
            logger.error(f"✗ 测试异常: {str(e)}")
            logger.exception("详细异常信息:")
            return False

    async def run_all_tests(self) -> bool:
        """Run every test case and log a summary.

        The per-case outcomes are stored in ``self.test_results``.

        Returns:
            bool: True when all executed cases passed (also when there were no
            cases to run), False when any case failed or when the
            sensitive-word checker could not be initialised.
        """
        logger.info("\n" + "█" * 80)
        logger.info("开始执行敏感词检查单元测试")
        logger.info("█" * 80)

        # Drop outcomes left over from a previous run.
        self.test_results = []

        # Initialise the sensitive-word checker before running any case.
        try:
            from core.construction_review.component.reviewers.utils import SensitiveWordChecker
            logger.info("\n正在初始化敏感词检测器...")
            stats = SensitiveWordChecker.initialize()
            logger.info(f"敏感词检测器初始化成功: {stats}")
        except Exception as e:
            logger.error(f"敏感词检测器初始化失败: {str(e)}")
            return False

        test_cases = self.get_test_cases()
        logger.info(f"\n共有 {len(test_cases)} 个测试用例")

        for idx, test_case in enumerate(test_cases, 1):
            logger.info(f"\n{'=' * 80}")
            logger.info(f"执行测试 {idx}/{len(test_cases)}")
            logger.info(f"{'=' * 80}")

            passed = await self.run_single_test(test_case)
            self.test_results.append({
                'name': test_case['name'],
                'passed': passed
            })

        results = self.test_results

        # Summary.
        logger.info("\n" + "█" * 80)
        logger.info("测试结果汇总")
        logger.info("█" * 80)

        passed_count = sum(1 for r in results if r['passed'])
        total_count = len(results)
        failed_count = total_count - passed_count
        # Guard the percentage against an empty case list.
        pass_rate = passed_count / total_count * 100 if total_count else 0.0

        logger.info(f"\n总测试数: {total_count}")
        logger.info(f"通过数量: {passed_count}")
        logger.info(f"失败数量: {failed_count}")
        logger.info(f"通过率: {pass_rate:.2f}%")

        logger.info("\n详细结果:")
        for idx, result in enumerate(results, 1):
            status = "✓ 通过" if result['passed'] else "✗ 失败"
            logger.info(f" {idx}. {result['name']}: {status}")

        logger.info("\n" + "█" * 80)

        if failed_count == 0:
            logger.info("✓ 所有测试通过!")
            logger.info("█" * 80)
            return True

        logger.error(f"✗ 有 {failed_count} 个测试失败")
        logger.info("█" * 80)
        return False

    def run_tests_sync(self):
        """Run the whole suite synchronously via ``asyncio.run``."""
        return asyncio.run(self.run_all_tests())
def main():
    """Command-line entry point.

    Logs a banner, runs the whole suite through ``TestSensitiveCheck`` and
    exits the process with status 0 when every case passed, 1 otherwise.
    """
    separator = "=" * 80
    for line in (
        separator,
        "敏感词检查功能单元测试",
        "测试文件: test_sensitive_check.py",
        separator,
    ):
        logger.info(line)

    all_passed = TestSensitiveCheck().run_tests_sync()
    exit_status = 0 if all_passed else 1
    sys.exit(exit_status)


# Run the suite only when this file is executed directly.
if __name__ == "__main__":
    main()
|