#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Unit tests for the sensitive-word check.

Exercises a simplified stand-in for ``AIReviewEngine.check_sensitive`` against
several input scenarios and reports the outcome through the server logger.
"""

import asyncio
import sys
import os
import time
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass

# Put the project root on the import path (and make it the working directory)
# so the project packages imported below can be resolved.
current_dir = Path(__file__).parent.absolute()
project_root = current_dir.parent.parent
sys.path.insert(0, str(project_root))
os.chdir(str(project_root))

from core.construction_review.component.reviewers.base_reviewer import ReviewResult
from foundation.observability.logger.loggering import server_logger as logger


class TestSensitiveCheck:
    """Script-driven test harness for the sensitive-word check."""

    # The class name starts with "Test" but it defines __init__ and is run via
    # main(); mark it so pytest does not try (and fail, with a warning) to
    # collect it as a test class.
    __test__ = False

    def __init__(self):
        """Create the harness and log a start-up banner."""
        # Filled by run_all_tests() with one {'name', 'passed'} dict per case.
        self.test_results = []
        logger.info("=" * 80)
        logger.info("初始化敏感词检查测试类")
        logger.info("=" * 80)

    async def check_sensitive_simplified(
        self,
        trace_id_idx: str,
        review_content: str,
        review_references: str = "",
        review_location_label: str = "",
        state: Optional[Dict] = None,
        stage_name: str = "测试阶段"
    ) -> ReviewResult:
        """
        Simplified sensitive-information check (for testing).

        Behaviour:
            1. Runs keyword matching on the content.
            2. If sensitive words are found, formats them and returns a
               failing result that carries the raw and formatted hits.
            3. If none are found, returns a successful result.
            4. Message pushing and the LLM second-pass review of the original
               function are intentionally left out.

        Args:
            trace_id_idx: Suffix appended to "sensitive_check" to form the trace id.
            review_content: Text to check.
            review_references: Reference material (not used by this simplified version).
            review_location_label: Label of the location under review (logged only).
            state: State dictionary (not used by this simplified version).
            stage_name: Stage name (logged only).

        Returns:
            ReviewResult: ``success`` is False when at least one sensitive word
            was detected, True otherwise; ``details`` describes the outcome and
            ``execution_time`` is the elapsed time in seconds.
        """
        from core.construction_review.component.reviewers.utils import (
            check_sensitive_words_async,
        )

        trace_id = f"sensitive_check{trace_id_idx}"

        logger.info("=" * 80)
        logger.info(f"开始执行敏感词检查 - trace_id: {trace_id}")
        logger.info(f"阶段名称: {stage_name}")
        logger.info(f"审查位置: {review_location_label}")
        logger.info(f"审查内容长度: {len(review_content)} 字符")
        # Only the first 100 characters are logged for long content.
        logger.info(f"审查内容预览: {review_content[:100]}..." if len(review_content) > 100 else f"审查内容: {review_content}")
        logger.info("=" * 80)

        # perf_counter is monotonic, so elapsed times cannot be skewed by
        # system clock adjustments.
        start_time = time.perf_counter()

        # Step 1: keyword-matching review.
        logger.info("步骤1: 开始关键词匹配检测...")
        first_results = await check_sensitive_words_async(review_content)
        detection_time = time.perf_counter() - start_time
        logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")

        if not first_results:
            # Nothing detected: build the success result.
            logger.info("步骤2: 未检测到敏感词")
            execution_time = time.perf_counter() - start_time
            result = ReviewResult(
                success=True,
                details={"name": "sensitive_check", "response": "无明显问题"},
                error_message=None,
                execution_time=execution_time
            )
            # Simplified version: no message push here.
            logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
            logger.info("=" * 80)
            return result

        logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词")
        logger.warning("-" * 80)

        # Format each hit; every item is read via its 'word', 'position',
        # 'end_position' and 'source' keys.
        sensitive_words_info = []
        for idx, item in enumerate(first_results, 1):
            word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
            sensitive_words_info.append(word_info)
            logger.warning(f" 敏感词 #{idx}: {word_info}")
        formatted_sensitive_words = "\n".join(sensitive_words_info)
        logger.warning("-" * 80)

        # Simplified version: return the detection result directly, without
        # the LLM second-pass review.
        execution_time = time.perf_counter() - start_time
        result = ReviewResult(
            success=False,
            details={
                "name": "sensitive_check",
                "response": f"检测到{len(first_results)}个敏感词",
                "sensitive_words": first_results,
                "formatted_info": formatted_sensitive_words
            },
            error_message=None,
            execution_time=execution_time
        )
        logger.warning(f"步骤3: 敏感词检查完成(检测到敏感词),总耗时: {execution_time:.4f}s")
        logger.info("=" * 80)
        return result

    def get_test_cases(self) -> List[Dict[str, Any]]:
        """Return the test cases.

        Each case is a dict with the keys ``name``, ``trace_id_idx``,
        ``review_content``, ``review_location_label``, ``expected_success``
        and ``description``.
        """
        return [
            {
                "name": "正常文本-无敏感词",
                "trace_id_idx": "_test_001",
                "review_content": "本工程为住宅楼建设项目,采用框架结构,建筑面积约5000平方米。施工过程中应严格按照国家规范执行,确保工程质量。",
                "review_location_label": "第一章 工程概况",
                "expected_success": True,
                "description": "测试正常的施工方案文本,不包含任何敏感词"
            },
            {
                "name": "空文本",
                "trace_id_idx": "_test_002",
                "review_content": "",
                "review_location_label": "空内容测试",
                "expected_success": True,
                "description": "测试空文本的处理"
            },
            {
                "name": "短文本-无敏感词",
                "trace_id_idx": "_test_003",
                "review_content": "施工安全第一",
                "review_location_label": "安全标语",
                "expected_success": True,
                "description": "测试短文本的处理"
            },
            {
                "name": "长文本-无敏感词",
                "trace_id_idx": "_test_004",
                "review_content": """
                本施工方案编制依据包括:
                1. 《建筑工程施工质量验收统一标准》GB50300-2013
                2. 《混凝土结构工程施工质量验收规范》GB50204-2015
                3. 《建筑地基基础工程施工质量验收规范》GB50202-2018
                4. 施工图纸及相关设计文件
                5. 现场实际情况及勘察报告

                工程概况:
                本工程位于某市某区,为高层住宅建筑,地上30层,地下2层。
                建筑高度99.8米,总建筑面积约28000平方米。
                结构形式为框架剪力墙结构,抗震设防烈度为7度。
                """,
                "review_location_label": "第一章 编制依据与工程概况",
                "expected_success": True,
                "description": "测试包含多段落的长文本"
            },
            {
                "name": "包含特殊字符",
                "trace_id_idx": "_test_005",
                "review_content": "施工现场温度:-5℃~35℃,相对湿度:≤85%,风力:≤5级。混凝土强度等级:C30、C35。钢筋规格:Φ12、Φ16、Φ20。",
                "review_location_label": "第三章 施工条件",
                "expected_success": True,
                "description": "测试包含特殊符号和技术参数的文本"
            }
        ]

    async def run_single_test(self, test_case: Dict[str, Any]) -> bool:
        """Run one test case.

        Args:
            test_case: A case dict as produced by ``get_test_cases``.

        Returns:
            True when the check's ``success`` flag equals the case's
            ``expected_success``; False on a mismatch or if any exception is
            raised while running the case.
        """
        logger.info("\n" + "█" * 80)
        logger.info(f"测试用例: {test_case['name']}")
        logger.info(f"描述: {test_case['description']}")
        logger.info("█" * 80)

        try:
            # Execute the check.
            result = await self.check_sensitive_simplified(
                trace_id_idx=test_case['trace_id_idx'],
                review_content=test_case['review_content'],
                review_location_label=test_case['review_location_label'],
                stage_name="单元测试阶段"
            )

            # Report what came back.
            logger.info("\n" + "-" * 80)
            logger.info("测试结果验证:")
            logger.info(f" 返回类型: {type(result)}")
            logger.info(f" 是否成功: {result.success}")
            logger.info(f" 预期成功: {test_case['expected_success']}")
            logger.info(f" 执行时间: {result.execution_time:.4f}s")
            logger.info(f" 详细信息: {result.details}")
            if result.error_message:
                logger.error(f" 错误信息: {result.error_message}")

            # The case passes when the success flag matches the expectation.
            test_passed = result.success == test_case['expected_success']
            if test_passed:
                logger.info("✓ 测试通过")
            else:
                logger.error("✗ 测试失败")
                logger.error(f" 预期 success={test_case['expected_success']}, 实际 success={result.success}")
            logger.info("-" * 80)
            return test_passed
        except Exception as e:
            # Harness boundary: a crashing case counts as a failure so the
            # remaining cases still run; the traceback is logged.
            logger.error(f"✗ 测试异常: {str(e)}")
            logger.exception("详细异常信息:")
            return False

    async def run_all_tests(self):
        """Run every test case and log a summary.

        Returns:
            True when all cases passed; False when any case failed or the
            sensitive-word checker could not be initialised.
        """
        logger.info("\n" + "█" * 80)
        logger.info("开始执行敏感词检查单元测试")
        logger.info("█" * 80)

        # Initialise the sensitive-word checker before running any case.
        try:
            from core.construction_review.component.reviewers.utils import SensitiveWordChecker
            logger.info("\n正在初始化敏感词检测器...")
            stats = SensitiveWordChecker.initialize()
            logger.info(f"敏感词检测器初始化成功: {stats}")
        except Exception as e:
            logger.error(f"敏感词检测器初始化失败: {str(e)}")
            return False

        test_cases = self.get_test_cases()
        logger.info(f"\n共有 {len(test_cases)} 个测试用例")

        # Run the cases sequentially.
        results = []
        for idx, test_case in enumerate(test_cases, 1):
            logger.info(f"\n{'=' * 80}")
            logger.info(f"执行测试 {idx}/{len(test_cases)}")
            logger.info(f"{'=' * 80}")
            passed = await self.run_single_test(test_case)
            results.append({
                'name': test_case['name'],
                'passed': passed
            })
        # Keep the outcomes on the instance for callers that want them.
        self.test_results = results

        # Summary.
        logger.info("\n" + "█" * 80)
        logger.info("测试结果汇总")
        logger.info("█" * 80)

        passed_count = sum(1 for r in results if r['passed'])
        total_count = len(results)
        # Guard against an empty case list to avoid ZeroDivisionError.
        pass_rate = passed_count / total_count * 100 if total_count else 0.0

        logger.info(f"\n总测试数: {total_count}")
        logger.info(f"通过数量: {passed_count}")
        logger.info(f"失败数量: {total_count - passed_count}")
        logger.info(f"通过率: {pass_rate:.2f}%")

        logger.info("\n详细结果:")
        for idx, result in enumerate(results, 1):
            status = "✓ 通过" if result['passed'] else "✗ 失败"
            logger.info(f" {idx}. {result['name']}: {status}")

        logger.info("\n" + "█" * 80)
        if passed_count == total_count:
            logger.info("✓ 所有测试通过!")
            logger.info("█" * 80)
            return True

        logger.error(f"✗ 有 {total_count - passed_count} 个测试失败")
        logger.info("█" * 80)
        return False

    def run_tests_sync(self):
        """Run all tests synchronously by driving the coroutine with asyncio.run."""
        return asyncio.run(self.run_all_tests())


def main():
    """Entry point: run the suite and exit with 0 on success, 1 otherwise."""
    logger.info("=" * 80)
    logger.info("敏感词检查功能单元测试")
    logger.info("测试文件: test_sensitive_check.py")
    logger.info("=" * 80)

    tester = TestSensitiveCheck()
    success = tester.run_tests_sync()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()