#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Unit test for the sensitive-word check feature (full version).

Exercises ``AIReviewEngine.check_sensitive`` via a standalone interactive
driver.  Includes the LLM second-pass review and supports checking
custom, user-supplied text.
"""
import asyncio
import os
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Optional

# Make the project root importable and use it as the working directory,
# so the project-local `core.*` imports below resolve.
current_dir = Path(__file__).parent.absolute()
project_root = current_dir.parent.parent
sys.path.insert(0, str(project_root))
os.chdir(str(project_root))


class SimpleLogger:
    """Minimal print-based logger so the test has no logging dependencies."""

    @staticmethod
    def info(msg):
        print(f"[INFO] {msg}")

    @staticmethod
    def warning(msg):
        print(f"[WARNING] {msg}")

    @staticmethod
    def error(msg):
        print(f"[ERROR] {msg}")

    @staticmethod
    def exception(msg):
        # Print the message plus the currently handled exception's traceback.
        print(f"[EXCEPTION] {msg}")
        traceback.print_exc()


logger = SimpleLogger()


class TestSensitiveCheck:
    """Test driver for the sensitive-word check feature."""

    def __init__(self):
        # Collected results (currently unused placeholder) and the lazily
        # created engine instance.
        self.test_results = []
        self.ai_review_engine = None
        logger.info("=" * 80)
        logger.info("初始化敏感词检查测试类")
        logger.info("=" * 80)

    def initialize_engine(self):
        """Create the AIReviewEngine from a mock TaskFileInfo.

        Returns:
            bool: True on success, False if imports or construction fail.
        """
        try:
            from core.construction_review.component.ai_review_engine import AIReviewEngine
            from core.base.task_models import TaskFileInfo

            # Build a mock TaskFileInfo object.
            logger.info("创建 mock TaskFileInfo 对象...")
            mock_task_info = TaskFileInfo(
                file_id="test_file_001",
                callback_task_id="test_task_001",
                user_id="test_user",
                review_config=["sensitive_check"],  # enable only the sensitive-word check
                project_plan_type="test_project",
                tendency_review_role="test_role"
            )

            logger.info("初始化 AI 审查引擎...")
            self.ai_review_engine = AIReviewEngine(task_file_info=mock_task_info)
            logger.info("AI审查引擎初始化成功")
            return True
        except Exception as e:
            logger.error(f"AI审查引擎初始化失败: {e}")
            logger.exception("详细错误:")
            return False

    async def check_sensitive_with_llm_review(
        self,
        trace_id_idx: str,
        review_content: str,
        review_references: str = "",
        review_location_label: str = "",
        state: Optional[Dict[str, Any]] = None,
        stage_name: str = "测试阶段"
    ) -> Any:
        """Full sensitive-information check, including the LLM second pass.

        Behavior:
            1. Run keyword matching to detect candidate sensitive words.
            2. If any are found, send them to the LLM for a second-pass
               review (same call as the production function).
            3. If none are found, return a successful result directly.
            4. Message pushing was removed to simplify the test.

        Args:
            trace_id_idx: Suffix appended to the trace id.
            review_content: Text to be reviewed.
            review_references: Reference material for the review (unused by
                the keyword pass; kept for signature parity).
            review_location_label: Label describing where the text came from.
            state: Optional state dict forwarded to the engine.
            stage_name: Human-readable stage name for logging.

        Returns:
            A ReviewResult-like object, or None on any failure.
        """
        try:
            from core.construction_review.component.reviewers.utils import check_sensitive_words_async
            from core.construction_review.component.reviewers.base_reviewer import ReviewResult
        except ImportError as e:
            logger.error(f"导入模块失败: {e}")
            return None

        logger.info("=" * 80)
        logger.info(f"开始执行敏感词检查 - trace_id: sensitive_check{trace_id_idx}")
        logger.info(f"阶段名称: {stage_name}")
        logger.info(f"审查位置: {review_location_label}")
        logger.info(f"审查内容长度: {len(review_content)} 字符")
        if len(review_content) > 100:
            logger.info(f"审查内容预览: {review_content[:100]}...")
        else:
            logger.info(f"审查内容: {review_content}")
        logger.info("=" * 80)

        start_time = time.time()
        trace_id = "sensitive_check" + trace_id_idx

        # Step 1: keyword-matching detection.
        logger.info("步骤1: 开始关键词匹配检测...")
        try:
            first_results = await check_sensitive_words_async(review_content)
        except Exception as e:
            logger.error(f"关键词检测失败: {e}")
            logger.exception("详细错误:")
            return None

        detection_time = time.time() - start_time
        logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")

        # Any hits? If so, escalate to the LLM for a second opinion.
        if first_results:
            logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词,准备送入大模型二审")
            logger.warning("-" * 80)

            # Format the detected words for the LLM prompt.
            sensitive_words_info = []
            for idx, item in enumerate(first_results, 1):
                word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
                sensitive_words_info.append(word_info)
                logger.warning(f" 敏感词 #{idx}: {word_info}")
            formatted_sensitive_words = "\n".join(sensitive_words_info)
            logger.warning("-" * 80)

            # Step 3: LLM second-pass review (same call as production code).
            logger.info("步骤3: 调用大模型进行二审...")
            try:
                if not self.ai_review_engine:
                    logger.error("AI审查引擎未初始化")
                    return None

                result = await self.ai_review_engine.review(
                    "sensitive_check",
                    trace_id,
                    "basic",
                    "sensitive_word_check",
                    review_content,
                    formatted_sensitive_words,
                    None,
                    review_location_label,
                    state,
                    stage_name
                )
                llm_review_time = time.time() - start_time
                logger.info(f"步骤3: 大模型二审完成,总耗时: {llm_review_time:.4f}s")
                logger.info("=" * 80)
                return result
            except Exception as e:
                logger.error(f"大模型二审失败: {e}")
                logger.exception("详细错误:")
                return None
        else:
            # No sensitive words found: build the success result directly.
            logger.info("步骤2: 未检测到敏感词")
            execution_time = time.time() - start_time
            result = ReviewResult(
                success=True,
                details={"name": "sensitive_check", "response": "无明显问题"},
                error_message=None,
                execution_time=execution_time
            )
            logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
            logger.info("=" * 80)
            return result

    def get_custom_text_from_user(self) -> Optional[str]:
        """Interactively read multi-line text from stdin.

        Input ends with a line containing only 'END'.  An empty first line,
        Ctrl-C, or effectively-empty input returns None (skip).
        """
        logger.info("\n" + "=" * 80)
        logger.info("请输入要检查的文本内容")
        logger.info("=" * 80)
        logger.info("提示:")
        logger.info(" 1. 可以输入多行文本,输入完成后单独一行输入 'END' 结束")
        logger.info(" 2. 直接按回车(输入空行)将跳过自定义输入")
        logger.info("=" * 80)

        lines = []
        first_line = True
        while True:
            try:
                if first_line:
                    line = input(">>> ")
                    first_line = False
                    # An empty first line means "skip custom input".
                    if not line.strip():
                        logger.info("跳过自定义文本输入")
                        return None
                else:
                    line = input("... ")

                # Terminator sentinel.
                if line.strip().upper() == 'END':
                    break
                lines.append(line)
            except EOFError:
                break
            except KeyboardInterrupt:
                logger.info("\n用户取消输入")
                return None

        custom_text = "\n".join(lines)
        if not custom_text.strip():
            logger.info("未输入有效文本")
            return None

        logger.info(f"\n已接收文本,长度: {len(custom_text)} 字符")
        return custom_text

    async def run_custom_text_check(self, custom_text: str):
        """Run the sensitive-word check on user-supplied text and log the outcome."""
        logger.info("\n" + "█" * 80)
        logger.info("执行自定义文本敏感词检查")
        logger.info("█" * 80)
        try:
            # Execute the check.
            result = await self.check_sensitive_with_llm_review(
                trace_id_idx="_custom_001",
                review_content=custom_text,
                review_location_label="自定义文本检查",
                stage_name="自定义测试"
            )

            # Report the result.
            logger.info("\n" + "-" * 80)
            logger.info("检查结果:")
            logger.info("-" * 80)
            if result:
                logger.info(f" 返回类型: {type(result).__name__}")
                logger.info(f" 是否成功: {result.success}")
                logger.info(f" 执行时间: {result.execution_time:.4f}s")
                logger.info(f" 详细信息: {result.details}")
                if result.error_message:
                    logger.error(f" 错误信息: {result.error_message}")
                if not result.success:
                    logger.warning("\n检测到敏感内容,请查看上方详细信息")
                else:
                    logger.info("\n✓ 未检测到敏感内容")
            else:
                logger.error("检查失败,未返回结果")
            logger.info("-" * 80)
        except Exception as e:
            logger.error(f"✗ 检查异常: {str(e)}")
            logger.exception("详细异常信息:")

    async def run_interactive_mode(self):
        """Interactive entry point: init engine and detector, then check user text.

        Returns:
            bool: True when the session completed, False on initialization failure.
        """
        logger.info("\n" + "█" * 80)
        logger.info("敏感词检查 - 交互模式")
        logger.info("█" * 80)

        # Initialize the AI review engine.
        if not self.initialize_engine():
            logger.error("AI审查引擎初始化失败,无法继续")
            return False

        # Initialize the keyword-based sensitive-word detector.
        try:
            from core.construction_review.component.reviewers.utils import SensitiveWordChecker
            logger.info("\n正在初始化敏感词检测器...")
            stats = SensitiveWordChecker.initialize()
            logger.info(f"敏感词检测器初始化成功: {stats}")
        except Exception as e:
            logger.error(f"敏感词检测器初始化失败: {str(e)}")
            logger.exception("详细错误:")
            return False

        # Obtain custom text and run the check, if any was provided.
        custom_text = self.get_custom_text_from_user()
        if custom_text:
            await self.run_custom_text_check(custom_text)
        else:
            logger.info("\n未提供自定义文本,程序结束")
        return True

    def run_interactive_sync(self):
        """Synchronous wrapper around the async interactive mode."""
        return asyncio.run(self.run_interactive_mode())


def main():
    """Script entry point; exit code 0 on success, 1 on failure."""
    print("=" * 80)
    print("敏感词检查功能单元测试(完整版本)")
    print("测试文件: test_sensitive_check_standalone.py")
    print("=" * 80)
    tester = TestSensitiveCheck()
    success = tester.run_interactive_sync()
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()