#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Unit test for the sensitive-word check feature (full version).

Exercises ``AIReviewEngine.check_sensitive`` via a standalone interactive
driver, including the LLM second-pass review, with support for custom
user-supplied text.
"""
import asyncio
import os
import sys
import time
from pathlib import Path
from typing import Any, Dict, Optional

# Make project-root imports resolvable and run relative to the repo root.
current_dir = Path(__file__).parent.absolute()
project_root = current_dir.parent.parent
sys.path.insert(0, str(project_root))
os.chdir(str(project_root))
class SimpleLogger:
    """Minimal stdout logger standing in for a real logging setup."""

    @staticmethod
    def _emit(level: str, msg) -> None:
        # Single formatting point shared by every level.
        print(f"[{level}] {msg}")

    @staticmethod
    def info(msg) -> None:
        SimpleLogger._emit("INFO", msg)

    @staticmethod
    def warning(msg) -> None:
        SimpleLogger._emit("WARNING", msg)

    @staticmethod
    def error(msg) -> None:
        SimpleLogger._emit("ERROR", msg)

    @staticmethod
    def exception(msg) -> None:
        # Message first, then the active traceback (mirrors logging.exception).
        import traceback
        SimpleLogger._emit("EXCEPTION", msg)
        traceback.print_exc()


logger = SimpleLogger()
class TestSensitiveCheck:
    """Interactive test harness for the sensitive-word check feature."""

    def __init__(self):
        # Collected per-run results (informational only at the moment).
        self.test_results = []
        # Created lazily by initialize_engine(); None until then.
        self.ai_review_engine = None
        logger.info("=" * 80)
        logger.info("初始化敏感词检查测试类")
        logger.info("=" * 80)
- def initialize_engine(self):
- """初始化AI审查引擎"""
- try:
- from core.construction_review.component.ai_review_engine import AIReviewEngine
- from core.base.task_models import TaskFileInfo
-
- # 创建 mock 的 TaskFileInfo 对象
- logger.info("创建 mock TaskFileInfo 对象...")
- mock_task_info = TaskFileInfo(
- file_id="test_file_001",
- callback_task_id="test_task_001",
- user_id="test_user",
- review_config=["sensitive_check"], # 只启用敏感词检查
- project_plan_type="test_project",
- tendency_review_role="test_role"
- )
-
- logger.info("初始化 AI 审查引擎...")
- self.ai_review_engine = AIReviewEngine(task_file_info=mock_task_info)
- logger.info("AI审查引擎初始化成功")
- return True
- except Exception as e:
- logger.error(f"AI审查引擎初始化失败: {e}")
- logger.exception("详细错误:")
- return False
- async def check_sensitive_with_llm_review(
- self,
- trace_id_idx: str,
- review_content: str,
- review_references: str = "",
- review_location_label: str = "",
- state: Dict = None,
- stage_name: str = "测试阶段"
- ) -> Any:
- """
- 完整版的敏感信息检查函数(包含大模型二审)
-
- 功能说明:
- 1. 使用关键词匹配进行敏感词检测
- 2. 如果检测到敏感词,调用大模型进行二审(与原函数一致)
- 3. 如果未检测到敏感词,返回成功结果
- 4. 移除了消息推送功能(简化测试)
-
- Args:
- trace_id_idx: 追踪ID索引
- review_content: 审查内容
- review_references: 审查参考信息
- review_location_label: 审查位置标签
- state: 状态字典
- stage_name: 阶段名称
-
- Returns:
- 审查结果对象
- """
- try:
- from core.construction_review.component.reviewers.utils import check_sensitive_words_async
- from core.construction_review.component.reviewers.base_reviewer import ReviewResult
- except ImportError as e:
- logger.error(f"导入模块失败: {e}")
- return None
-
- logger.info("=" * 80)
- logger.info(f"开始执行敏感词检查 - trace_id: sensitive_check{trace_id_idx}")
- logger.info(f"阶段名称: {stage_name}")
- logger.info(f"审查位置: {review_location_label}")
- logger.info(f"审查内容长度: {len(review_content)} 字符")
- if len(review_content) > 100:
- logger.info(f"审查内容预览: {review_content[:100]}...")
- else:
- logger.info(f"审查内容: {review_content}")
- logger.info("=" * 80)
-
- start_time = time.time()
- trace_id = "sensitive_check" + trace_id_idx
-
- # 第一步:使用关键词匹配式审查
- logger.info("步骤1: 开始关键词匹配检测...")
- try:
- first_results = await check_sensitive_words_async(review_content)
- except Exception as e:
- logger.error(f"关键词检测失败: {e}")
- logger.exception("详细错误:")
- return None
-
- detection_time = time.time() - start_time
- logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")
-
- # 判断是否检测到敏感词
- if first_results:
- logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词,准备送入大模型二审")
- logger.warning("-" * 80)
-
- # 格式化敏感词列表
- sensitive_words_info = []
- for idx, item in enumerate(first_results, 1):
- word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
- sensitive_words_info.append(word_info)
- logger.warning(f" 敏感词 #{idx}: {word_info}")
-
- formatted_sensitive_words = "\n".join(sensitive_words_info)
- logger.warning("-" * 80)
-
- # 调用大模型进行二审(与原函数保持一致)
- logger.info("步骤3: 调用大模型进行二审...")
- try:
- if not self.ai_review_engine:
- logger.error("AI审查引擎未初始化")
- return None
-
- # 调用 review 方法进行大模型二审
- result = await self.ai_review_engine.review(
- "sensitive_check",
- trace_id,
- "basic",
- "sensitive_word_check",
- review_content,
- formatted_sensitive_words,
- None,
- review_location_label,
- state,
- stage_name
- )
-
- llm_review_time = time.time() - start_time
- logger.info(f"步骤3: 大模型二审完成,总耗时: {llm_review_time:.4f}s")
- logger.info("=" * 80)
-
- return result
-
- except Exception as e:
- logger.error(f"大模型二审失败: {e}")
- logger.exception("详细错误:")
- return None
-
- else:
- # 没有检测到敏感词,构造返回体
- logger.info("步骤2: 未检测到敏感词")
-
- execution_time = time.time() - start_time
- result = ReviewResult(
- success=True,
- details={"name": "sensitive_check", "response": "无明显问题"},
- error_message=None,
- execution_time=execution_time
- )
-
- logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
- logger.info("=" * 80)
-
- return result
- def get_custom_text_from_user(self) -> Optional[str]:
- """从用户获取自定义检查文本"""
- logger.info("\n" + "=" * 80)
- logger.info("请输入要检查的文本内容")
- logger.info("=" * 80)
- logger.info("提示:")
- logger.info(" 1. 可以输入多行文本,输入完成后单独一行输入 'END' 结束")
- logger.info(" 2. 直接按回车(输入空行)将跳过自定义输入")
- logger.info("=" * 80)
-
- lines = []
- first_line = True
-
- while True:
- try:
- if first_line:
- line = input(">>> ")
- first_line = False
-
- # 如果第一行就是空行,跳过自定义输入
- if not line.strip():
- logger.info("跳过自定义文本输入")
- return None
- else:
- line = input("... ")
-
- # 检查是否结束输入
- if line.strip().upper() == 'END':
- break
-
- lines.append(line)
-
- except EOFError:
- break
- except KeyboardInterrupt:
- logger.info("\n用户取消输入")
- return None
-
- custom_text = "\n".join(lines)
-
- if not custom_text.strip():
- logger.info("未输入有效文本")
- return None
-
- logger.info(f"\n已接收文本,长度: {len(custom_text)} 字符")
- return custom_text
- async def run_custom_text_check(self, custom_text: str):
- """运行自定义文本检查"""
- logger.info("\n" + "█" * 80)
- logger.info("执行自定义文本敏感词检查")
- logger.info("█" * 80)
-
- try:
- # 执行检查
- result = await self.check_sensitive_with_llm_review(
- trace_id_idx="_custom_001",
- review_content=custom_text,
- review_location_label="自定义文本检查",
- stage_name="自定义测试"
- )
-
- # 显示结果
- logger.info("\n" + "-" * 80)
- logger.info("检查结果:")
- logger.info("-" * 80)
-
- if result:
- logger.info(f" 返回类型: {type(result).__name__}")
- logger.info(f" 是否成功: {result.success}")
- logger.info(f" 执行时间: {result.execution_time:.4f}s")
- logger.info(f" 详细信息: {result.details}")
-
- if result.error_message:
- logger.error(f" 错误信息: {result.error_message}")
-
- if not result.success:
- logger.warning("\n检测到敏感内容,请查看上方详细信息")
- else:
- logger.info("\n✓ 未检测到敏感内容")
- else:
- logger.error("检查失败,未返回结果")
-
- logger.info("-" * 80)
-
- except Exception as e:
- logger.error(f"✗ 检查异常: {str(e)}")
- logger.exception("详细异常信息:")
- async def run_interactive_mode(self):
- """运行交互模式"""
- logger.info("\n" + "█" * 80)
- logger.info("敏感词检查 - 交互模式")
- logger.info("█" * 80)
-
- # 初始化AI审查引擎
- if not self.initialize_engine():
- logger.error("AI审查引擎初始化失败,无法继续")
- return False
-
- # 初始化敏感词检测器
- try:
- from core.construction_review.component.reviewers.utils import SensitiveWordChecker
- logger.info("\n正在初始化敏感词检测器...")
- stats = SensitiveWordChecker.initialize()
- logger.info(f"敏感词检测器初始化成功: {stats}")
- except Exception as e:
- logger.error(f"敏感词检测器初始化失败: {str(e)}")
- logger.exception("详细错误:")
- return False
-
- # 获取自定义文本
- custom_text = self.get_custom_text_from_user()
-
- if custom_text:
- # 执行自定义文本检查
- await self.run_custom_text_check(custom_text)
- else:
- logger.info("\n未提供自定义文本,程序结束")
-
- return True
- def run_interactive_sync(self):
- """同步方式运行交互模式"""
- return asyncio.run(self.run_interactive_mode())
def main():
    """Entry point: run the interactive test and exit with its status."""
    banner = "=" * 80
    print(banner)
    print("敏感词检查功能单元测试(完整版本)")
    print("测试文件: test_sensitive_check_standalone.py")
    print(banner)

    tester = TestSensitiveCheck()
    success = tester.run_interactive_sync()

    # Exit code 0 on success, 1 on failure (usable from CI shells).
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()