test_sensitive_check_standalone.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. 敏感词检查功能单元测试(完整版本)
  5. 测试 AIReviewEngine.check_sensitive 方法
  6. 包含大模型二审功能,支持自定义检查文本
  7. """
  8. import asyncio
  9. import sys
  10. import os
  11. import time
  12. from pathlib import Path
  13. from typing import Dict, Any, Optional
  14. # 添加项目根目录到Python路径
  15. current_dir = Path(__file__).parent.absolute()
  16. project_root = current_dir.parent.parent
  17. sys.path.insert(0, str(project_root))
  18. os.chdir(str(project_root))
  19. class SimpleLogger:
  20. """简单日志类"""
  21. @staticmethod
  22. def info(msg):
  23. print(f"[INFO] {msg}")
  24. @staticmethod
  25. def warning(msg):
  26. print(f"[WARNING] {msg}")
  27. @staticmethod
  28. def error(msg):
  29. print(f"[ERROR] {msg}")
  30. @staticmethod
  31. def exception(msg):
  32. import traceback
  33. print(f"[EXCEPTION] {msg}")
  34. traceback.print_exc()
  35. logger = SimpleLogger()
  36. class TestSensitiveCheck:
  37. """敏感词检查功能测试类"""
  38. def __init__(self):
  39. self.test_results = []
  40. self.ai_review_engine = None
  41. logger.info("=" * 80)
  42. logger.info("初始化敏感词检查测试类")
  43. logger.info("=" * 80)
  44. def initialize_engine(self):
  45. """初始化AI审查引擎"""
  46. try:
  47. from core.construction_review.component.ai_review_engine import AIReviewEngine
  48. from core.base.task_models import TaskFileInfo
  49. # 创建 mock 的 TaskFileInfo 对象
  50. logger.info("创建 mock TaskFileInfo 对象...")
  51. mock_task_info = TaskFileInfo(
  52. file_id="test_file_001",
  53. callback_task_id="test_task_001",
  54. user_id="test_user",
  55. review_config=["sensitive_check"], # 只启用敏感词检查
  56. project_plan_type="test_project",
  57. tendency_review_role="test_role"
  58. )
  59. logger.info("初始化 AI 审查引擎...")
  60. self.ai_review_engine = AIReviewEngine(task_file_info=mock_task_info)
  61. logger.info("AI审查引擎初始化成功")
  62. return True
  63. except Exception as e:
  64. logger.error(f"AI审查引擎初始化失败: {e}")
  65. logger.exception("详细错误:")
  66. return False
  67. async def check_sensitive_with_llm_review(
  68. self,
  69. trace_id_idx: str,
  70. review_content: str,
  71. review_references: str = "",
  72. review_location_label: str = "",
  73. state: Dict = None,
  74. stage_name: str = "测试阶段"
  75. ) -> Any:
  76. """
  77. 完整版的敏感信息检查函数(包含大模型二审)
  78. 功能说明:
  79. 1. 使用关键词匹配进行敏感词检测
  80. 2. 如果检测到敏感词,调用大模型进行二审(与原函数一致)
  81. 3. 如果未检测到敏感词,返回成功结果
  82. 4. 移除了消息推送功能(简化测试)
  83. Args:
  84. trace_id_idx: 追踪ID索引
  85. review_content: 审查内容
  86. review_references: 审查参考信息
  87. review_location_label: 审查位置标签
  88. state: 状态字典
  89. stage_name: 阶段名称
  90. Returns:
  91. 审查结果对象
  92. """
  93. try:
  94. from core.construction_review.component.reviewers.utils import check_sensitive_words_async
  95. from core.construction_review.component.reviewers.base_reviewer import ReviewResult
  96. except ImportError as e:
  97. logger.error(f"导入模块失败: {e}")
  98. return None
  99. logger.info("=" * 80)
  100. logger.info(f"开始执行敏感词检查 - trace_id: sensitive_check{trace_id_idx}")
  101. logger.info(f"阶段名称: {stage_name}")
  102. logger.info(f"审查位置: {review_location_label}")
  103. logger.info(f"审查内容长度: {len(review_content)} 字符")
  104. if len(review_content) > 100:
  105. logger.info(f"审查内容预览: {review_content[:100]}...")
  106. else:
  107. logger.info(f"审查内容: {review_content}")
  108. logger.info("=" * 80)
  109. start_time = time.time()
  110. trace_id = "sensitive_check" + trace_id_idx
  111. # 第一步:使用关键词匹配式审查
  112. logger.info("步骤1: 开始关键词匹配检测...")
  113. try:
  114. first_results = await check_sensitive_words_async(review_content)
  115. except Exception as e:
  116. logger.error(f"关键词检测失败: {e}")
  117. logger.exception("详细错误:")
  118. return None
  119. detection_time = time.time() - start_time
  120. logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")
  121. # 判断是否检测到敏感词
  122. if first_results:
  123. logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词,准备送入大模型二审")
  124. logger.warning("-" * 80)
  125. # 格式化敏感词列表
  126. sensitive_words_info = []
  127. for idx, item in enumerate(first_results, 1):
  128. word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
  129. sensitive_words_info.append(word_info)
  130. logger.warning(f" 敏感词 #{idx}: {word_info}")
  131. formatted_sensitive_words = "\n".join(sensitive_words_info)
  132. logger.warning("-" * 80)
  133. # 调用大模型进行二审(与原函数保持一致)
  134. logger.info("步骤3: 调用大模型进行二审...")
  135. try:
  136. if not self.ai_review_engine:
  137. logger.error("AI审查引擎未初始化")
  138. return None
  139. # 调用 review 方法进行大模型二审
  140. result = await self.ai_review_engine.review(
  141. "sensitive_check",
  142. trace_id,
  143. "basic",
  144. "sensitive_word_check",
  145. review_content,
  146. formatted_sensitive_words,
  147. None,
  148. review_location_label,
  149. state,
  150. stage_name
  151. )
  152. llm_review_time = time.time() - start_time
  153. logger.info(f"步骤3: 大模型二审完成,总耗时: {llm_review_time:.4f}s")
  154. logger.info("=" * 80)
  155. return result
  156. except Exception as e:
  157. logger.error(f"大模型二审失败: {e}")
  158. logger.exception("详细错误:")
  159. return None
  160. else:
  161. # 没有检测到敏感词,构造返回体
  162. logger.info("步骤2: 未检测到敏感词")
  163. execution_time = time.time() - start_time
  164. result = ReviewResult(
  165. success=True,
  166. details={"name": "sensitive_check", "response": "无明显问题"},
  167. error_message=None,
  168. execution_time=execution_time
  169. )
  170. logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
  171. logger.info("=" * 80)
  172. return result
  173. def get_custom_text_from_user(self) -> Optional[str]:
  174. """从用户获取自定义检查文本"""
  175. logger.info("\n" + "=" * 80)
  176. logger.info("请输入要检查的文本内容")
  177. logger.info("=" * 80)
  178. logger.info("提示:")
  179. logger.info(" 1. 可以输入多行文本,输入完成后单独一行输入 'END' 结束")
  180. logger.info(" 2. 直接按回车(输入空行)将跳过自定义输入")
  181. logger.info("=" * 80)
  182. lines = []
  183. first_line = True
  184. while True:
  185. try:
  186. if first_line:
  187. line = input(">>> ")
  188. first_line = False
  189. # 如果第一行就是空行,跳过自定义输入
  190. if not line.strip():
  191. logger.info("跳过自定义文本输入")
  192. return None
  193. else:
  194. line = input("... ")
  195. # 检查是否结束输入
  196. if line.strip().upper() == 'END':
  197. break
  198. lines.append(line)
  199. except EOFError:
  200. break
  201. except KeyboardInterrupt:
  202. logger.info("\n用户取消输入")
  203. return None
  204. custom_text = "\n".join(lines)
  205. if not custom_text.strip():
  206. logger.info("未输入有效文本")
  207. return None
  208. logger.info(f"\n已接收文本,长度: {len(custom_text)} 字符")
  209. return custom_text
  210. async def run_custom_text_check(self, custom_text: str):
  211. """运行自定义文本检查"""
  212. logger.info("\n" + "█" * 80)
  213. logger.info("执行自定义文本敏感词检查")
  214. logger.info("█" * 80)
  215. try:
  216. # 执行检查
  217. result = await self.check_sensitive_with_llm_review(
  218. trace_id_idx="_custom_001",
  219. review_content=custom_text,
  220. review_location_label="自定义文本检查",
  221. stage_name="自定义测试"
  222. )
  223. # 显示结果
  224. logger.info("\n" + "-" * 80)
  225. logger.info("检查结果:")
  226. logger.info("-" * 80)
  227. if result:
  228. logger.info(f" 返回类型: {type(result).__name__}")
  229. logger.info(f" 是否成功: {result.success}")
  230. logger.info(f" 执行时间: {result.execution_time:.4f}s")
  231. logger.info(f" 详细信息: {result.details}")
  232. if result.error_message:
  233. logger.error(f" 错误信息: {result.error_message}")
  234. if not result.success:
  235. logger.warning("\n检测到敏感内容,请查看上方详细信息")
  236. else:
  237. logger.info("\n✓ 未检测到敏感内容")
  238. else:
  239. logger.error("检查失败,未返回结果")
  240. logger.info("-" * 80)
  241. except Exception as e:
  242. logger.error(f"✗ 检查异常: {str(e)}")
  243. logger.exception("详细异常信息:")
  244. async def run_interactive_mode(self):
  245. """运行交互模式"""
  246. logger.info("\n" + "█" * 80)
  247. logger.info("敏感词检查 - 交互模式")
  248. logger.info("█" * 80)
  249. # 初始化AI审查引擎
  250. if not self.initialize_engine():
  251. logger.error("AI审查引擎初始化失败,无法继续")
  252. return False
  253. # 初始化敏感词检测器
  254. try:
  255. from core.construction_review.component.reviewers.utils import SensitiveWordChecker
  256. logger.info("\n正在初始化敏感词检测器...")
  257. stats = SensitiveWordChecker.initialize()
  258. logger.info(f"敏感词检测器初始化成功: {stats}")
  259. except Exception as e:
  260. logger.error(f"敏感词检测器初始化失败: {str(e)}")
  261. logger.exception("详细错误:")
  262. return False
  263. # 获取自定义文本
  264. custom_text = self.get_custom_text_from_user()
  265. if custom_text:
  266. # 执行自定义文本检查
  267. await self.run_custom_text_check(custom_text)
  268. else:
  269. logger.info("\n未提供自定义文本,程序结束")
  270. return True
  271. def run_interactive_sync(self):
  272. """同步方式运行交互模式"""
  273. return asyncio.run(self.run_interactive_mode())
  274. def main():
  275. """主函数"""
  276. print("=" * 80)
  277. print("敏感词检查功能单元测试(完整版本)")
  278. print("测试文件: test_sensitive_check_standalone.py")
  279. print("=" * 80)
  280. tester = TestSensitiveCheck()
  281. success = tester.run_interactive_sync()
  282. sys.exit(0 if success else 1)
  283. if __name__ == "__main__":
  284. main()