test_sensitive_check.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
"""
Unit tests for the sensitive-word check.

Exercises a simplified, test-only copy of AIReviewEngine.check_sensitive
(keyword matching only) across several scenarios.
"""
  7. import asyncio
  8. import sys
  9. import os
  10. import time
  11. from pathlib import Path
  12. from typing import Dict, Any, List
  13. from dataclasses import dataclass
# Add the project root directory to the Python path.
# The root is taken to be two levels above the directory containing this file.
current_dir = Path(__file__).parent.absolute()
project_root = current_dir.parent.parent
# Prepend (index 0) so the project packages imported below are looked up
# in this location before any other sys.path entry.
sys.path.insert(0, str(project_root))
# NOTE(review): this changes the process-wide working directory at import
# time; presumably so relative resource paths resolve from the project
# root -- confirm.
os.chdir(str(project_root))
  19. from core.construction_review.component.reviewers.base_reviewer import ReviewResult
  20. from foundation.observability.logger.loggering import review_logger as logger
  21. class TestSensitiveCheck:
  22. """敏感词检查功能测试类"""
  23. def __init__(self):
  24. self.test_results = []
  25. logger.info("=" * 80)
  26. logger.info("初始化敏感词检查测试类")
  27. logger.info("=" * 80)
  28. async def check_sensitive_simplified(
  29. self,
  30. trace_id_idx: str,
  31. review_content: str,
  32. review_references: str = "",
  33. review_location_label: str = "",
  34. state: Dict = None,
  35. stage_name: str = "测试阶段"
  36. ) -> Dict[str, Any]:
  37. """
  38. 简化版的敏感信息检查函数(用于测试)
  39. 功能说明:
  40. 1. 使用关键词匹配进行敏感词检测
  41. 2. 如果检测到敏感词,格式化敏感词信息并返回
  42. 3. 如果未检测到敏感词,返回成功结果
  43. 4. 移除了原函数中的消息推送和大模型二审功能
  44. Args:
  45. trace_id_idx: 追踪ID索引
  46. review_content: 审查内容
  47. review_references: 审查参考信息
  48. review_location_label: 审查位置标签
  49. state: 状态字典(简化版不使用)
  50. stage_name: 阶段名称
  51. Returns:
  52. Dict[str, Any]: 敏感信息检查结果
  53. """
  54. from core.construction_review.component.reviewers.utils import (
  55. check_sensitive_words_async,
  56. format_check_results
  57. )
  58. logger.info("=" * 80)
  59. logger.info(f"开始执行敏感词检查 - trace_id: sensitive_check{trace_id_idx}")
  60. logger.info(f"阶段名称: {stage_name}")
  61. logger.info(f"审查位置: {review_location_label}")
  62. logger.info(f"审查内容长度: {len(review_content)} 字符")
  63. logger.info(f"审查内容预览: {review_content[:100]}..." if len(review_content) > 100 else f"审查内容: {review_content}")
  64. logger.info("=" * 80)
  65. start_time = time.time()
  66. trace_id = "sensitive_check" + trace_id_idx
  67. # 第一步:使用关键词匹配式审查
  68. logger.info("步骤1: 开始关键词匹配检测...")
  69. first_results = await check_sensitive_words_async(review_content)
  70. detection_time = time.time() - start_time
  71. logger.info(f"步骤1: 关键词检测完成,耗时: {detection_time:.4f}s")
  72. # 判断是否检测到敏感词
  73. if first_results:
  74. logger.warning(f"步骤2: 检测到 {len(first_results)} 个敏感词")
  75. logger.warning("-" * 80)
  76. # 格式化敏感词列表
  77. sensitive_words_info = []
  78. for idx, item in enumerate(first_results, 1):
  79. word_info = f"敏感词: {item['word']}, 位置: {item['position']}-{item['end_position']}, 来源: {item['source']}"
  80. sensitive_words_info.append(word_info)
  81. logger.warning(f" 敏感词 #{idx}: {word_info}")
  82. formatted_sensitive_words = "\n".join(sensitive_words_info)
  83. logger.warning("-" * 80)
  84. # 简化版:直接返回检测结果,不调用大模型二审
  85. execution_time = time.time() - start_time
  86. result = ReviewResult(
  87. success=False,
  88. details={
  89. "name": "sensitive_check",
  90. "response": f"检测到{len(first_results)}个敏感词",
  91. "sensitive_words": first_results,
  92. "formatted_info": formatted_sensitive_words
  93. },
  94. error_message=None,
  95. execution_time=execution_time
  96. )
  97. logger.warning(f"步骤3: 敏感词检查完成(检测到敏感词),总耗时: {execution_time:.4f}s")
  98. logger.info("=" * 80)
  99. return result
  100. else:
  101. # 没有检测到敏感词,构造返回体
  102. logger.info("步骤2: 未检测到敏感词")
  103. execution_time = time.time() - start_time
  104. result = ReviewResult(
  105. success=True,
  106. details={"name": "sensitive_check", "response": "无明显问题"},
  107. error_message=None,
  108. execution_time=execution_time
  109. )
  110. # 简化版:移除消息推送功能
  111. logger.info(f"步骤3: 敏感词检查完成(未检测到敏感词),总耗时: {execution_time:.4f}s")
  112. logger.info("=" * 80)
  113. return result
  114. def get_test_cases(self) -> List[Dict[str, Any]]:
  115. """获取测试用例"""
  116. return [
  117. {
  118. "name": "正常文本-无敏感词",
  119. "trace_id_idx": "_test_001",
  120. "review_content": "本工程为住宅楼建设项目,采用框架结构,建筑面积约5000平方米。施工过程中应严格按照国家规范执行,确保工程质量。",
  121. "review_location_label": "第一章 工程概况",
  122. "expected_success": True,
  123. "description": "测试正常的施工方案文本,不包含任何敏感词"
  124. },
  125. {
  126. "name": "空文本",
  127. "trace_id_idx": "_test_002",
  128. "review_content": "",
  129. "review_location_label": "空内容测试",
  130. "expected_success": True,
  131. "description": "测试空文本的处理"
  132. },
  133. {
  134. "name": "短文本-无敏感词",
  135. "trace_id_idx": "_test_003",
  136. "review_content": "施工安全第一",
  137. "review_location_label": "安全标语",
  138. "expected_success": True,
  139. "description": "测试短文本的处理"
  140. },
  141. {
  142. "name": "长文本-无敏感词",
  143. "trace_id_idx": "_test_004",
  144. "review_content": """
  145. 本施工方案编制依据包括:
  146. 1. 《建筑工程施工质量验收统一标准》GB50300-2013
  147. 2. 《混凝土结构工程施工质量验收规范》GB50204-2015
  148. 3. 《建筑地基基础工程施工质量验收规范》GB50202-2018
  149. 4. 施工图纸及相关设计文件
  150. 5. 现场实际情况及勘察报告
  151. 工程概况:
  152. 本工程位于某市某区,为高层住宅建筑,地上30层,地下2层。
  153. 建筑高度99.8米,总建筑面积约28000平方米。
  154. 结构形式为框架剪力墙结构,抗震设防烈度为7度。
  155. """,
  156. "review_location_label": "第一章 编制依据与工程概况",
  157. "expected_success": True,
  158. "description": "测试包含多段落的长文本"
  159. },
  160. {
  161. "name": "包含特殊字符",
  162. "trace_id_idx": "_test_005",
  163. "review_content": "施工现场温度:-5℃~35℃,相对湿度:≤85%,风力:≤5级。混凝土强度等级:C30、C35。钢筋规格:Φ12、Φ16、Φ20。",
  164. "review_location_label": "第三章 施工条件",
  165. "expected_success": True,
  166. "description": "测试包含特殊符号和技术参数的文本"
  167. }
  168. ]
  169. async def run_single_test(self, test_case: Dict[str, Any]) -> bool:
  170. """运行单个测试用例"""
  171. logger.info("\n" + "█" * 80)
  172. logger.info(f"测试用例: {test_case['name']}")
  173. logger.info(f"描述: {test_case['description']}")
  174. logger.info("█" * 80)
  175. try:
  176. # 执行测试
  177. result = await self.check_sensitive_simplified(
  178. trace_id_idx=test_case['trace_id_idx'],
  179. review_content=test_case['review_content'],
  180. review_location_label=test_case['review_location_label'],
  181. stage_name="单元测试阶段"
  182. )
  183. # 验证结果
  184. logger.info("\n" + "-" * 80)
  185. logger.info("测试结果验证:")
  186. logger.info(f" 返回类型: {type(result)}")
  187. logger.info(f" 是否成功: {result.success}")
  188. logger.info(f" 预期成功: {test_case['expected_success']}")
  189. logger.info(f" 执行时间: {result.execution_time:.4f}s")
  190. logger.info(f" 详细信息: {result.details}")
  191. if result.error_message:
  192. logger.error(f" 错误信息: {result.error_message}")
  193. # 判断测试是否通过
  194. test_passed = result.success == test_case['expected_success']
  195. if test_passed:
  196. logger.info("✓ 测试通过")
  197. else:
  198. logger.error("✗ 测试失败")
  199. logger.error(f" 预期 success={test_case['expected_success']}, 实际 success={result.success}")
  200. logger.info("-" * 80)
  201. return test_passed
  202. except Exception as e:
  203. logger.error(f"✗ 测试异常: {str(e)}")
  204. logger.exception("详细异常信息:")
  205. return False
  206. async def run_all_tests(self):
  207. """运行所有测试用例"""
  208. logger.info("\n" + "█" * 80)
  209. logger.info("开始执行敏感词检查单元测试")
  210. logger.info("█" * 80)
  211. # 初始化敏感词检测器
  212. try:
  213. from core.construction_review.component.reviewers.utils import SensitiveWordChecker
  214. logger.info("\n正在初始化敏感词检测器...")
  215. stats = SensitiveWordChecker.initialize()
  216. logger.info(f"敏感词检测器初始化成功: {stats}")
  217. except Exception as e:
  218. logger.error(f"敏感词检测器初始化失败: {str(e)}")
  219. return False
  220. # 获取测试用例
  221. test_cases = self.get_test_cases()
  222. logger.info(f"\n共有 {len(test_cases)} 个测试用例")
  223. # 运行测试
  224. results = []
  225. for idx, test_case in enumerate(test_cases, 1):
  226. logger.info(f"\n{'=' * 80}")
  227. logger.info(f"执行测试 {idx}/{len(test_cases)}")
  228. logger.info(f"{'=' * 80}")
  229. passed = await self.run_single_test(test_case)
  230. results.append({
  231. 'name': test_case['name'],
  232. 'passed': passed
  233. })
  234. # 统计结果
  235. logger.info("\n" + "█" * 80)
  236. logger.info("测试结果汇总")
  237. logger.info("█" * 80)
  238. passed_count = sum(1 for r in results if r['passed'])
  239. total_count = len(results)
  240. logger.info(f"\n总测试数: {total_count}")
  241. logger.info(f"通过数量: {passed_count}")
  242. logger.info(f"失败数量: {total_count - passed_count}")
  243. logger.info(f"通过率: {passed_count/total_count*100:.2f}%")
  244. logger.info("\n详细结果:")
  245. for idx, result in enumerate(results, 1):
  246. status = "✓ 通过" if result['passed'] else "✗ 失败"
  247. logger.info(f" {idx}. {result['name']}: {status}")
  248. logger.info("\n" + "█" * 80)
  249. if passed_count == total_count:
  250. logger.info("✓ 所有测试通过!")
  251. logger.info("█" * 80)
  252. return True
  253. else:
  254. logger.error(f"✗ 有 {total_count - passed_count} 个测试失败")
  255. logger.info("█" * 80)
  256. return False
  257. def run_tests_sync(self):
  258. """同步方式运行测试"""
  259. return asyncio.run(self.run_all_tests())
  260. def main():
  261. """主函数"""
  262. logger.info("=" * 80)
  263. logger.info("敏感词检查功能单元测试")
  264. logger.info("测试文件: test_sensitive_check.py")
  265. logger.info("=" * 80)
  266. tester = TestSensitiveCheck()
  267. success = tester.run_tests_sync()
  268. sys.exit(0 if success else 1)
  269. if __name__ == "__main__":
  270. main()