test_sensitive_word_checker.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
"""
Test script for the sensitive word checker.
"""
  6. import asyncio
  7. from core.construction_review.component.reviewers.utils import (
  8. SensitiveWordChecker,
  9. check_sensitive_words,
  10. check_sensitive_words_async,
  11. format_check_results
  12. )
  13. def test_sync():
  14. """测试同步检测"""
  15. print("=" * 60)
  16. print("测试同步敏感词检测")
  17. print("=" * 60)
  18. # 初始化
  19. print("\n1. 初始化敏感词检测器...")
  20. stats = SensitiveWordChecker.initialize()
  21. print(f" 加载统计: {stats}")
  22. # 测试文本
  23. test_texts = [
  24. "这是一段正常的文本内容",
  25. "施工方案中使用了最好的材料",
  26. "本项目采用国内最先进的技术",
  27. ]
  28. print("\n2. 开始检测...")
  29. for i, text in enumerate(test_texts, 1):
  30. print(f"\n 测试 {i}: {text}")
  31. results = check_sensitive_words(text)
  32. if results:
  33. print(f" ⚠️ 发现 {len(results)} 个敏感词:")
  34. for item in results:
  35. print(f" - 敏感词: '{item['word']}' | 位置: {item['position']}-{item['end_position']} | 来源: {item['source']}")
  36. else:
  37. print(" ✓ 未发现敏感词")
  38. # 测试格式化结果
  39. print("\n3. 测试格式化结果...")
  40. text = "本项目采用最好的材料和最先进的技术"
  41. results = check_sensitive_words(text)
  42. formatted = format_check_results(results, text)
  43. print(f" 格式化结果: {formatted}")
  44. async def test_async():
  45. """测试异步检测"""
  46. print("\n" + "=" * 60)
  47. print("测试异步敏感词检测(并发)")
  48. print("=" * 60)
  49. test_texts = [
  50. "这是第一段测试文本",
  51. "这是第二段包含最好的文本",
  52. "这是第三段包含最先进的文本",
  53. "这是第四段正常文本",
  54. "这是第五段包含绝对化用语的文本",
  55. ]
  56. print(f"\n并发检测 {len(test_texts)} 段文本...")
  57. # 并发执行
  58. tasks = [check_sensitive_words_async(text) for text in test_texts]
  59. results_list = await asyncio.gather(*tasks)
  60. # 输出结果
  61. for i, (text, results) in enumerate(zip(test_texts, results_list), 1):
  62. print(f"\n文本 {i}: {text}")
  63. if results:
  64. print(f"⚠️ 发现 {len(results)} 个敏感词:")
  65. for item in results:
  66. print(f" - {item['word']} (位置: {item['position']}, 来源: {item['source']})")
  67. else:
  68. print("✓ 未发现敏感词")
  69. def test_performance():
  70. """测试性能"""
  71. print("\n" + "=" * 60)
  72. print("性能测试")
  73. print("=" * 60)
  74. import time
  75. # 生成大量文本
  76. test_text = "这是一段包含最好、最先进、绝对等敏感词的长文本。" * 100
  77. print(f"\n测试文本长度: {len(test_text)} 字符")
  78. # 测试检测速度
  79. iterations = 100
  80. start_time = time.time()
  81. for _ in range(iterations):
  82. results = check_sensitive_words(test_text)
  83. end_time = time.time()
  84. elapsed = end_time - start_time
  85. avg_time = elapsed / iterations * 1000
  86. print(f"执行 {iterations} 次检测")
  87. print(f"总耗时: {elapsed:.3f} 秒")
  88. print(f"平均耗时: {avg_time:.3f} 毫秒/次")
  89. print(f"检测速度: {iterations/elapsed:.2f} 次/秒")
  90. def main():
  91. """主函数"""
  92. print("\n" + "=" * 60)
  93. print("敏感词检测系统测试")
  94. print("=" * 60)
  95. # 同步测试
  96. test_sync()
  97. # 异步测试
  98. asyncio.run(test_async())
  99. # 性能测试
  100. test_performance()
  101. print("\n" + "=" * 60)
  102. print("测试完成!")
  103. print("=" * 60)
  104. if __name__ == "__main__":
  105. main()