| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- 敏感词检测器测试脚本
- """
- import asyncio
- from core.construction_review.component.reviewers.utils import (
- SensitiveWordChecker,
- check_sensitive_words,
- check_sensitive_words_async,
- format_check_results
- )
def test_sync():
    """Exercise the synchronous sensitive-word detection path.

    Initializes the checker and prints what ``initialize()`` returns, scans a
    few sample sentences and prints every reported hit, then prints the output
    of ``format_check_results`` for one additional sentence.
    """
    banner = "=" * 60
    print(banner)
    print("测试同步敏感词检测")
    print(banner)

    # Step 1: initialize the checker and show whatever it reports back.
    print("\n1. 初始化敏感词检测器...")
    load_stats = SensitiveWordChecker.initialize()
    print(f" 加载统计: {load_stats}")

    # Sample sentences fed to the checker one at a time.
    samples = [
        "这是一段正常的文本内容",
        "施工方案中使用了最好的材料",
        "本项目采用国内最先进的技术",
    ]

    # Step 2: check each sample and list the hits, if any.
    print("\n2. 开始检测...")
    for number, sample in enumerate(samples, start=1):
        print(f"\n 测试 {number}: {sample}")
        hits = check_sensitive_words(sample)

        if not hits:
            print(" ✓ 未发现敏感词")
            continue

        print(f" ⚠️ 发现 {len(hits)} 个敏感词:")
        for hit in hits:
            print(f" - 敏感词: '{hit['word']}' | 位置: {hit['position']}-{hit['end_position']} | 来源: {hit['source']}")

    # Step 3: run the formatter on a sentence checked separately from the loop.
    print("\n3. 测试格式化结果...")
    combined = "本项目采用最好的材料和最先进的技术"
    formatted = format_check_results(check_sensitive_words(combined), combined)
    print(f" 格式化结果: {formatted}")
async def test_async():
    """Exercise the asynchronous detection path on several texts at once.

    Creates one ``check_sensitive_words_async`` call per sample, awaits them
    together with ``asyncio.gather`` and prints the hits for each sample in
    the original sample order.
    """
    print("\n" + "=" * 60)
    print("测试异步敏感词检测(并发)")
    print("=" * 60)

    samples = [
        "这是第一段测试文本",
        "这是第二段包含最好的文本",
        "这是第三段包含最先进的文本",
        "这是第四段正常文本",
        "这是第五段包含绝对化用语的文本",
    ]

    print(f"\n并发检测 {len(samples)} 段文本...")

    # gather() returns results in the same order as the awaitables passed in,
    # so they can be zipped back onto the samples below.
    hits_per_sample = await asyncio.gather(
        *(check_sensitive_words_async(sample) for sample in samples)
    )

    for number, (sample, hits) in enumerate(zip(samples, hits_per_sample), start=1):
        print(f"\n文本 {number}: {sample}")
        if not hits:
            print("✓ 未发现敏感词")
            continue

        print(f"⚠️ 发现 {len(hits)} 个敏感词:")
        for hit in hits:
            print(f" - {hit['word']} (位置: {hit['position']}, 来源: {hit['source']})")
def test_performance():
    """Time repeated synchronous checks of one long text and print the figures.

    Runs ``check_sensitive_words`` a fixed number of times on a sentence
    repeated 100 times, then prints the total time, the average time per call
    in milliseconds and the number of calls per second.
    """
    import time

    print("\n" + "=" * 60)
    print("性能测试")
    print("=" * 60)

    # Build a long input by repeating one sentence.
    test_text = "这是一段包含最好、最先进、绝对等敏感词的长文本。" * 100

    print(f"\n测试文本长度: {len(test_text)} 字符")

    iterations = 100

    # perf_counter() is the high-resolution clock intended for measuring
    # short durations; time.time() is a wall clock that can be coarse or be
    # adjusted while the loop is running.
    start_time = time.perf_counter()
    for _ in range(iterations):
        # Only the elapsed time matters here; the return value is discarded.
        check_sensitive_words(test_text)
    elapsed = time.perf_counter() - start_time

    avg_time = elapsed / iterations * 1000
    # Guard the throughput division in case the clock reports no elapsed time.
    rate = iterations / elapsed if elapsed > 0 else float("inf")

    print(f"执行 {iterations} 次检测")
    print(f"总耗时: {elapsed:.3f} 秒")
    print(f"平均耗时: {avg_time:.3f} 毫秒/次")
    print(f"检测速度: {rate:.2f} 次/秒")
def main():
    """Run the synchronous, asynchronous and performance checks in that order."""
    rule = "=" * 60

    print("\n" + rule)
    print("敏感词检测系统测试")
    print(rule)

    # Synchronous path first.
    test_sync()

    # The async test is a coroutine, so drive it with its own event loop.
    asyncio.run(test_async())

    # Timing run last.
    test_performance()

    print("\n" + rule)
    print("测试完成!")
    print(rule)


# Run the whole suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
|