test_content_timeliness.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. 测试内容时效性审查是否正确处理 JTG B01-2011 的情况
  5. """
  6. import json
  7. import asyncio
  8. from core.construction_review.component.reviewers.timeliness_content_reviewer import (
  9. StandardExtractor, ContentTimelinessReviewer
  10. )
  11. # 测试数据 - 模拟 problem.json 中的情况
  12. test_tertiary_details = [
  13. {
  14. "third_category_name": "国家方针、政策、标准和设计文件",
  15. "third_category_code": "NationalPoliciesStandardsAndDesignDocument",
  16. "start_line": 80,
  17. "end_line": 82,
  18. "content": """<80> 国家方针、政策、标准和设计文件
  19. <81> 《公路工程技术标准》(JTG B01-2011)
  20. <82> 《公路桥涵设计通用规范》(JTG D60-2015)"""
  21. }
  22. ]
  23. # 测试提取器
  24. def test_extractor():
  25. print("=" * 60)
  26. print("测试规范提取器")
  27. print("=" * 60)
  28. extractor = StandardExtractor()
  29. for detail in test_tertiary_details:
  30. refs = extractor.extract_from_content(detail["content"])
  31. print(f"\n从 '{detail['third_category_name']}' 提取到 {len(refs)} 个规范引用:")
  32. for ref in refs:
  33. print(f" - 原始文本: {ref.original_text}")
  34. print(f" 名称: {ref.name}")
  35. print(f" 编号: {ref.number}")
  36. print(f" 上下文: {ref.context[:100]}...")
  37. return refs
  38. # 测试过滤逻辑
  39. def test_filter_logic():
  40. print("\n" + "=" * 60)
  41. print("测试过滤逻辑")
  42. print("=" * 60)
  43. # 模拟 match_reference_files 返回的数据
  44. mock_match_result = [
  45. {
  46. "review_item": "《公路工程技术标准》(JTG B01-2011)",
  47. "has_related_file": True,
  48. "has_exact_match": False,
  49. "exact_match_info": "",
  50. "same_name_current": "《公路工程技术标准》(JTG B01-2014)状态为现行"
  51. },
  52. {
  53. "review_item": "《公路桥涵设计通用规范》(JTG D60-2015)",
  54. "has_related_file": True,
  55. "has_exact_match": True,
  56. "exact_match_info": "《公路桥涵设计通用规范》(JTG D60-2015)状态为现行",
  57. "same_name_current": ""
  58. }
  59. ]
  60. print("\n模拟 match_reference_files 返回数据:")
  61. for idx, item in enumerate(mock_match_result):
  62. print(f"\n 项{idx}:")
  63. print(f" review_item: {item['review_item']}")
  64. print(f" has_related_file: {item['has_related_file']}")
  65. print(f" has_exact_match: {item['has_exact_match']}")
  66. print(f" exact_match_info: {item['exact_match_info']}")
  67. print(f" same_name_current: {item['same_name_current']}")
  68. # 测试旧过滤逻辑(只保留 exact_match_info 不为空的)
  69. old_filtered = [item for item in mock_match_result if item.get('exact_match_info')]
  70. print(f"\n旧过滤逻辑(只保留 exact_match_info 不为空的): {len(old_filtered)} 个项")
  71. for item in old_filtered:
  72. print(f" - {item['review_item']}")
  73. # 测试新过滤逻辑(保留有相关信息的)
  74. new_filtered = [
  75. item for item in mock_match_result
  76. if item.get('has_related_file') or
  77. item.get('exact_match_info') or
  78. item.get('same_name_current')
  79. ]
  80. print(f"\n新过滤逻辑(保留有相关信息的): {len(new_filtered)} 个项")
  81. for item in new_filtered:
  82. print(f" - {item['review_item']}")
  83. # 分析差异
  84. missed = [item for item in mock_match_result if item not in old_filtered]
  85. if missed:
  86. print(f"\n[警告] 旧逻辑漏检的项:")
  87. for item in missed:
  88. print(f" - {item['review_item']}")
  89. print(f" has_related_file: {item['has_related_file']}")
  90. print(f" same_name_current: {item['same_name_current']}")
  91. # 完整测试
  92. async def test_full_review():
  93. print("\n" + "=" * 60)
  94. print("完整审查测试(需要 Milvus 连接)")
  95. print("=" * 60)
  96. try:
  97. async with ContentTimelinessReviewer(max_concurrent=4) as reviewer:
  98. results = await reviewer.review_tertiary_content(
  99. tertiary_details=test_tertiary_details,
  100. collection_name="first_bfp_collection_status"
  101. )
  102. print(f"\n审查完成,共 {len(results)} 个结果:")
  103. for idx, result in enumerate(results):
  104. print(f"\n 结果{idx}:")
  105. print(f" check_item: {result.get('check_item')}")
  106. print(f" exist_issue: {result.get('exist_issue')}")
  107. print(f" risk_info: {result.get('risk_info')}")
  108. check_result = result.get('check_result', {})
  109. print(f" issue_point: {check_result.get('issue_point')}")
  110. print(f" suggestion: {check_result.get('suggestion')}")
  111. print(f" reason: {check_result.get('reason')}")
  112. except Exception as e:
  113. print(f"测试失败: {e}")
  114. import traceback
  115. traceback.print_exc()
if __name__ == "__main__":
    # Exercise the extractor; the returned references are bound to ``refs``
    # but not used any further in this script.
    refs = test_extractor()
    # Compare the old and new filtering rules on mocked data.
    test_filter_logic()
    # Full review (optional): left disabled; its banner states that it needs
    # a Milvus connection.
    # asyncio.run(test_full_review())