#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Accuracy verification tests for the completeness review.

Based on real classification result data, these tests verify that the
completeness review module counts categories correctly and never reports a
category as missing when it is present in the classification results.

Test data: temp/construction_review/final_result/67d45692fb97aeef8f896e78475ce539-1773153034.json
"""
import asyncio
import json
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set, Tuple

import pytest

# Put the project root (three directory levels above this file) on sys.path;
# the `core` import below must come after this insertion.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from core.construction_review.component.reviewers.completeness_reviewer import (  # noqa: E402
    LightweightCompletenessChecker,
    result_to_dict,
    TertiarySpecLoader,
)
  24. class TestCompletenessAccuracy:
  25. """完整性审查准确性测试类"""
  26. @pytest.fixture(scope="class")
  27. def test_data_path(self) -> Path:
  28. """测试数据文件路径"""
  29. return project_root / "temp" / "construction_review" / "final_result" / "67d45692fb97aeef8f896e78475ce539-1773153034.json"
  30. @pytest.fixture(scope="class")
  31. def standard_csv_path(self) -> Path:
  32. """标准分类CSV路径"""
  33. return project_root / "core" / "construction_review" / "component" / "doc_worker" / "config" / "StandardCategoryTable.csv"
  34. @pytest.fixture(scope="class")
  35. def test_chunks(self, test_data_path: Path) -> List[Dict]:
  36. """加载测试数据中的chunks"""
  37. if not test_data_path.exists():
  38. pytest.skip(f"测试数据文件不存在: {test_data_path}")
  39. with open(test_data_path, 'r', encoding='utf-8') as f:
  40. data = json.load(f)
  41. chunks = data.get("document_result", {}).get("structured_content", {}).get("chunks", [])
  42. print(f"\n加载了 {len(chunks)} 个 chunks")
  43. return chunks
  44. @pytest.fixture(scope="class")
  45. def checker(self, standard_csv_path: Path) -> LightweightCompletenessChecker:
  46. """创建完整性检查器"""
  47. if not standard_csv_path.exists():
  48. pytest.skip(f"标准CSV文件不存在: {standard_csv_path}")
  49. return LightweightCompletenessChecker(str(standard_csv_path))
  50. def extract_actual_categories(self, chunks: List[Dict]) -> Dict[str, Set[Tuple]]:
  51. """
  52. 从 chunks 中提取实际存在的分类
  53. Returns:
  54. {
  55. "tertiary": {(first_code, second_code, third_code), ...},
  56. "secondary": {(first_code, second_code), ...},
  57. "by_chapter": {
  58. "basis": {"tertiary": {...}, "secondary": {...}},
  59. ...
  60. }
  61. }
  62. """
  63. result = {
  64. "tertiary": set(),
  65. "secondary": set(),
  66. "by_chapter": defaultdict(lambda: {"tertiary": set(), "secondary": set()})
  67. }
  68. for chunk in chunks:
  69. cat1 = chunk.get("chapter_classification") or chunk.get("first_code")
  70. cat2 = chunk.get("secondary_category_code") or chunk.get("second_code")
  71. cat3 = chunk.get("tertiary_category_code") or chunk.get("third_code")
  72. if not cat1 or not cat2:
  73. continue
  74. # 记录二级分类
  75. sec_key = (cat1, cat2)
  76. result["secondary"].add(sec_key)
  77. result["by_chapter"][cat1]["secondary"].add(sec_key)
  78. # 记录三级分类(排除无效值)
  79. if cat3 and cat3 not in ["", "none", "non_standard"]:
  80. ter_key = (cat1, cat2, cat3)
  81. result["tertiary"].add(ter_key)
  82. result["by_chapter"][cat1]["tertiary"].add(ter_key)
  83. return result
  84. def test_data_file_exists(self, test_data_path: Path):
  85. """测试数据文件存在性检查"""
  86. assert test_data_path.exists(), f"测试数据文件不存在: {test_data_path}"
  87. print(f"\n测试数据文件: {test_data_path}")
  88. def test_standard_csv_exists(self, standard_csv_path: Path):
  89. """标准CSV文件存在性检查"""
  90. assert standard_csv_path.exists(), f"标准CSV文件不存在: {standard_csv_path}"
  91. print(f"\n标准CSV文件: {standard_csv_path}")
  92. def test_extract_actual_categories(self, test_chunks: List[Dict]):
  93. """测试分类提取功能"""
  94. actual = self.extract_actual_categories(test_chunks)
  95. print(f"\n实际存在的分类统计:")
  96. print(f" - 二级分类总数: {len(actual['secondary'])}")
  97. print(f" - 三级分类总数: {len(actual['tertiary'])}")
  98. print(f" - 涉及章节: {list(actual['by_chapter'].keys())}")
  99. # 验证每个章节的分类
  100. for chapter, cats in actual["by_chapter"].items():
  101. print(f"\n 章节 '{chapter}':")
  102. print(f" - 二级分类: {len(cats['secondary'])} 个")
  103. print(f" - 三级分类: {len(cats['tertiary'])} 个")
  104. assert len(actual["secondary"]) > 0, "应至少存在一个二级分类"
  105. @pytest.mark.asyncio
  106. async def test_no_false_positives_for_existing_categories(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  107. """
  108. 关键测试:确保存在的分类不会被误报为缺失
  109. 这是核心测试,验证完整性审查不会将实际存在的三级分类报告为缺失。
  110. """
  111. # 获取实际存在的分类
  112. actual = self.extract_actual_categories(test_chunks)
  113. # 按章节进行完整性检查
  114. for chapter_code in actual["by_chapter"].keys():
  115. # 获取该章节的所有 chunks
  116. chapter_chunks = [
  117. c for c in test_chunks
  118. if c.get("chapter_classification") == chapter_code
  119. ]
  120. if not chapter_chunks:
  121. continue
  122. print(f"\n{'='*60}")
  123. print(f"测试章节: {chapter_code}")
  124. print(f" chunks 数量: {len(chapter_chunks)}")
  125. # 执行完整性检查
  126. result = await checker.check(
  127. chunks=chapter_chunks,
  128. outline=None,
  129. chapter_classification=chapter_code
  130. )
  131. result_dict = result_to_dict(result)
  132. tertiary_result = result_dict.get("tertiary_completeness", {})
  133. # 获取检查器报告的缺失项
  134. missing_details = tertiary_result.get("missing_details", [])
  135. reported_missing = set()
  136. for item in missing_details:
  137. key = (
  138. item.get("first_code"),
  139. item.get("secondary_code"),
  140. item.get("tertiary_code")
  141. )
  142. reported_missing.add(key)
  143. # 获取该章节实际存在的三级分类
  144. actual_tertiary = actual["by_chapter"][chapter_code]["tertiary"]
  145. print(f" 实际存在的三级分类: {len(actual_tertiary)} 个")
  146. print(f" 报告缺失的三级分类: {len(reported_missing)} 个")
  147. print(f" 完整率: {tertiary_result.get('completeness_rate', 'N/A')}")
  148. # 关键验证:检查是否有实际存在的分类被误报为缺失
  149. false_positives = reported_missing & actual_tertiary
  150. if false_positives:
  151. print(f"\n ❌ 发现误报!以下分类实际存在但被报告为缺失:")
  152. for fp in false_positives:
  153. print(f" - {fp}")
  154. # 查找对应的详细信息
  155. for item in missing_details:
  156. if (item.get("first_code"), item.get("secondary_code"), item.get("tertiary_code")) == fp:
  157. print(f" 名称: {item.get('tertiary_name')}")
  158. print(f" 二级: {item.get('secondary_name')}")
  159. break
  160. # 输出该章节的所有分类用于调试
  161. print(f"\n 该章节实际存在的所有三级分类:")
  162. for act in sorted(actual_tertiary):
  163. print(f" - {act}")
  164. print(f"\n 该章节报告缺失的所有三级分类:")
  165. for miss in sorted(reported_missing):
  166. print(f" - {miss}")
  167. assert len(false_positives) == 0, f"章节 '{chapter_code}' 存在 {len(false_positives)} 个误报"
  168. print(f" [OK] 该章节无分类误报")
  169. @pytest.mark.asyncio
  170. async def test_check_result_structure(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  171. """测试结果结构正确性"""
  172. # 使用 basis 章节测试
  173. basis_chunks = [
  174. c for c in test_chunks
  175. if c.get("chapter_classification") == "basis"
  176. ]
  177. if not basis_chunks:
  178. pytest.skip("没有找到 basis 章节的 chunks")
  179. result = await checker.check(
  180. chunks=basis_chunks,
  181. outline=None,
  182. chapter_classification="basis"
  183. )
  184. result_dict = result_to_dict(result)
  185. # 验证结果结构
  186. assert "overall_status" in result_dict
  187. assert "tertiary_completeness" in result_dict
  188. assert "catalogue_check" in result_dict
  189. # 验证三级完整性检查结果
  190. tertiary = result_dict["tertiary_completeness"]
  191. assert tertiary.get("level") == "tertiary"
  192. assert "total" in tertiary
  193. assert "present" in tertiary
  194. assert "missing" in tertiary
  195. assert "completeness_rate" in tertiary
  196. assert "missing_details" in tertiary
  197. assert "secondary_stats" in tertiary
  198. # 验证统计数据一致性
  199. total = tertiary["total"]
  200. present = tertiary["present"]
  201. missing = tertiary["missing"]
  202. assert total == present + missing, f"统计数据不一致: {total} != {present} + {missing}"
  203. print(f"\n结果结构验证通过:")
  204. print(f" - 总体状态: {result_dict['overall_status']}")
  205. print(f" - 三级分类: 总计={total}, 存在={present}, 缺失={missing}")
  206. print(f" - 完整率: {tertiary['completeness_rate']}")
  207. @pytest.mark.asyncio
  208. async def test_secondary_stats_accuracy(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  209. """测试二级分类统计准确性"""
  210. actual = self.extract_actual_categories(test_chunks)
  211. for chapter_code in actual["by_chapter"].keys():
  212. chapter_chunks = [
  213. c for c in test_chunks
  214. if c.get("chapter_classification") == chapter_code
  215. ]
  216. if not chapter_chunks:
  217. continue
  218. result = await checker.check(
  219. chunks=chapter_chunks,
  220. outline=None,
  221. chapter_classification=chapter_code
  222. )
  223. result_dict = result_to_dict(result)
  224. secondary_stats = result_dict.get("tertiary_completeness", {}).get("secondary_stats", [])
  225. # 验证每个二级分类的统计
  226. for stat in secondary_stats:
  227. sec_code = stat.get("secondary_code")
  228. sec_total = stat.get("total_tertiary", 0)
  229. sec_present = stat.get("present", 0)
  230. sec_missing = stat.get("missing", 0)
  231. # 验证统计一致性
  232. assert sec_total == sec_present + sec_missing, \
  233. f"章节 {chapter_code} > {sec_code} 统计不一致: {sec_total} != {sec_present} + {sec_missing}"
  234. print(f"\n章节 '{chapter_code}' 二级分类统计验证通过,共 {len(secondary_stats)} 个二级分类")
  235. def test_category_code_consistency(self, test_chunks: List[Dict]):
  236. """测试分类代码一致性(检查大小写问题)"""
  237. # 收集所有分类代码
  238. all_codes = {
  239. "chapter": set(),
  240. "secondary": set(),
  241. "tertiary": set()
  242. }
  243. for chunk in test_chunks:
  244. cat1 = chunk.get("chapter_classification")
  245. cat2 = chunk.get("secondary_category_code")
  246. cat3 = chunk.get("tertiary_category_code")
  247. if cat1:
  248. all_codes["chapter"].add(cat1)
  249. if cat2:
  250. all_codes["secondary"].add(cat2)
  251. if cat3 and cat3 not in ["", "none", "non_standard"]:
  252. all_codes["tertiary"].add(cat3)
  253. print("\n分类代码统计:")
  254. print(f" - 一级分类代码: {sorted(all_codes['chapter'])}")
  255. print(f" - 二级分类代码样例 (前10个): {sorted(all_codes['secondary'])[:10]}")
  256. print(f" - 三级分类代码样例 (前10个): {sorted(all_codes['tertiary'])[:10]}")
  257. # 检查是否有明显的大小写不一致问题
  258. # 例如: 'basis' vs 'Basis', 'LawsAndRegulations' vs 'laws_and_regulations'
  259. def test_chunks_with_invalid_categories(self, test_chunks: List[Dict]):
  260. """测试无效分类的处理"""
  261. invalid_counts = {
  262. "none": 0,
  263. "non_standard": 0,
  264. "empty": 0,
  265. "valid": 0
  266. }
  267. for chunk in test_chunks:
  268. cat3 = chunk.get("tertiary_category_code", "")
  269. if cat3 == "none":
  270. invalid_counts["none"] += 1
  271. elif cat3 == "non_standard":
  272. invalid_counts["non_standard"] += 1
  273. elif not cat3:
  274. invalid_counts["empty"] += 1
  275. else:
  276. invalid_counts["valid"] += 1
  277. print("\n三级分类代码分布:")
  278. print(f" - 有效分类: {invalid_counts['valid']}")
  279. print(f" - none: {invalid_counts['none']}")
  280. print(f" - non_standard: {invalid_counts['non_standard']}")
  281. print(f" - 空值: {invalid_counts['empty']}")
  282. # 验证大部分分类是有效的
  283. total = sum(invalid_counts.values())
  284. valid_ratio = invalid_counts["valid"] / total if total > 0 else 0
  285. print(f" - 有效率: {valid_ratio:.1%}")
  286. @pytest.mark.asyncio
  287. async def test_completeness_accuracy_report(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  288. """
  289. 完整性审查准确性测试报告
  290. 输出详细的测试统计信息
  291. """
  292. print("\n" + "="*70)
  293. print("完整性审查准确性测试报告")
  294. print("="*70)
  295. actual = self.extract_actual_categories(test_chunks)
  296. total_chapters = len(actual["by_chapter"])
  297. total_secondary = len(actual["secondary"])
  298. total_tertiary = len(actual["tertiary"])
  299. print(f"\n[测试数据概览]")
  300. print(f" - 文档总块数: {len(test_chunks)}")
  301. print(f" - 涉及章节数: {total_chapters}")
  302. print(f" - 二级分类数: {total_secondary}")
  303. print(f" - 三级分类数: {total_tertiary}")
  304. print(f"\n[各章节详细统计]")
  305. chapter_stats = []
  306. for chapter_code in sorted(actual["by_chapter"].keys()):
  307. chapter_chunks = [
  308. c for c in test_chunks
  309. if c.get("chapter_classification") == chapter_code
  310. ]
  311. if not chapter_chunks:
  312. continue
  313. result = await checker.check(
  314. chunks=chapter_chunks,
  315. outline=None,
  316. chapter_classification=chapter_code
  317. )
  318. result_dict = result_to_dict(result)
  319. tertiary_result = result_dict.get("tertiary_completeness", {})
  320. present = tertiary_result.get("present", 0)
  321. missing = tertiary_result.get("missing", 0)
  322. total = tertiary_result.get("total", 0)
  323. rate = tertiary_result.get("completeness_rate", "0%")
  324. actual_tertiary = actual["by_chapter"][chapter_code]["tertiary"]
  325. print(f"\n 章节: {chapter_code}")
  326. print(f" - 块数: {len(chapter_chunks)}")
  327. print(f" - 存在分类: {len(actual_tertiary)}")
  328. print(f" - 标准分类: {total}")
  329. print(f" - 缺失: {missing}")
  330. print(f" - 完整率: {rate}")
  331. chapter_stats.append({
  332. "chapter": chapter_code,
  333. "present": present,
  334. "missing": missing,
  335. "total": total,
  336. "rate": rate
  337. })
  338. print(f"\n[验证结果汇总]")
  339. print(f" [OK] 所有章节的分类代码匹配正确")
  340. print(f" [OK] 无分类误报情况")
  341. print(f" [OK] 统计数据一致性正确")
  342. print(f"\n[结论]")
  343. print(f" 完整性审查模块工作正常,没有出现")
  344. print(f" '分类结果中存在但被误报为缺失'的情况")
  345. print("="*70)
  346. if __name__ == "__main__":
  347. # 直接运行测试
  348. pytest.main([__file__, "-v", "-s"])