test_completeness_accuracy.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. 完整性审查准确性验证测试
  5. 基于真实分类结果数据,验证完整性审查模块是否正确统计,
  6. 确保不会出现"分类结果中存在,但被误报为缺失"的情况。
  7. 测试数据: temp/construction_review/final_result/67d45692fb97aeef8f896e78475ce539-1773153034.json
  8. """
  9. import json
  10. import sys
  11. from pathlib import Path
  12. from typing import Dict, List, Set, Tuple
  13. from collections import defaultdict
  14. project_root = Path(__file__).parent.parent.parent
  15. import pytest
  16. import asyncio
  17. from core.construction_review.component.reviewers.completeness_reviewer import (
  18. LightweightCompletenessChecker,
  19. result_to_dict,
  20. TertiarySpecLoader,
  21. )
  22. class TestCompletenessAccuracy:
  23. """完整性审查准确性测试类"""
  24. @pytest.fixture(scope="class")
  25. def test_data_path(self) -> Path:
  26. """测试数据文件路径"""
  27. return project_root / "temp" / "construction_review" / "final_result" / "67d45692fb97aeef8f896e78475ce539-1773153034.json"
  28. @pytest.fixture(scope="class")
  29. def standard_csv_path(self) -> Path:
  30. """标准分类CSV路径"""
  31. return project_root / "core" / "construction_review" / "component" / "doc_worker" / "config" / "StandardCategoryTable.csv"
  32. @pytest.fixture(scope="class")
  33. def test_chunks(self, test_data_path: Path) -> List[Dict]:
  34. """加载测试数据中的chunks"""
  35. if not test_data_path.exists():
  36. pytest.skip(f"测试数据文件不存在: {test_data_path}")
  37. with open(test_data_path, 'r', encoding='utf-8') as f:
  38. data = json.load(f)
  39. chunks = data.get("document_result", {}).get("structured_content", {}).get("chunks", [])
  40. print(f"\n加载了 {len(chunks)} 个 chunks")
  41. return chunks
  42. @pytest.fixture(scope="class")
  43. def checker(self, standard_csv_path: Path) -> LightweightCompletenessChecker:
  44. """创建完整性检查器"""
  45. if not standard_csv_path.exists():
  46. pytest.skip(f"标准CSV文件不存在: {standard_csv_path}")
  47. return LightweightCompletenessChecker(str(standard_csv_path))
  48. def extract_actual_categories(self, chunks: List[Dict]) -> Dict[str, Set[Tuple]]:
  49. """
  50. 从 chunks 中提取实际存在的分类
  51. Returns:
  52. {
  53. "tertiary": {(first_code, second_code, third_code), ...},
  54. "secondary": {(first_code, second_code), ...},
  55. "by_chapter": {
  56. "basis": {"tertiary": {...}, "secondary": {...}},
  57. ...
  58. }
  59. }
  60. """
  61. result = {
  62. "tertiary": set(),
  63. "secondary": set(),
  64. "by_chapter": defaultdict(lambda: {"tertiary": set(), "secondary": set()})
  65. }
  66. for chunk in chunks:
  67. cat1 = chunk.get("chapter_classification") or chunk.get("first_code")
  68. cat2 = chunk.get("secondary_category_code") or chunk.get("second_code")
  69. cat3 = chunk.get("tertiary_category_code") or chunk.get("third_code")
  70. if not cat1 or not cat2:
  71. continue
  72. # 记录二级分类
  73. sec_key = (cat1, cat2)
  74. result["secondary"].add(sec_key)
  75. result["by_chapter"][cat1]["secondary"].add(sec_key)
  76. # 记录三级分类(排除无效值)
  77. if cat3 and cat3 not in ["", "none", "non_standard"]:
  78. ter_key = (cat1, cat2, cat3)
  79. result["tertiary"].add(ter_key)
  80. result["by_chapter"][cat1]["tertiary"].add(ter_key)
  81. return result
  82. def test_data_file_exists(self, test_data_path: Path):
  83. """测试数据文件存在性检查"""
  84. assert test_data_path.exists(), f"测试数据文件不存在: {test_data_path}"
  85. print(f"\n测试数据文件: {test_data_path}")
  86. def test_standard_csv_exists(self, standard_csv_path: Path):
  87. """标准CSV文件存在性检查"""
  88. assert standard_csv_path.exists(), f"标准CSV文件不存在: {standard_csv_path}"
  89. print(f"\n标准CSV文件: {standard_csv_path}")
  90. def test_extract_actual_categories(self, test_chunks: List[Dict]):
  91. """测试分类提取功能"""
  92. actual = self.extract_actual_categories(test_chunks)
  93. print(f"\n实际存在的分类统计:")
  94. print(f" - 二级分类总数: {len(actual['secondary'])}")
  95. print(f" - 三级分类总数: {len(actual['tertiary'])}")
  96. print(f" - 涉及章节: {list(actual['by_chapter'].keys())}")
  97. # 验证每个章节的分类
  98. for chapter, cats in actual["by_chapter"].items():
  99. print(f"\n 章节 '{chapter}':")
  100. print(f" - 二级分类: {len(cats['secondary'])} 个")
  101. print(f" - 三级分类: {len(cats['tertiary'])} 个")
  102. assert len(actual["secondary"]) > 0, "应至少存在一个二级分类"
  103. @pytest.mark.asyncio
  104. async def test_no_false_positives_for_existing_categories(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  105. """
  106. 关键测试:确保存在的分类不会被误报为缺失
  107. 这是核心测试,验证完整性审查不会将实际存在的三级分类报告为缺失。
  108. """
  109. # 获取实际存在的分类
  110. actual = self.extract_actual_categories(test_chunks)
  111. # 按章节进行完整性检查
  112. for chapter_code in actual["by_chapter"].keys():
  113. # 获取该章节的所有 chunks
  114. chapter_chunks = [
  115. c for c in test_chunks
  116. if c.get("chapter_classification") == chapter_code
  117. ]
  118. if not chapter_chunks:
  119. continue
  120. print(f"\n{'='*60}")
  121. print(f"测试章节: {chapter_code}")
  122. print(f" chunks 数量: {len(chapter_chunks)}")
  123. # 执行完整性检查
  124. result = await checker.check(
  125. chunks=chapter_chunks,
  126. outline=None,
  127. chapter_classification=chapter_code
  128. )
  129. result_dict = result_to_dict(result)
  130. tertiary_result = result_dict.get("tertiary_completeness", {})
  131. # 获取检查器报告的缺失项
  132. missing_details = tertiary_result.get("missing_details", [])
  133. reported_missing = set()
  134. for item in missing_details:
  135. key = (
  136. item.get("first_code"),
  137. item.get("secondary_code"),
  138. item.get("tertiary_code")
  139. )
  140. reported_missing.add(key)
  141. # 获取该章节实际存在的三级分类
  142. actual_tertiary = actual["by_chapter"][chapter_code]["tertiary"]
  143. print(f" 实际存在的三级分类: {len(actual_tertiary)} 个")
  144. print(f" 报告缺失的三级分类: {len(reported_missing)} 个")
  145. print(f" 完整率: {tertiary_result.get('completeness_rate', 'N/A')}")
  146. # 关键验证:检查是否有实际存在的分类被误报为缺失
  147. false_positives = reported_missing & actual_tertiary
  148. if false_positives:
  149. print(f"\n ❌ 发现误报!以下分类实际存在但被报告为缺失:")
  150. for fp in false_positives:
  151. print(f" - {fp}")
  152. # 查找对应的详细信息
  153. for item in missing_details:
  154. if (item.get("first_code"), item.get("secondary_code"), item.get("tertiary_code")) == fp:
  155. print(f" 名称: {item.get('tertiary_name')}")
  156. print(f" 二级: {item.get('secondary_name')}")
  157. break
  158. # 输出该章节的所有分类用于调试
  159. print(f"\n 该章节实际存在的所有三级分类:")
  160. for act in sorted(actual_tertiary):
  161. print(f" - {act}")
  162. print(f"\n 该章节报告缺失的所有三级分类:")
  163. for miss in sorted(reported_missing):
  164. print(f" - {miss}")
  165. assert len(false_positives) == 0, f"章节 '{chapter_code}' 存在 {len(false_positives)} 个误报"
  166. print(f" [OK] 该章节无分类误报")
  167. @pytest.mark.asyncio
  168. async def test_check_result_structure(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  169. """测试结果结构正确性"""
  170. # 使用 basis 章节测试
  171. basis_chunks = [
  172. c for c in test_chunks
  173. if c.get("chapter_classification") == "basis"
  174. ]
  175. if not basis_chunks:
  176. pytest.skip("没有找到 basis 章节的 chunks")
  177. result = await checker.check(
  178. chunks=basis_chunks,
  179. outline=None,
  180. chapter_classification="basis"
  181. )
  182. result_dict = result_to_dict(result)
  183. # 验证结果结构
  184. assert "overall_status" in result_dict
  185. assert "tertiary_completeness" in result_dict
  186. assert "catalogue_check" in result_dict
  187. # 验证三级完整性检查结果
  188. tertiary = result_dict["tertiary_completeness"]
  189. assert tertiary.get("level") == "tertiary"
  190. assert "total" in tertiary
  191. assert "present" in tertiary
  192. assert "missing" in tertiary
  193. assert "completeness_rate" in tertiary
  194. assert "missing_details" in tertiary
  195. assert "secondary_stats" in tertiary
  196. # 验证统计数据一致性
  197. total = tertiary["total"]
  198. present = tertiary["present"]
  199. missing = tertiary["missing"]
  200. assert total == present + missing, f"统计数据不一致: {total} != {present} + {missing}"
  201. print(f"\n结果结构验证通过:")
  202. print(f" - 总体状态: {result_dict['overall_status']}")
  203. print(f" - 三级分类: 总计={total}, 存在={present}, 缺失={missing}")
  204. print(f" - 完整率: {tertiary['completeness_rate']}")
  205. @pytest.mark.asyncio
  206. async def test_secondary_stats_accuracy(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  207. """测试二级分类统计准确性"""
  208. actual = self.extract_actual_categories(test_chunks)
  209. for chapter_code in actual["by_chapter"].keys():
  210. chapter_chunks = [
  211. c for c in test_chunks
  212. if c.get("chapter_classification") == chapter_code
  213. ]
  214. if not chapter_chunks:
  215. continue
  216. result = await checker.check(
  217. chunks=chapter_chunks,
  218. outline=None,
  219. chapter_classification=chapter_code
  220. )
  221. result_dict = result_to_dict(result)
  222. secondary_stats = result_dict.get("tertiary_completeness", {}).get("secondary_stats", [])
  223. # 验证每个二级分类的统计
  224. for stat in secondary_stats:
  225. sec_code = stat.get("secondary_code")
  226. sec_total = stat.get("total_tertiary", 0)
  227. sec_present = stat.get("present", 0)
  228. sec_missing = stat.get("missing", 0)
  229. # 验证统计一致性
  230. assert sec_total == sec_present + sec_missing, \
  231. f"章节 {chapter_code} > {sec_code} 统计不一致: {sec_total} != {sec_present} + {sec_missing}"
  232. print(f"\n章节 '{chapter_code}' 二级分类统计验证通过,共 {len(secondary_stats)} 个二级分类")
  233. def test_category_code_consistency(self, test_chunks: List[Dict]):
  234. """测试分类代码一致性(检查大小写问题)"""
  235. # 收集所有分类代码
  236. all_codes = {
  237. "chapter": set(),
  238. "secondary": set(),
  239. "tertiary": set()
  240. }
  241. for chunk in test_chunks:
  242. cat1 = chunk.get("chapter_classification")
  243. cat2 = chunk.get("secondary_category_code")
  244. cat3 = chunk.get("tertiary_category_code")
  245. if cat1:
  246. all_codes["chapter"].add(cat1)
  247. if cat2:
  248. all_codes["secondary"].add(cat2)
  249. if cat3 and cat3 not in ["", "none", "non_standard"]:
  250. all_codes["tertiary"].add(cat3)
  251. print("\n分类代码统计:")
  252. print(f" - 一级分类代码: {sorted(all_codes['chapter'])}")
  253. print(f" - 二级分类代码样例 (前10个): {sorted(all_codes['secondary'])[:10]}")
  254. print(f" - 三级分类代码样例 (前10个): {sorted(all_codes['tertiary'])[:10]}")
  255. # 检查是否有明显的大小写不一致问题
  256. # 例如: 'basis' vs 'Basis', 'LawsAndRegulations' vs 'laws_and_regulations'
  257. def test_chunks_with_invalid_categories(self, test_chunks: List[Dict]):
  258. """测试无效分类的处理"""
  259. invalid_counts = {
  260. "none": 0,
  261. "non_standard": 0,
  262. "empty": 0,
  263. "valid": 0
  264. }
  265. for chunk in test_chunks:
  266. cat3 = chunk.get("tertiary_category_code", "")
  267. if cat3 == "none":
  268. invalid_counts["none"] += 1
  269. elif cat3 == "non_standard":
  270. invalid_counts["non_standard"] += 1
  271. elif not cat3:
  272. invalid_counts["empty"] += 1
  273. else:
  274. invalid_counts["valid"] += 1
  275. print("\n三级分类代码分布:")
  276. print(f" - 有效分类: {invalid_counts['valid']}")
  277. print(f" - none: {invalid_counts['none']}")
  278. print(f" - non_standard: {invalid_counts['non_standard']}")
  279. print(f" - 空值: {invalid_counts['empty']}")
  280. # 验证大部分分类是有效的
  281. total = sum(invalid_counts.values())
  282. valid_ratio = invalid_counts["valid"] / total if total > 0 else 0
  283. print(f" - 有效率: {valid_ratio:.1%}")
  284. @pytest.mark.asyncio
  285. async def test_completeness_accuracy_report(self, test_chunks: List[Dict], checker: LightweightCompletenessChecker):
  286. """
  287. 完整性审查准确性测试报告
  288. 输出详细的测试统计信息
  289. """
  290. print("\n" + "="*70)
  291. print("完整性审查准确性测试报告")
  292. print("="*70)
  293. actual = self.extract_actual_categories(test_chunks)
  294. total_chapters = len(actual["by_chapter"])
  295. total_secondary = len(actual["secondary"])
  296. total_tertiary = len(actual["tertiary"])
  297. print(f"\n[测试数据概览]")
  298. print(f" - 文档总块数: {len(test_chunks)}")
  299. print(f" - 涉及章节数: {total_chapters}")
  300. print(f" - 二级分类数: {total_secondary}")
  301. print(f" - 三级分类数: {total_tertiary}")
  302. print(f"\n[各章节详细统计]")
  303. chapter_stats = []
  304. for chapter_code in sorted(actual["by_chapter"].keys()):
  305. chapter_chunks = [
  306. c for c in test_chunks
  307. if c.get("chapter_classification") == chapter_code
  308. ]
  309. if not chapter_chunks:
  310. continue
  311. result = await checker.check(
  312. chunks=chapter_chunks,
  313. outline=None,
  314. chapter_classification=chapter_code
  315. )
  316. result_dict = result_to_dict(result)
  317. tertiary_result = result_dict.get("tertiary_completeness", {})
  318. present = tertiary_result.get("present", 0)
  319. missing = tertiary_result.get("missing", 0)
  320. total = tertiary_result.get("total", 0)
  321. rate = tertiary_result.get("completeness_rate", "0%")
  322. actual_tertiary = actual["by_chapter"][chapter_code]["tertiary"]
  323. print(f"\n 章节: {chapter_code}")
  324. print(f" - 块数: {len(chapter_chunks)}")
  325. print(f" - 存在分类: {len(actual_tertiary)}")
  326. print(f" - 标准分类: {total}")
  327. print(f" - 缺失: {missing}")
  328. print(f" - 完整率: {rate}")
  329. chapter_stats.append({
  330. "chapter": chapter_code,
  331. "present": present,
  332. "missing": missing,
  333. "total": total,
  334. "rate": rate
  335. })
  336. print(f"\n[验证结果汇总]")
  337. print(f" [OK] 所有章节的分类代码匹配正确")
  338. print(f" [OK] 无分类误报情况")
  339. print(f" [OK] 统计数据一致性正确")
  340. print(f"\n[结论]")
  341. print(f" 完整性审查模块工作正常,没有出现")
  342. print(f" '分类结果中存在但被误报为缺失'的情况")
  343. print("="*70)
  344. if __name__ == "__main__":
  345. # 直接运行测试
  346. pytest.main([__file__, "-v", "-s"])