batch_test.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
"""
Batch comparison test: run method A vs. method B on 5 different files and
write a summary JSON file for analysis.
"""
  4. import asyncio
  5. import json
  6. import sys
  7. import time
  8. from pathlib import Path
  9. from collections import defaultdict
  10. PROJECT_ROOT = str(Path(__file__).parent.parent.parent)
  11. if PROJECT_ROOT not in sys.path:
  12. from utils_test.Completeness_Compare_Test.compare_test import (
  13. load_final_result,
  14. extract_chunks_by_chapter,
  15. get_all_chapter_codes,
  16. load_standard_items_for_chapter,
  17. run_method_a,
  18. compare_results,
  19. )
  20. from utils_test.Completeness_Compare_Test.method_b_direct_llm import (
  21. run_direct_llm_check,
  22. direct_result_to_dict,
  23. )
  24. RESULT_DIR = Path(PROJECT_ROOT) / "temp" / "construction_review" / "final_result"
  25. CSV_PATH = (
  26. Path(PROJECT_ROOT)
  27. / "core"
  28. / "construction_review"
  29. / "component"
  30. / "doc_worker"
  31. / "config"
  32. / "StandardCategoryTable.csv"
  33. )
  34. def pick_5_distinct_files():
  35. """选出5个不同文件(按hash前缀去重,取最新的)"""
  36. files_by_hash = {}
  37. for f in sorted(RESULT_DIR.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True):
  38. hash_prefix = f.stem.split("-")[0]
  39. if hash_prefix not in files_by_hash:
  40. files_by_hash[hash_prefix] = f
  41. if len(files_by_hash) >= 5:
  42. break
  43. return list(files_by_hash.values())
  44. async def test_one_file(json_path: Path):
  45. """对一个文件的所有章节执行双方案对比"""
  46. data = load_final_result(str(json_path))
  47. file_name = data.get("file_name", json_path.stem)
  48. chapter_codes = get_all_chapter_codes(data)
  49. file_result = {
  50. "file_id": json_path.stem,
  51. "file_name": file_name,
  52. "chapters": [],
  53. "summary": {},
  54. }
  55. total_a_time = 0
  56. total_b_time = 0
  57. total_agree = 0
  58. total_disagree = 0
  59. total_a_missing = 0
  60. total_b_missing = 0
  61. total_required = 0
  62. for chapter_code in chapter_codes:
  63. chunks = extract_chunks_by_chapter(data, chapter_code)
  64. if not chunks:
  65. continue
  66. chapter_name = chunks[0].get("first_name", chapter_code)
  67. standard_items = load_standard_items_for_chapter(str(CSV_PATH), chapter_code)
  68. if not standard_items:
  69. continue
  70. # 方案A
  71. a_result, a_time, a_llm_calls = await run_method_a(
  72. chunks=chunks,
  73. csv_path=str(CSV_PATH),
  74. chapter_code=chapter_code,
  75. )
  76. # 方案B
  77. b_result = await run_direct_llm_check(
  78. chunks=chunks,
  79. standard_items=standard_items,
  80. chapter_code=chapter_code,
  81. chapter_name=chapter_name,
  82. )
  83. # 对比
  84. cr = compare_results(
  85. chapter_code=chapter_code,
  86. chapter_name=chapter_name,
  87. method_a=a_result,
  88. method_b=b_result,
  89. a_time=a_time,
  90. a_llm_calls=a_llm_calls,
  91. )
  92. chapter_data = {
  93. "chapter_code": chapter_code,
  94. "chapter_name": chapter_name,
  95. "a_total": cr.a_total_required,
  96. "a_missing": cr.a_missing,
  97. "a_rate": cr.a_completeness_rate,
  98. "a_time": round(a_time, 2),
  99. "b_total": cr.b_total_required,
  100. "b_missing": cr.b_missing,
  101. "b_rate": cr.b_completeness_rate,
  102. "b_time": round(b_result.execution_time, 2),
  103. "agreement": cr.agreement_count,
  104. "disagreement": cr.disagreement_count,
  105. "a_only_missing": cr.a_only_missing,
  106. "b_only_missing": cr.b_only_missing,
  107. }
  108. file_result["chapters"].append(chapter_data)
  109. total_a_time += a_time
  110. total_b_time += b_result.execution_time
  111. total_agree += cr.agreement_count
  112. total_disagree += cr.disagreement_count
  113. total_a_missing += cr.a_missing
  114. total_b_missing += cr.b_missing
  115. total_required += cr.a_total_required
  116. n = len(file_result["chapters"])
  117. file_result["summary"] = {
  118. "chapter_count": n,
  119. "total_required": total_required,
  120. "total_a_missing": total_a_missing,
  121. "total_b_missing": total_b_missing,
  122. "total_a_time": round(total_a_time, 2),
  123. "total_b_time": round(total_b_time, 2),
  124. "total_agreement": total_agree,
  125. "total_disagreement": total_disagree,
  126. "agreement_rate": round(total_agree / (total_agree + total_disagree) * 100, 1)
  127. if (total_agree + total_disagree) > 0 else 0,
  128. }
  129. return file_result
  130. async def main():
  131. files = pick_5_distinct_files()
  132. print(f"选出 {len(files)} 个文件进行批量测试:")
  133. for f in files:
  134. print(f" - {f.name}")
  135. print()
  136. all_results = []
  137. for i, fpath in enumerate(files):
  138. print(f"[{i+1}/{len(files)}] 测试: {fpath.name}")
  139. t0 = time.time()
  140. result = await test_one_file(fpath)
  141. result["total_wall_time"] = round(time.time() - t0, 2)
  142. all_results.append(result)
  143. s = result["summary"]
  144. print(f" 完成: {s['chapter_count']}章节, A缺失{s['total_a_missing']}, B缺失{s['total_b_missing']}, "
  145. f"一致率{s['agreement_rate']}%, 耗时{result['total_wall_time']}s")
  146. print()
  147. # 保存
  148. out_path = Path(__file__).parent / "batch_result.json"
  149. with open(out_path, "w", encoding="utf-8") as fp:
  150. json.dump(all_results, fp, ensure_ascii=False, indent=2)
  151. print(f"结果已保存: {out_path}")
  152. if __name__ == "__main__":
  153. asyncio.run(main())