""" Agent 驱动的 RAG 管线测试运行器 执行全部测试样本,评估并生成报告 """ import sys import os # 确保项目根目录在路径中 project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) if project_root not in sys.path: sys.path.insert(0, project_root) from utils_test.RAG_Pipeline_Test.test_data import TEST_SAMPLES from utils_test.RAG_Pipeline_Test.rag_pipeline_runner import RAGPipelineRunner from utils_test.RAG_Pipeline_Test.rag_evaluator import RAGEvaluator def main(): print("=" * 70) print("RAG 管线 Agent 驱动测试") print("=" * 70) print(f"测试样本数: {len(TEST_SAMPLES)}") print() # 初始化 print("[1/3] 初始化管线执行器和评估器...") runner = RAGPipelineRunner() evaluator = RAGEvaluator() print(" 初始化完成") print() # 执行管线 print("[2/3] 执行 RAG 管线...") results = runner.run_batch(TEST_SAMPLES) print() # 评估 print("[3/3] 评估结果...") evaluations = [] for i, (result, sample) in enumerate(zip(results, TEST_SAMPLES)): print(f" 评估样本 {i+1}/{len(results)}: {result.chunk_id}") ev = evaluator.evaluate_sample(result, sample["content"]) evaluations.append(ev) print(f" 总分: {ev.overall_score:.1f}/5.0 [{ev.overall_status}]") print(f" {ev.analysis}") print() # 生成报告 report = evaluator.generate_report(evaluations) # 先保存报告到文件(避免打印编码问题导致丢失) report_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "reports") os.makedirs(report_dir, exist_ok=True) report_path = os.path.join(report_dir, "rag_pipeline_test_report.md") with open(report_path, "w", encoding="utf-8") as f: f.write(report) print(f"\n报告已保存: {report_path}") # 输出报告 print("=" * 70) try: print(report) except UnicodeEncodeError: safe_report = report.replace("✅", "[PASS]").replace("⚠️", "[WARN]").replace("❌", "[FAIL]") print(safe_report) print("=" * 70) # 返回汇总 pass_count = sum(1 for ev in evaluations if ev.overall_status == "PASS") warn_count = sum(1 for ev in evaluations if ev.overall_status == "WARN") fail_count = sum(1 for ev in evaluations if ev.overall_status == "FAIL") print(f"\n汇总: {pass_count} PASS / {warn_count} WARN / {fail_count} FAIL") return evaluations 
# Script entry point: run the full test pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()