"""C02 审查链路执行器 - 自测脚本""" import asyncio import json import sys import os sys.path.insert(0, os.path.abspath(".")) from core.debug.executor import ( DebugExecutor, CHAIN_CONFIG, CHAIN_STEPS, _STEP_DEPS, VALID_CHAIN_IDS, StepResult, ) from views.debug.debug_api import DebugExecuteRequest # ==================== 测试结果记录 ==================== results = [] def check(name: str, passed: bool, detail: str = ""): status = "PASS" if passed else "FAIL" results.append((name, passed, detail)) print(f" [{status}] {name}" + (f" -- {detail}" if detail else "")) def print_summary(): print(f"\n{'='*60}") passed = sum(1 for _, p, _ in results if p) total = len(results) print(f"结果: {passed}/{total} 通过") if passed < total: print("失败项:") for name, p, detail in results: if not p: print(f" - {name}: {detail}") print(f"{'='*60}\n") # ==================== TC-C02-CONFIG-001: CHAIN_CONFIG 完整性 ==================== print("\n--- TC-C02-CONFIG-001: CHAIN_CONFIG 完整性 ---") expected_chains = { "completeness", "timeliness", "reference", "sensitive", "semantic", "grammar", "professional", } check("7 个 chain_id 全量覆盖", set(CHAIN_CONFIG.keys()) == expected_chains, f"实际: {set(CHAIN_CONFIG.keys())}") for cid in expected_chains: cfg = CHAIN_CONFIG[cid] check(f"{cid} 包含 name 字段", "name" in cfg) check(f"{cid} 包含 reviewer_type 字段", "reviewer_type" in cfg) check(f"{cid} 包含 prompt_name 字段", "prompt_name" in cfg) check(f"{cid} 包含 function_name 字段", "function_name" in cfg) # ==================== TC-C02-CONFIG-002: CHAIN_STEPS 完整性 ==================== print("\n--- TC-C02-CONFIG-002: CHAIN_STEPS 完整性 ---") for cid in expected_chains: steps = CHAIN_STEPS.get(cid) check(f"{cid} 有步骤定义", steps is not None, f"steps: {steps}") if steps: check(f"{cid} 步骤数正确 (直调=3, 专业=7)", len(steps) == (7 if cid == "professional" else 3), f"实际步骤数: {len(steps)}") for s in steps: check(f"{cid} step {s['index']} 有合法名称", bool(s.get("name")), s["name"]) # ==================== TC-C02-CONFIG-003: _STEP_DEPS 完整性 ==================== print("\n--- TC-C02-CONFIG-003: 步骤依赖完整性 ---") for cid in expected_chains: deps = _STEP_DEPS.get(cid) check(f"{cid} 有依赖定义", deps is not None) if deps: steps = CHAIN_STEPS[cid] for s in steps: check(f"{cid} step {s['index']} 依赖已定义", s["index"] in deps) # ==================== TC-C02-LOGIC-001: 步骤依赖/跳过逻辑 ==================== print("\n--- TC-C02-LOGIC-001: 步骤依赖/跳过逻辑 ---") async def test_skip_logic(): """模拟步骤 1 失败时步骤 2 被标记为 skipped""" # 使用 asyncio.Queue 但不上传真实 LLM,直接构造场景 from core.debug.executor import ( _STEP_DEPS, CHAIN_STEPS, CHAIN_CONFIG, ) # 模拟直调链路:步骤 0 成功,步骤 1 失败 → 步骤 2 应跳过 chain_id = "completeness" deps = _STEP_DEPS[chain_id] step_results = [ StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1), StepResult(index=1, name="LLM 调用", status="error", duration=0.1, error="模拟超时"), ] # 手动推导步骤 2 的跳过逻辑 dep = deps.get(2) should_skip = False if dep is not None: for prev in step_results: if prev.index == dep and prev.status in ("error", "skipped"): should_skip = True break check("步骤 1 失败 → 步骤 2 应跳过", should_skip is True, f"依赖: step {dep}, 上一步状态: error → should_skip={should_skip}") # 乐观场景:步骤 0 成功,步骤 1 成功 → 步骤 2 不应跳过 step_results_ok = [ StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1), StepResult(index=1, name="LLM 调用", status="success", duration=0.1), ] dep = deps.get(2) should_skip_2 = False if dep is not None: for prev in step_results_ok: if prev.index == dep and prev.status in ("error", "skipped"): should_skip_2 = True break check("步骤 0+1 成功 → 步骤 2 不应跳过", should_skip_2 is False) asyncio.run(test_skip_logic()) # ==================== TC-C02-LOGIC-002: 结果摘要统计 ==================== print("\n--- TC-C02-LOGIC-002: 结果摘要统计 ---") mixed_steps = [ StepResult(index=0, name="A", status="success", duration=0.1), StepResult(index=1, name="B", status="error", duration=0.1, error="模拟错误"), StepResult(index=2, name="C", status="skipped", duration=0), ] summary = DebugExecutor._build_final_result(mixed_steps) check("结果摘要包含总步骤数", summary["total_steps"] == 3) check("结果摘要成功计数正确", summary["success_count"] == 1) check("结果摘要错误计数正确", summary["error_count"] == 1) check("结果摘要跳过计数正确", summary["skipped_count"] == 1) # ==================== TC-C02-LOGIC-003: Trace ID 前缀隔离 ==================== print("\n--- TC-C02-LOGIC-003: Trace ID 生产隔离 ---") from core.debug.executor import _make_trace_id tid = _make_trace_id("completeness") check("trace_id 以 debug_ 开头", tid.startswith("debug_"), f"实际: {tid}") check("trace_id 包含 chain_id", "completeness" in tid, f"实际: {tid}") # ==================== TC-C02-LOGIC-004: record_id 生成 ==================== print("\n--- TC-C02-LOGIC-004: record_id 生成 ---") from core.debug.executor import _make_record_id rid = _make_record_id() check("record_id 以 call- 开头", rid.startswith("call-"), f"实际: {rid}") check("record_id 包含日期", len(rid) > 20, f"实际长度: {len(rid)}, value: {rid}") # ==================== 汇总 ==================== print_summary() # 返回退出码供脚本调用 sys.exit(0 if all(p for _, p, _ in results) else 1)