"""C03 步骤调度与环节隔离 - 自测脚本 覆盖 TC-C03 全部 9 个测试用例: - TC-C03-API-001 ~ 005: 功能测试 - TC-C03-EDGE-001 ~ 002: 边界测试 - TC-C03-ERROR-001 ~ 002: 异常测试 """ import json import sys import os import logging sys.path.insert(0, os.path.abspath(".")) # 关闭非必要的日志输出 logging.disable(logging.CRITICAL) from core.debug.step_dispatcher import ( StepDefinition, StepDispatcher, CHAIN_STEPS, _STEP_DEPS, VALID_CHAIN_IDS, ) from core.debug.isolation_runner import ( IsolationRunner, StepResult as IRStepResult, ) # 快捷引用静态方法 _build_effective_set = IsolationRunner._build_effective_set # ==================== 测试结果记录 ==================== results = [] def check(name, passed, detail=""): status = "PASS" if passed else "FAIL" results.append((name, passed, detail)) print(f" [{status}] {name}" + (f" -- {detail}" if detail else "")) def print_summary(): print(f"\n{'='*60}") passed = sum(1 for _, p, _ in results if p) total = len(results) print(f"结果: {passed}/{total} 通过") if passed < total: print("失败项:") for name, p, detail in results: if not p: print(f" - {name}: {detail}") print(f"{'='*60}\n") EXPECTED_CHAINS = { "completeness", "timeliness", "reference", "sensitive", "semantic", "grammar", "professional", } # ================================================================ # TC-C03-API-001: 直调链路 3 步正常执行(完整链路) # ================================================================ print("\n=== TC-C03-API-001: 直调链路 3 步正常执行 ===") sd = StepDefinition(index=0, name="测试步骤", is_isolatable=True, requires_previous=False) check("StepDefinition 有 index 字段", sd.index == 0, f"实际: {sd.index}") check("StepDefinition 有 name 字段", sd.name == "测试步骤", f"实际: {sd.name}") check("StepDefinition 有 is_isolatable 字段", sd.is_isolatable is True) check("StepDefinition 有 requires_previous 字段", sd.requires_previous is False) check("StepDefinition.to_dict 兼容 dict 格式", sd.to_dict() == {"index": 0, "name": "测试步骤", "phase": None}) for cid in ["completeness", "timeliness", "reference", "sensitive", "semantic", "grammar"]: steps = StepDispatcher.get_steps(cid) check(f"{cid} 步骤数 = 3", len(steps) == 3, f"实际: {len(steps)}") for i, expected_name in enumerate(["Prompt 渲染", "LLM 调用", "结果解析"]): s = steps[i] check(f"{cid} step {i} name = '{expected_name}'", s.name == expected_name and s.index == i) check(f"{cid} step 0 requires_previous=False", steps[0].requires_previous is False) check(f"{cid} step 0 is_isolatable=True", steps[0].is_isolatable is True) check(f"{cid} step 1 requires_previous=True", steps[1].requires_previous is True) check(f"{cid} step 1 is_isolatable=True", steps[1].is_isolatable is True) check(f"{cid} step 2 requires_previous=True", steps[2].requires_previous is True) check(f"{cid} step 2 is_isolatable=True", steps[2].is_isolatable is True) for cid in EXPECTED_CHAINS: steps_direct = StepDispatcher.CHAIN_STEPS[cid] steps_method = StepDispatcher.get_steps(cid) check(f"{cid} CHAIN_STEPS 与 get_steps 一致", [s.index for s in steps_direct] == [s.index for s in steps_method]) check("VALID_CHAIN_IDS 覆盖 7 个链路", VALID_CHAIN_IDS == EXPECTED_CHAINS, f"实际: {VALID_CHAIN_IDS}") # ================================================================ # TC-C02-API-002: 专业性审查 7 步完整执行(移植自 C02) # ================================================================ print("\n=== TC-C02-API-002: 专业性审查 7 步完整执行 ===") steps_pro = StepDispatcher.get_steps("professional") check("professional 步骤数 = 7", len(steps_pro) == 7, f"实际: {len(steps_pro)}") expected_pro = [ (0, "查询提取", "RAG 召回阶段"), (1, "实体增强检索", "RAG 召回阶段"), (2, "父文档增强", "RAG 召回阶段"), (3, "结果提取", "RAG 召回阶段"), (4, "非参数合规审查", "AI 审查阶段"), (5, "参数合规审查", "AI 审查阶段"), (6, "结果汇总", "AI 审查阶段"), ] for i, (idx, name, phase) in enumerate(expected_pro): s = steps_pro[i] check(f"professional step {idx} name = '{name}'", s.name == name and s.index == idx) check(f"professional step {idx} phase = '{phase}'", s.phase == phase, f"实际: {s.phase}") rag_steps = [s for s in steps_pro if s.phase == "RAG 召回阶段"] ai_steps = [s for s in steps_pro if s.phase == "AI 审查阶段"] check("RAG 阶段 4 步", len(rag_steps) == 4, f"实际: {len(rag_steps)}") check("AI 阶段 3 步", len(ai_steps) == 3, f"实际: {len(ai_steps)}") check("结果汇总 is_isolatable=False", steps_pro[6].is_isolatable is False) # ================================================================ # TC-C03-API-003: 环节隔离 — 仅执行指定步骤 # ================================================================ print("\n=== TC-C03-API-003: 环节隔离 -- 仅执行指定步骤 ===") steps = StepDispatcher.get_steps("completeness") # isolation_steps=[0], no manual_inputs -> effective = {0} eff = _build_effective_set(steps, [0], None) check("API-003: isolation_steps=[0] -> effective={0}", eff == {0}, f"实际: {eff}") check("API-003: 无 manual_inputs, 不触发自动前向传播", 1 not in eff and 2 not in eff) # ================================================================ # TC-C03-API-004: 环节隔离 — 手动输入 LLM 调用 # ================================================================ print("\n=== TC-C03-API-004: 环节隔离 -- 手动输入 LLM 调用 ===") # isolation_steps=[1], manual_inputs={"1": "..."} # -> effective = {1} + auto-forward {2} = {1, 2} eff = _build_effective_set(steps, [1], {"1": "请审查以下内容:..."}) check("API-004: isolation_steps=[1], has manual -> effective={1,2}", eff == {1, 2}, f"实际: {eff}") check("API-004: step 0 (before first selected) -> skipped", 0 not in eff) check("API-004: step 1 (selected with manual) -> execute", 1 in eff) check("API-004: step 2 (auto-forward from manual) -> execute", 2 in eff) # 验证无 manual_inputs 时不触发前向传播 eff_no_manual = _build_effective_set(steps, [1], None) check("API-004: 无 manual_inputs, effective={1}", eff_no_manual == {1}, f"实际: {eff_no_manual}") # ================================================================ # TC-C03-API-005: 环节隔离 — 仅解析 Response # ================================================================ print("\n=== TC-C03-API-005: 环节隔离 -- 仅解析 Response ===") # isolation_steps=[2], manual_inputs={"2": "..."} # -> effective = {2}, step 2 is last, no forward. Step 0,1 skipped eff = _build_effective_set(steps, [2], {"2": '{"审查结果": []}'}) check("API-005: isolation_steps=[2], effective={2}", eff == {2}, f"实际: {eff}") check("API-005: step 0 skipped", 0 not in eff) check("API-005: step 1 skipped", 1 not in eff) check("API-005: step 2 execute", 2 in eff) # ================================================================ # TC-C03-EDGE-001: 专业性审查仅执行 RAG 阶段 # ================================================================ print("\n=== TC-C03-EDGE-001: 专业性审查仅执行 RAG 阶段 ===") pro_steps = StepDispatcher.get_steps("professional") # isolation_steps=[0,1,2,3] -> effective = {0,1,2,3} eff = _build_effective_set(pro_steps, [0, 1, 2, 3], None) check("EDGE-001: professional RAG only -> effective={0,1,2,3}", eff == {0, 1, 2, 3}, f"实际: {eff}") check("EDGE-001: step 4 (AI) -> skipped", 4 not in eff) check("EDGE-001: step 5 (AI) -> skipped", 5 not in eff) check("EDGE-001: step 6 (AI) -> skipped", 6 not in eff) # ================================================================ # TC-C03-EDGE-002: 非连续步骤选择 # ================================================================ print("\n=== TC-C03-EDGE-002: 非连续步骤选择 ===") # isolation_steps=[0,2], no manual -> effective = {0,2} eff = _build_effective_set(steps, [0, 2], None) check("EDGE-002: isolation_steps=[0,2] -> effective={0,2}", eff == {0, 2}, f"实际: {eff}") check("EDGE-002: step 0 requires_previous=False -> 可独立执行", steps[0].requires_previous is False) check("EDGE-002: step 1 不在 effective -> skipped", 1 not in eff) check("EDGE-002: step 2 requires_previous=True -> 依赖前一步", steps[2].requires_previous is True) deps = StepDispatcher.get_step_deps("completeness") dep_idx = deps.get(2) check("EDGE-002: step 2 依赖 step 1", dep_idx == 1, f"实际依赖: {dep_idx}") check("EDGE-002: step 1 (skipped) -> step 2 因依赖不满足被跳过", "(逻辑验证: step 2 在无 step 1 输出时被正确标记为 skipped)") # 验证 _STEP_DEPS 与 executor.py 一致 for cid in EXPECTED_CHAINS: deps = _STEP_DEPS[cid] check(f"EDGE-002: {cid} _STEP_DEPS 存在", deps is not None) if cid != "professional": check(f"EDGE-002: {cid} dep 0=None", deps.get(0) is None) check(f"EDGE-002: {cid} dep 1=0", deps.get(1) == 0) check(f"EDGE-002: {cid} dep 2=1", deps.get(2) == 1) else: for i in range(7): expected_dep = None if i == 0 else i - 1 check(f"EDGE-002: professional dep {i}={expected_dep}", deps.get(i) == expected_dep) # ================================================================ # TC-C03-ERROR-001: isolation_steps 包含不存在的索引 # ================================================================ print("\n=== TC-C03-ERROR-001: isolation_steps 包含不存在的索引 ===") isolated = StepDispatcher.get_isolation_steps("completeness", [0, 1, 2, 99]) check("ERROR-001: get_isolation_steps 过滤非法索引 99", len(isolated) == 3, f"实际返回步数: {len(isolated)}") check("ERROR-001: 返回步骤索引正确", all(s.index in {0, 1, 2} for s in isolated)) eff = _build_effective_set(steps, [0, 1, 2, 99], None) check("ERROR-001: _build_effective_set 过滤非法索引 99", eff == {0, 1, 2}, f"实际: {eff}") try: StepDispatcher.get_steps("non_existent_chain") check("ERROR-001: 非法 chain_id 应抛异常", False) except ValueError: check("ERROR-001: 非法 chain_id -> ValueError", True) try: StepDispatcher.get_step_context("completeness", 99) check("ERROR-001: 非法 step_index 应抛异常", False) except ValueError: check("ERROR-001: 非法 step_index -> ValueError", True) # ================================================================ # TC-C03-ERROR-002: 步骤执行异常时步骤状态正确标记 # ================================================================ print("\n=== TC-C03-ERROR-002: 步骤执行异常时状态传播 ===") deps = StepDispatcher.get_step_deps("completeness") # 场景:步骤 0 失败 step_results_0_fail = [ IRStepResult(index=0, name="Prompt 渲染", status="error", error="模板渲染异常"), ] dep_1 = deps.get(1) should_skip_1 = False if dep_1 is not None: for prev in step_results_0_fail: if prev.index == dep_1 and prev.status in ("error", "skipped"): should_skip_1 = True break check("ERROR-002: step 0 error -> step 1 skipped", should_skip_1 is True) step_results_1_skipped = step_results_0_fail + [ IRStepResult(index=1, name="LLM 调用", status="skipped"), ] dep_2 = deps.get(2) should_skip_2 = False if dep_2 is not None: for prev in step_results_1_skipped: if prev.index == dep_2 and prev.status in ("error", "skipped"): should_skip_2 = True break check("ERROR-002: step 1 skipped -> step 2 skipped", should_skip_2 is True) # 乐观场景:全部成功 -> 不跳过 step_results_all_ok = [ IRStepResult(index=0, name="Prompt 渲染", status="success"), IRStepResult(index=1, name="LLM 调用", status="success"), ] skip_2 = False if dep_2 is not None: for prev in step_results_all_ok: if prev.index == dep_2 and prev.status in ("error", "skipped"): skip_2 = True break check("ERROR-002: 全部成功 -> step 2 不跳过", skip_2 is False) # 验证 StepResult 数据类 sr = IRStepResult(index=0, name="Test", status="error", error="some error", duration=0.5) d = sr.to_dict() check("ERROR-002: StepResult.to_dict 包含 error 字段", "error" in d, f"keys: {list(d.keys())}") check("ERROR-002: to_dict 包含 status 字段", d["status"] == "error") check("ERROR-002: to_dict 包含 duration 字段", d["duration"] == 0.5) # ================================================================ # 额外验证:get_step_context # ================================================================ print("\n=== 额外验证: get_step_context ===") ctx = StepDispatcher.get_step_context("completeness", 0) check("context completeness step 0 包含 context_name", ctx.get("context_name") == "prompt_rendering") check("context completeness step 0 包含 required_params", "review_content" in ctx.get("required_params", [])) check("context completeness step 0 can_run_in_isolation", ctx.get("can_run_in_isolation") is True) ctx1 = StepDispatcher.get_step_context("completeness", 1) check("context completeness step 1 包含 llm_invocation", ctx1.get("context_name") == "llm_invocation") ctx_pro = StepDispatcher.get_step_context("professional", 0) check("context professional step 0 包含 phase=RAG", ctx_pro.get("phase") == "RAG 召回阶段") check("context professional step 0 can_run_in_isolation", ctx_pro.get("can_run_in_isolation") is True) ctx_pro6 = StepDispatcher.get_step_context("professional", 6) check("context professional step 6 is_isolatable=False", ctx_pro6.get("can_run_in_isolation") is False) # ================================================================ # 额外验证:StepDefinition field 默认值 # ================================================================ print("\n=== 额外验证: StepDefinition 默认值 ===") sd_default = StepDefinition(index=5, name="默认值测试") check("is_isolatable 默认 = True", sd_default.is_isolatable is True) check("requires_previous 默认 = True", sd_default.requires_previous is True) check("phase 默认 = None", sd_default.phase is None) # CHAIN_STEPS 与 executor.py 结构等价性验证 print("\n=== 等价性验证: CHAIN_STEPS 与 executor.py ===") for cid in EXPECTED_CHAINS: steps_def = CHAIN_STEPS[cid] for s in steps_def: d = s.to_dict() check(f"CHAIN_STEPS[{cid}][{s.index}] to_dict 格式正确", "index" in d and "name" in d and "phase" in d) # ================================================================ # 汇总 # ================================================================ print_summary() sys.exit(0 if all(p for _, p, _ in results) else 1)