"""C03 step dispatch and stage isolation - self-test script.

Covers all 9 TC-C03 test cases:
- TC-C03-API-001 ~ 005: functional tests
- TC-C03-EDGE-001 ~ 002: boundary tests
- TC-C03-ERROR-001 ~ 002: error tests
"""
import json
import logging
import os
import sys

# Put the current working directory first on sys.path.
# NOTE(review): presumably so the `core` package resolves when the script is
# launched from the repository root - confirm the expected launch directory.
sys.path.insert(0, os.path.abspath("."))

# Turn off non-essential log output before the project modules are imported.
logging.disable(logging.CRITICAL)

from core.debug.step_dispatcher import StepDefinition
from core.debug.step_dispatcher import StepDispatcher
from core.debug.step_dispatcher import CHAIN_STEPS
from core.debug.step_dispatcher import _STEP_DEPS
from core.debug.step_dispatcher import VALID_CHAIN_IDS
from core.debug.isolation_runner import IsolationRunner
from core.debug.isolation_runner import StepResult as IRStepResult

# Shortcut to the runner's effective-set builder (a static method, per the
# original note) so the checks below can call it without an instance.
_build_effective_set = IsolationRunner._build_effective_set
# ==================== Test result recording ====================
# One (name, passed, detail) tuple per check() call, in call order.
results = []


def check(name: str, passed: object, detail: str = "") -> None:
    """Record the outcome of one check and print a one-line report.

    Args:
        name: Label of the check; echoed in the report line and the summary.
        passed: Outcome of the check. Any object is accepted and judged by
            truthiness; it is normalized to a real ``bool`` before being
            stored so ``results`` never holds arbitrary truthy objects
            (for example a string passed by mistake).
        detail: Optional extra text, appended after `` -- `` when non-empty.
    """
    ok = bool(passed)
    status = "PASS" if ok else "FAIL"
    results.append((name, ok, detail))
    print(f" [{status}] {name}" + (f" -- {detail}" if detail else ""))


def print_summary() -> None:
    """Print the pass count and list every failed check with its detail."""
    rule = "=" * 60
    failures = [(name, detail) for name, ok, detail in results if not ok]
    total = len(results)
    passed = total - len(failures)

    print(f"\n{rule}")
    print(f"结果: {passed}/{total} 通过")
    if failures:
        print("失败项:")
        for name, detail in failures:
            print(f" - {name}: {detail}")
    print(f"{rule}\n")
# Chain ids the checks in this script iterate over and compare against
# VALID_CHAIN_IDS.
EXPECTED_CHAINS = {
    "completeness",
    "timeliness",
    "reference",
    "sensitive",
    "semantic",
    "grammar",
    "professional",
}
# ================================================================
# TC-C03-API-001: direct-call chains run their 3 steps normally (full chain)
# ================================================================
print("\n=== TC-C03-API-001: 直调链路 3 步正常执行 ===")

# A hand-built StepDefinition must expose its fields and serialize via to_dict().
sd = StepDefinition(index=0, name="测试步骤", is_isolatable=True, requires_previous=False)
check("StepDefinition 有 index 字段", sd.index == 0, f"实际: {sd.index}")
check("StepDefinition 有 name 字段", sd.name == "测试步骤", f"实际: {sd.name}")
check("StepDefinition 有 is_isolatable 字段", sd.is_isolatable is True)
check("StepDefinition 有 requires_previous 字段", sd.requires_previous is False)
check("StepDefinition.to_dict 兼容 dict 格式",
      sd.to_dict() == {"index": 0, "name": "测试步骤", "phase": None})

# Each of the six chains listed here is expected to expose the same three steps.
for cid in ["completeness", "timeliness", "reference", "sensitive", "semantic", "grammar"]:
    steps = StepDispatcher.get_steps(cid)
    check(f"{cid} 步骤数 = 3", len(steps) == 3, f"实际: {len(steps)}")
    for i, expected_name in enumerate(["Prompt 渲染", "LLM 调用", "结果解析"]):
        s = steps[i]
        check(f"{cid} step {i} name = '{expected_name}'",
              s.name == expected_name and s.index == i)
    # Only step 0 is expected to run without a predecessor; every step is
    # expected to be isolatable.
    for i, needs_previous in enumerate((False, True, True)):
        check(f"{cid} step {i} requires_previous={needs_previous}",
              steps[i].requires_previous is needs_previous)
        check(f"{cid} step {i} is_isolatable=True", steps[i].is_isolatable is True)

# Iterate in sorted order: EXPECTED_CHAINS is a set of strings, whose iteration
# order can change between interpreter runs, which would shuffle the report.
for cid in sorted(EXPECTED_CHAINS):
    steps_direct = StepDispatcher.CHAIN_STEPS[cid]
    steps_method = StepDispatcher.get_steps(cid)
    check(f"{cid} CHAIN_STEPS 与 get_steps 一致",
          [s.index for s in steps_direct] == [s.index for s in steps_method])

check("VALID_CHAIN_IDS 覆盖 7 个链路",
      VALID_CHAIN_IDS == EXPECTED_CHAINS,
      f"实际: {VALID_CHAIN_IDS}")
# ================================================================
# TC-C02-API-002: professional review executes all 7 steps (ported from C02)
# NOTE(review): the banner below says "TC-C02" while the module docstring lists
# TC-C03-API-001 ~ 005 - confirm whether the C02 id is kept on purpose.
# ================================================================
print("\n=== TC-C02-API-002: 专业性审查 7 步完整执行 ===")

steps_pro = StepDispatcher.get_steps("professional")
check("professional 步骤数 = 7", len(steps_pro) == 7, f"实际: {len(steps_pro)}")

rag_phase = "RAG 召回阶段"
ai_phase = "AI 审查阶段"

# Expected (name, phase) per step; the list position is the expected step index.
expected_by_position = [
    ("查询提取", rag_phase),
    ("实体增强检索", rag_phase),
    ("父文档增强", rag_phase),
    ("结果提取", rag_phase),
    ("非参数合规审查", ai_phase),
    ("参数合规审查", ai_phase),
    ("结果汇总", ai_phase),
]
for position, (want_name, want_phase) in enumerate(expected_by_position):
    step = steps_pro[position]
    check(f"professional step {position} name = '{want_name}'",
          step.name == want_name and step.index == position)
    check(f"professional step {position} phase = '{want_phase}'",
          step.phase == want_phase, f"实际: {step.phase}")

# Count the steps per phase: 4 are expected in the RAG phase, 3 in the AI phase.
rag_count = sum(1 for step in steps_pro if step.phase == rag_phase)
ai_count = sum(1 for step in steps_pro if step.phase == ai_phase)
check("RAG 阶段 4 步", rag_count == 4, f"实际: {rag_count}")
check("AI 阶段 3 步", ai_count == 3, f"实际: {ai_count}")

# The final aggregation step is expected not to be isolatable.
check("结果汇总 is_isolatable=False", steps_pro[6].is_isolatable is False)
# ================================================================
# TC-C03-API-003: stage isolation - run only the specified steps
# ================================================================
print("\n=== TC-C03-API-003: 环节隔离 -- 仅执行指定步骤 ===")

# `steps` stays bound at module level; later sections of this script reuse it.
steps = StepDispatcher.get_steps("completeness")

# isolation_steps=[0] with no manual inputs: the expected effective set is {0}.
only_first = _build_effective_set(steps, [0], None)
check("API-003: isolation_steps=[0] -> effective={0}",
      only_first == {0}, f"实际: {only_first}")

# Without manual inputs no later step may be pulled in automatically.
later_steps_absent = all(idx not in only_first for idx in (1, 2))
check("API-003: 无 manual_inputs, 不触发自动前向传播", later_steps_absent)
# ================================================================
# TC-C03-API-004: stage isolation - manual input for the LLM call
# ================================================================
print("\n=== TC-C03-API-004: 环节隔离 -- 手动输入 LLM 调用 ===")

# isolation_steps=[1] plus a manual input for step 1: the expected effective
# set is {1, 2} (the selected step and the step that follows it).
with_manual = _build_effective_set(steps, [1], {"1": "请审查以下内容:..."})
check("API-004: isolation_steps=[1], has manual -> effective={1,2}",
      with_manual == {1, 2}, f"实际: {with_manual}")

# (check label, step index, whether the step is expected in the effective set)
membership_cases = (
    ("API-004: step 0 (before first selected) -> skipped", 0, False),
    ("API-004: step 1 (selected with manual) -> execute", 1, True),
    ("API-004: step 2 (auto-forward from manual) -> execute", 2, True),
)
for label, idx, should_run in membership_cases:
    check(label, (idx in with_manual) is should_run)

# Without manual inputs the selection must not be extended forward.
without_manual = _build_effective_set(steps, [1], None)
check("API-004: 无 manual_inputs, effective={1}",
      without_manual == {1}, f"实际: {without_manual}")
# ================================================================
# TC-C03-API-005: stage isolation - parse the response only
# ================================================================
print("\n=== TC-C03-API-005: 环节隔离 -- 仅解析 Response ===")

# isolation_steps=[2] plus a manual input for step 2: step 2 is the last step,
# so the expected effective set is just {2} and steps 0 and 1 are left out.
parse_only = _build_effective_set(steps, [2], {"2": '{"审查结果": []}'})
check("API-005: isolation_steps=[2], effective={2}",
      parse_only == {2}, f"实际: {parse_only}")

# (check label, step index, whether the step is expected in the effective set)
membership_cases = (
    ("API-005: step 0 skipped", 0, False),
    ("API-005: step 1 skipped", 1, False),
    ("API-005: step 2 execute", 2, True),
)
for label, idx, should_run in membership_cases:
    check(label, (idx in parse_only) is should_run)
# ================================================================
# TC-C03-EDGE-001: professional review runs the RAG phase only
# ================================================================
print("\n=== TC-C03-EDGE-001: 专业性审查仅执行 RAG 阶段 ===")

pro_steps = StepDispatcher.get_steps("professional")

# isolation_steps=[0, 1, 2, 3]: the expected effective set is exactly those four.
rag_only = _build_effective_set(pro_steps, [0, 1, 2, 3], None)
check("EDGE-001: professional RAG only -> effective={0,1,2,3}",
      rag_only == {0, 1, 2, 3}, f"实际: {rag_only}")

# Steps 4-6 (labelled "AI" in the check names) must stay out of the set.
for ai_idx in (4, 5, 6):
    check(f"EDGE-001: step {ai_idx} (AI) -> skipped", ai_idx not in rag_only)
# ================================================================
# TC-C03-EDGE-002: non-contiguous step selection
# ================================================================
print("\n=== TC-C03-EDGE-002: 非连续步骤选择 ===")

# isolation_steps=[0, 2] with no manual inputs: the expected effective set is {0, 2}.
eff = _build_effective_set(steps, [0, 2], None)
check("EDGE-002: isolation_steps=[0,2] -> effective={0,2}",
      eff == {0, 2}, f"实际: {eff}")
check("EDGE-002: step 0 requires_previous=False -> 可独立执行",
      steps[0].requires_previous is False)
check("EDGE-002: step 1 不在 effective -> skipped", 1 not in eff)
check("EDGE-002: step 2 requires_previous=True -> 依赖前一步",
      steps[2].requires_previous is True)

deps = StepDispatcher.get_step_deps("completeness")
dep_idx = deps.get(2)
check("EDGE-002: step 2 依赖 step 1", dep_idx == 1, f"实际依赖: {dep_idx}")

# The explanatory text used to be passed as the `passed` argument, which is
# always truthy, so this check could never fail. Assert the actual precondition
# instead: the step that step 2 depends on is not part of the effective set.
# NOTE(review): this only verifies the unmet-dependency precondition; the
# runner marking step 2 as "skipped" is not exercised here.
check("EDGE-002: step 1 (skipped) -> step 2 因依赖不满足被跳过",
      dep_idx is not None and dep_idx not in eff,
      "(逻辑验证: step 2 在无 step 1 输出时被正确标记为 skipped)")
# Verify that _STEP_DEPS describes a linear chain for every expected chain id
# (per the original note this mirrors executor.py, which is not visible here).
# Iterate in sorted order: EXPECTED_CHAINS is a set of strings, whose iteration
# order can change between interpreter runs, which would shuffle the report.
for cid in sorted(EXPECTED_CHAINS):
    chain_deps = _STEP_DEPS[cid]
    check(f"EDGE-002: {cid} _STEP_DEPS 存在", chain_deps is not None)

    # "professional" is checked over 7 steps, every other chain over 3.
    step_count = 7 if cid == "professional" else 3
    for i in range(step_count):
        # Step 0 has no dependency; every later step depends on its predecessor.
        expected_dep = None if i == 0 else i - 1
        actual_dep = chain_deps.get(i)
        if expected_dep is None:
            dep_matches = actual_dep is None
        else:
            dep_matches = actual_dep == expected_dep
        check(f"EDGE-002: {cid} dep {i}={expected_dep}", dep_matches)
# ================================================================
# TC-C03-ERROR-001: isolation_steps contains a non-existent index
# ================================================================
print("\n=== TC-C03-ERROR-001: isolation_steps 包含不存在的索引 ===")

# Index 99 does not exist; both helpers are expected to drop it silently.
isolated = StepDispatcher.get_isolation_steps("completeness", [0, 1, 2, 99])
isolated_count = len(isolated)
check("ERROR-001: get_isolation_steps 过滤非法索引 99",
      isolated_count == 3, f"实际返回步数: {isolated_count}")
check("ERROR-001: 返回步骤索引正确",
      all(step.index in {0, 1, 2} for step in isolated))

filtered = _build_effective_set(steps, [0, 1, 2, 99], None)
check("ERROR-001: _build_effective_set 过滤非法索引 99",
      filtered == {0, 1, 2}, f"实际: {filtered}")

# An unknown chain id is expected to raise ValueError.
try:
    StepDispatcher.get_steps("non_existent_chain")
except ValueError:
    check("ERROR-001: 非法 chain_id -> ValueError", True)
else:
    check("ERROR-001: 非法 chain_id 应抛异常", False)

# An out-of-range step index is expected to raise ValueError as well.
try:
    StepDispatcher.get_step_context("completeness", 99)
except ValueError:
    check("ERROR-001: 非法 step_index -> ValueError", True)
else:
    check("ERROR-001: 非法 step_index 应抛异常", False)
# ================================================================
# TC-C03-ERROR-002: step status is marked correctly when a step fails
# ================================================================
print("\n=== TC-C03-ERROR-002: 步骤执行异常时状态传播 ===")


def _dependency_blocks(dep_index, prior_results):
    """Return True when the result for ``dep_index`` is "error" or "skipped".

    A ``dep_index`` of None (no dependency) never blocks.
    NOTE(review): this replays the skip rule inside the script rather than
    calling the runner - confirm it matches the runner's own rule.
    """
    if dep_index is None:
        return False
    return any(
        prior.index == dep_index and prior.status in ("error", "skipped")
        for prior in prior_results
    )


deps = StepDispatcher.get_step_deps("completeness")

# Scenario: step 0 fails.
step_results_0_fail = [
    IRStepResult(index=0, name="Prompt 渲染", status="error", error="模板渲染异常"),
]
dep_1 = deps.get(1)
should_skip_1 = _dependency_blocks(dep_1, step_results_0_fail)
check("ERROR-002: step 0 error -> step 1 skipped", should_skip_1 is True)

# Step 1 is then recorded as skipped, which must in turn block step 2.
step_results_1_skipped = step_results_0_fail + [
    IRStepResult(index=1, name="LLM 调用", status="skipped"),
]
dep_2 = deps.get(2)
should_skip_2 = _dependency_blocks(dep_2, step_results_1_skipped)
check("ERROR-002: step 1 skipped -> step 2 skipped", should_skip_2 is True)

# Happy path: every earlier step succeeded, so step 2 must not be skipped.
step_results_all_ok = [
    IRStepResult(index=0, name="Prompt 渲染", status="success"),
    IRStepResult(index=1, name="LLM 调用", status="success"),
]
skip_2 = _dependency_blocks(dep_2, step_results_all_ok)
check("ERROR-002: 全部成功 -> step 2 不跳过", skip_2 is False)

# Verify the StepResult data class serializes its fields.
failed_result = IRStepResult(index=0, name="Test", status="error", error="some error", duration=0.5)
payload = failed_result.to_dict()
check("ERROR-002: StepResult.to_dict 包含 error 字段",
      "error" in payload, f"keys: {list(payload.keys())}")
check("ERROR-002: to_dict 包含 status 字段", payload["status"] == "error")
check("ERROR-002: to_dict 包含 duration 字段", payload["duration"] == 0.5)
# ================================================================
# Extra verification: get_step_context
# ================================================================
print("\n=== 额外验证: get_step_context ===")

# completeness, step 0: context name, required parameters and isolation flag.
completeness_first = StepDispatcher.get_step_context("completeness", 0)
check("context completeness step 0 包含 context_name",
      completeness_first.get("context_name") == "prompt_rendering")
required_params = completeness_first.get("required_params", [])
check("context completeness step 0 包含 required_params",
      "review_content" in required_params)
check("context completeness step 0 can_run_in_isolation",
      completeness_first.get("can_run_in_isolation") is True)

# completeness, step 1: context name only.
completeness_second = StepDispatcher.get_step_context("completeness", 1)
check("context completeness step 1 包含 llm_invocation",
      completeness_second.get("context_name") == "llm_invocation")

# professional, step 0: phase label and isolation flag.
professional_first = StepDispatcher.get_step_context("professional", 0)
check("context professional step 0 包含 phase=RAG",
      professional_first.get("phase") == "RAG 召回阶段")
check("context professional step 0 can_run_in_isolation",
      professional_first.get("can_run_in_isolation") is True)

# professional, step 6: expected not to be runnable in isolation.
professional_last = StepDispatcher.get_step_context("professional", 6)
check("context professional step 6 is_isolatable=False",
      professional_last.get("can_run_in_isolation") is False)
# ================================================================
# Extra verification: StepDefinition field defaults
# ================================================================
print("\n=== 额外验证: StepDefinition 默认值 ===")

# Only index and name are supplied; the remaining fields must fall back to
# the defaults asserted below.
minimal_step = StepDefinition(index=5, name="默认值测试")
check("is_isolatable 默认 = True", minimal_step.is_isolatable is True)
check("requires_previous 默认 = True", minimal_step.requires_previous is True)
check("phase 默认 = None", minimal_step.phase is None)
# Structural check: every CHAIN_STEPS entry serializes to a dict carrying the
# "index", "name" and "phase" keys (per the original note, the shape used by
# executor.py, which is not visible here).
print("\n=== 等价性验证: CHAIN_STEPS 与 executor.py ===")

# Iterate in sorted order: EXPECTED_CHAINS is a set of strings, whose iteration
# order can change between interpreter runs, which would shuffle the report.
for cid in sorted(EXPECTED_CHAINS):
    for s in CHAIN_STEPS[cid]:
        d = s.to_dict()
        check(f"CHAIN_STEPS[{cid}][{s.index}] to_dict 格式正确",
              all(key in d for key in ("index", "name", "phase")))
# ================================================================
# Summary
# ================================================================
print_summary()

# Exit status 0 when every recorded check passed, 1 otherwise.
any_failed = any(not ok for _, ok, _ in results)
sys.exit(1 if any_failed else 0)