# CRBC-MaaS-Platform-Project / LQAgentPlatform
"""C03 step dispatch and step isolation - self-test script.

Covers all 9 TC-C03 test cases:
  - TC-C03-API-001 ~ 005: functional tests
  - TC-C03-EDGE-001 ~ 002: boundary tests
  - TC-C03-ERROR-001 ~ 002: error-handling tests
"""

import json
import sys
import os
import logging

# Put the absolute path of the current working directory first on the import
# path. This has to happen before the `core.debug` imports below.
# NOTE(review): presumably the script is meant to be launched from the
# project root so that `core` resolves -- confirm.
sys.path.insert(0, os.path.abspath("."))

# Silence non-essential log output: logging.disable(CRITICAL) suppresses every
# logging call of severity CRITICAL and below.
logging.disable(logging.CRITICAL)

from core.debug.step_dispatcher import (
    StepDefinition,
    StepDispatcher,
    CHAIN_STEPS,
    _STEP_DEPS,
    VALID_CHAIN_IDS,
)
from core.debug.isolation_runner import (
    IsolationRunner,
    StepResult as IRStepResult,
)

# Module-level shortcut so the test sections can call the runner's
# effective-set builder without an IsolationRunner instance (the original
# comment describes it as a static method).
_build_effective_set = IsolationRunner._build_effective_set


# ==================== Test result recording ====================

# Every recorded check as a (name, passed, detail) tuple, in call order.
results = []


def check(name, passed, detail=""):
    """Record the outcome of one check and echo it to stdout.

    Args:
        name: Label printed for this check.
        passed: Outcome of the check. It is stored unchanged and evaluated
            for truthiness to choose between the PASS and FAIL tags.
        detail: Optional extra text; when non-empty it is printed after
            the label, separated by " -- ".
    """
    results.append((name, passed, detail))
    tag = "PASS" if passed else "FAIL"
    line = f"  [{tag}] {name}"
    if detail:
        line += f" -- {detail}"
    print(line)


def print_summary():
    """Print the pass count and, if any check failed, the list of failures.

    Reads the module-level ``results`` list of (name, passed, detail)
    tuples. The report is framed by two 60-character "=" rules.
    """
    rule = "=" * 60
    total = len(results)
    # Keep only what the failure listing needs: the label and its detail.
    failures = [(name, detail) for name, ok, detail in results if not ok]

    print("\n" + rule)
    print(f"结果: {total - len(failures)}/{total} 通过")
    if failures:
        print("失败项:")
        for name, detail in failures:
            print(f"  - {name}: {detail}")
    print(rule + "\n")


# The seven chain identifiers the checks below iterate over and compare
# against VALID_CHAIN_IDS.
EXPECTED_CHAINS = {
    "completeness",
    "timeliness",
    "reference",
    "sensitive",
    "semantic",
    "grammar",
    "professional",
}


# ================================================================
# TC-C03-API-001: direct-call chains run their 3 steps (full chain)
# ================================================================

print("\n=== TC-C03-API-001: 直调链路 3 步正常执行 ===")

# A StepDefinition must expose its four fields and a dict view of itself.
sd = StepDefinition(index=0, name="测试步骤", is_isolatable=True, requires_previous=False)
check("StepDefinition 有 index 字段", sd.index == 0, f"实际: {sd.index}")
check("StepDefinition 有 name 字段", sd.name == "测试步骤", f"实际: {sd.name}")
check("StepDefinition 有 is_isolatable 字段", sd.is_isolatable is True)
check("StepDefinition 有 requires_previous 字段", sd.requires_previous is False)
check("StepDefinition.to_dict 兼容 dict 格式",
      sd.to_dict() == {"index": 0, "name": "测试步骤", "phase": None})

# Each of the six direct-call chains is expected to have the same three steps.
for cid in ["completeness", "timeliness", "reference", "sensitive", "semantic", "grammar"]:
    steps = StepDispatcher.get_steps(cid)
    check(f"{cid} 步骤数 = 3", len(steps) == 3, f"实际: {len(steps)}")
    if len(steps) < 3:
        # The count check above already recorded the failure. Indexing the
        # missing steps would raise IndexError and abort the whole run
        # before the summary is printed, so move on to the next chain.
        continue
    for i, expected_name in enumerate(["Prompt 渲染", "LLM 调用", "结果解析"]):
        s = steps[i]
        check(f"{cid} step {i} name = '{expected_name}'",
              s.name == expected_name and s.index == i)
    # Only step 0 may run without a predecessor; all three are isolatable.
    for i, needs_previous in enumerate((False, True, True)):
        check(f"{cid} step {i} requires_previous={needs_previous}",
              steps[i].requires_previous is needs_previous)
        check(f"{cid} step {i} is_isolatable=True", steps[i].is_isolatable is True)

# Iterate in sorted order: EXPECTED_CHAINS is a set of strings, so its raw
# iteration order (and therefore the report order) can differ between runs.
for cid in sorted(EXPECTED_CHAINS):
    steps_direct = StepDispatcher.CHAIN_STEPS[cid]
    steps_method = StepDispatcher.get_steps(cid)
    check(f"{cid} CHAIN_STEPS 与 get_steps 一致",
          [s.index for s in steps_direct] == [s.index for s in steps_method])

check("VALID_CHAIN_IDS 覆盖 7 个链路",
      VALID_CHAIN_IDS == EXPECTED_CHAINS,
      f"实际: {VALID_CHAIN_IDS}")


# ================================================================
# TC-C02-API-002: professional review runs all 7 steps (ported from C02)
# ================================================================
# NOTE(review): this case keeps a C02 identifier while the module docstring
# lists TC-C03-API-001 ~ 005 -- confirm the label is intentional.

print("\n=== TC-C02-API-002: 专业性审查 7 步完整执行 ===")

steps_pro = StepDispatcher.get_steps("professional")
check("professional 步骤数 = 7", len(steps_pro) == 7, f"实际: {len(steps_pro)}")

# Phase labels shared by the expectation table and the per-phase counts.
rag_phase, ai_phase = "RAG 召回阶段", "AI 审查阶段"

# (index, name, phase) expected at each position; rows are in index order.
expected_pro = [
    (0, "查询提取", rag_phase),
    (1, "实体增强检索", rag_phase),
    (2, "父文档增强", rag_phase),
    (3, "结果提取", rag_phase),
    (4, "非参数合规审查", ai_phase),
    (5, "参数合规审查", ai_phase),
    (6, "结果汇总", ai_phase),
]
for idx, name, phase in expected_pro:
    s = steps_pro[idx]
    check(f"professional step {idx} name = '{name}'", s.name == name and s.index == idx)
    check(f"professional step {idx} phase = '{phase}'", s.phase == phase, f"实际: {s.phase}")

# Step counts per phase: 4 RAG steps followed by 3 AI steps.
for tag, phase, expected_count in (("RAG", rag_phase, 4), ("AI", ai_phase, 3)):
    actual_count = sum(1 for s in steps_pro if s.phase == phase)
    check(f"{tag} 阶段 {expected_count} 步", actual_count == expected_count,
          f"实际: {actual_count}")

# The final aggregation step is the one step expected to be non-isolatable.
check("结果汇总 is_isolatable=False", steps_pro[6].is_isolatable is False)


# ================================================================
# TC-C03-API-003: step isolation -- run only the selected step
# ================================================================

print("\n=== TC-C03-API-003: 环节隔离 -- 仅执行指定步骤 ===")

# `steps` (the completeness chain) is reused by the sections that follow,
# so this global name must stay as is.
steps = StepDispatcher.get_steps("completeness")

# isolation_steps=[0] without manual_inputs -> effective set is exactly {0}.
eff = _build_effective_set(steps, [0], None)
check("API-003: isolation_steps=[0] -> effective={0}", eff == {0}, f"实际: {eff}")

# With no manual input there must be no automatic forward propagation.
later_steps_untouched = not any(idx in eff for idx in (1, 2))
check("API-003: 无 manual_inputs, 不触发自动前向传播", later_steps_untouched)


# ================================================================
# TC-C03-API-004: step isolation -- manual input for the LLM call
# ================================================================

print("\n=== TC-C03-API-004: 环节隔离 -- 手动输入 LLM 调用 ===")

# isolation_steps=[1] with a manual input for step 1:
# expected effective set = {1} plus the auto-forwarded step 2 = {1, 2}.
eff = _build_effective_set(steps, [1], {"1": "请审查以下内容：..."})
check("API-004: isolation_steps=[1], has manual -> effective={1,2}",
      eff == {1, 2}, f"实际: {eff}")

# (label, step index, whether the step is expected in the effective set)
membership_cases = (
    ("API-004: step 0 (before first selected) -> skipped", 0, False),
    ("API-004: step 1 (selected with manual) -> execute", 1, True),
    ("API-004: step 2 (auto-forward from manual) -> execute", 2, True),
)
for label, idx, expected_present in membership_cases:
    check(label, (idx in eff) is expected_present)

# Without manual_inputs the same selection must not propagate forward.
eff_no_manual = _build_effective_set(steps, [1], None)
check("API-004: 无 manual_inputs, effective={1}",
      eff_no_manual == {1}, f"实际: {eff_no_manual}")


# ================================================================
# TC-C03-API-005: step isolation -- parse a response only
# ================================================================

print("\n=== TC-C03-API-005: 环节隔离 -- 仅解析 Response ===")

# isolation_steps=[2] with a manual input for step 2: step 2 is the last
# step, so nothing is forwarded and steps 0 and 1 stay out of the set.
eff = _build_effective_set(steps, [2], {"2": '{"审查结果": []}'})
check("API-005: isolation_steps=[2], effective={2}", eff == {2}, f"实际: {eff}")

for idx, verdict in ((0, "skipped"), (1, "skipped"), (2, "execute")):
    expected_present = verdict == "execute"
    check(f"API-005: step {idx} {verdict}", (idx in eff) is expected_present)


# ================================================================
# TC-C03-EDGE-001: professional review, RAG phase only
# ================================================================

print("\n=== TC-C03-EDGE-001: 专业性审查仅执行 RAG 阶段 ===")

pro_steps = StepDispatcher.get_steps("professional")

# Selecting the four RAG steps (0-3) must yield exactly those four.
eff = _build_effective_set(pro_steps, [0, 1, 2, 3], None)
check("EDGE-001: professional RAG only -> effective={0,1,2,3}",
      eff == {0, 1, 2, 3}, f"实际: {eff}")

# None of the AI-phase steps (4-6) may be pulled in.
for ai_idx in (4, 5, 6):
    check(f"EDGE-001: step {ai_idx} (AI) -> skipped", ai_idx not in eff)


# ================================================================
# TC-C03-EDGE-002: non-contiguous step selection
# ================================================================

print("\n=== TC-C03-EDGE-002: 非连续步骤选择 ===")

# isolation_steps=[0,2] without manual inputs -> effective = {0,2}
eff = _build_effective_set(steps, [0, 2], None)
check("EDGE-002: isolation_steps=[0,2] -> effective={0,2}",
      eff == {0, 2}, f"实际: {eff}")

check("EDGE-002: step 0 requires_previous=False -> 可独立执行",
      steps[0].requires_previous is False)
check("EDGE-002: step 1 不在 effective -> skipped", 1 not in eff)
check("EDGE-002: step 2 requires_previous=True -> 依赖前一步",
      steps[2].requires_previous is True)

deps = StepDispatcher.get_step_deps("completeness")
dep_idx = deps.get(2)
check("EDGE-002: step 2 依赖 step 1", dep_idx == 1, f"实际依赖: {dep_idx}")

# Step 2 has an unmet dependency when the step it depends on is absent from
# the effective set. The original call passed the explanatory text as the
# `passed` argument, so the check was always truthy and verified nothing.
step2_dep_unmet = dep_idx is not None and dep_idx not in eff
check("EDGE-002: step 1 (skipped) -> step 2 因依赖不满足被跳过",
      step2_dep_unmet,
      "(逻辑验证: step 2 在无 step 1 输出时被正确标记为 skipped)")

# Every chain's dependency table must be linear: step 0 has no dependency
# and step i depends on step i - 1. (The original comment says this mirrors
# executor.py, which is not visible from this script.)
# Sorted iteration keeps the report order stable between runs, since
# EXPECTED_CHAINS is a set of strings.
for cid in sorted(EXPECTED_CHAINS):
    deps = _STEP_DEPS[cid]
    check(f"EDGE-002: {cid} _STEP_DEPS 存在", deps is not None)
    step_count = 7 if cid == "professional" else 3
    for i in range(step_count):
        expected_dep = None if i == 0 else i - 1
        check(f"EDGE-002: {cid} dep {i}={expected_dep}",
              deps.get(i) == expected_dep)


# ================================================================
# TC-C03-ERROR-001: isolation_steps contains a non-existent index
# ================================================================

print("\n=== TC-C03-ERROR-001: isolation_steps 包含不存在的索引 ===")

# The unknown index 99 is expected to be dropped, leaving steps 0-2.
isolated = StepDispatcher.get_isolation_steps("completeness", [0, 1, 2, 99])
check("ERROR-001: get_isolation_steps 过滤非法索引 99",
      len(isolated) == 3, f"实际返回步数: {len(isolated)}")
returned_indices = {s.index for s in isolated}
check("ERROR-001: 返回步骤索引正确", returned_indices <= {0, 1, 2})

eff = _build_effective_set(steps, [0, 1, 2, 99], None)
check("ERROR-001: _build_effective_set 过滤非法索引 99",
      eff == {0, 1, 2}, f"实际: {eff}")

# An unknown chain id is expected to raise ValueError.
try:
    StepDispatcher.get_steps("non_existent_chain")
except ValueError:
    check("ERROR-001: 非法 chain_id -> ValueError", True)
else:
    check("ERROR-001: 非法 chain_id 应抛异常", False)

# An out-of-range step index is expected to raise ValueError as well.
try:
    StepDispatcher.get_step_context("completeness", 99)
except ValueError:
    check("ERROR-001: 非法 step_index -> ValueError", True)
else:
    check("ERROR-001: 非法 step_index 应抛异常", False)


# ================================================================
# TC-C03-ERROR-002: step status is marked correctly when a step fails
# ================================================================

print("\n=== TC-C03-ERROR-002: 步骤执行异常时状态传播 ===")


def _blocked_by_dependency(dep_index, prior_results):
    """Return True if the step at ``dep_index`` ended as error or skipped.

    A ``dep_index`` of None means the step has no dependency, so it is
    never blocked. The rule is evaluated locally in this script against
    the given list of step results.
    """
    if dep_index is None:
        return False
    return any(
        prev.index == dep_index and prev.status in ("error", "skipped")
        for prev in prior_results
    )


deps = StepDispatcher.get_step_deps("completeness")
dep_1 = deps.get(1)
dep_2 = deps.get(2)

# Scenario: step 0 fails, so step 1 has to be skipped ...
step_results_0_fail = [
    IRStepResult(index=0, name="Prompt 渲染", status="error", error="模板渲染异常"),
]
should_skip_1 = _blocked_by_dependency(dep_1, step_results_0_fail)
check("ERROR-002: step 0 error -> step 1 skipped", should_skip_1 is True)

# ... and with step 1 skipped, step 2 has to be skipped in turn.
step_results_1_skipped = step_results_0_fail + [
    IRStepResult(index=1, name="LLM 调用", status="skipped"),
]
should_skip_2 = _blocked_by_dependency(dep_2, step_results_1_skipped)
check("ERROR-002: step 1 skipped -> step 2 skipped", should_skip_2 is True)

# Happy path: every earlier step succeeded, so step 2 is not skipped.
step_results_all_ok = [
    IRStepResult(index=0, name="Prompt 渲染", status="success"),
    IRStepResult(index=1, name="LLM 调用", status="success"),
]
skip_2 = _blocked_by_dependency(dep_2, step_results_all_ok)
check("ERROR-002: 全部成功 -> step 2 不跳过", skip_2 is False)

# The StepResult data class must serialise error, status and duration.
sr = IRStepResult(index=0, name="Test", status="error", error="some error", duration=0.5)
d = sr.to_dict()
check("ERROR-002: StepResult.to_dict 包含 error 字段", "error" in d, f"keys: {list(d.keys())}")
check("ERROR-002: to_dict 包含 status 字段", d["status"] == "error")
check("ERROR-002: to_dict 包含 duration 字段", d["duration"] == 0.5)


# ================================================================
# Extra verification: get_step_context
# ================================================================

print("\n=== 额外验证: get_step_context ===")

# completeness step 0: context name, required params, isolation flag.
completeness_ctx0 = StepDispatcher.get_step_context("completeness", 0)
check("context completeness step 0 包含 context_name",
      completeness_ctx0.get("context_name") == "prompt_rendering")
required_params = completeness_ctx0.get("required_params", [])
check("context completeness step 0 包含 required_params",
      "review_content" in required_params)
check("context completeness step 0 can_run_in_isolation",
      completeness_ctx0.get("can_run_in_isolation") is True)

# completeness step 1: context name only.
completeness_ctx1 = StepDispatcher.get_step_context("completeness", 1)
check("context completeness step 1 包含 llm_invocation",
      completeness_ctx1.get("context_name") == "llm_invocation")

# professional step 0: phase label and isolation flag.
professional_ctx0 = StepDispatcher.get_step_context("professional", 0)
check("context professional step 0 包含 phase=RAG",
      professional_ctx0.get("phase") == "RAG 召回阶段")
check("context professional step 0 can_run_in_isolation",
      professional_ctx0.get("can_run_in_isolation") is True)

# professional step 6 is expected to report that it cannot run in isolation.
professional_ctx6 = StepDispatcher.get_step_context("professional", 6)
check("context professional step 6 is_isolatable=False",
      professional_ctx6.get("can_run_in_isolation") is False)


# ================================================================
# Extra verification: StepDefinition field defaults
# ================================================================

print("\n=== 额外验证: StepDefinition 默认值 ===")

# Built with only index and name, so the other fields take their defaults.
sd_default = StepDefinition(index=5, name="默认值测试")
check("is_isolatable 默认 = True", sd_default.is_isolatable is True)
check("requires_previous 默认 = True", sd_default.requires_previous is True)
check("phase 默认 = None", sd_default.phase is None)

# Every step in CHAIN_STEPS must serialise to a dict carrying the index,
# name and phase keys. (The section title refers to structural equivalence
# with executor.py, which is not visible from this script.)
print("\n=== 等价性验证: CHAIN_STEPS 与 executor.py ===")

# Sorted iteration keeps the report order stable between runs, since
# EXPECTED_CHAINS is a set of strings.
for cid in sorted(EXPECTED_CHAINS):
    for s in CHAIN_STEPS[cid]:
        d = s.to_dict()
        check(f"CHAIN_STEPS[{cid}][{s.index}] to_dict 格式正确",
              all(key in d for key in ("index", "name", "phase")))


# ================================================================
# Summary
# ================================================================

print_summary()

# Exit status 0 only when every recorded check passed, 1 otherwise.
any_failed = any(not ok for _, ok, _ in results)
sys.exit(1 if any_failed else 0)