| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197 |
- """C02 审查链路执行器 - 自测脚本"""
- import asyncio
- import json
- import sys
- import os
- sys.path.insert(0, os.path.abspath("."))
- from core.debug.executor import (
- DebugExecutor,
- CHAIN_CONFIG,
- CHAIN_STEPS,
- _STEP_DEPS,
- VALID_CHAIN_IDS,
- StepResult,
- )
- from views.debug.debug_api import DebugExecuteRequest
# ==================== Test result recording ====================
# Every assertion outcome is appended here as a (name, passed, detail) tuple.
results = []


def check(name: str, passed: bool, detail: str = ""):
    """Record one assertion outcome and echo it to stdout.

    Args:
        name: Human-readable label of the assertion.
        passed: Whether the assertion held.
        detail: Optional extra context; appended to the printed line only
            when it is non-empty.
    """
    results.append((name, passed, detail))
    label = "PASS" if passed else "FAIL"
    line = f" [{label}] {name}"
    if detail:
        line += f" -- {detail}"
    print(line)
def print_summary():
    """Print the pass/total tally followed by every failed assertion.

    Reads the module-level ``results`` list; the list of failures is only
    printed when at least one recorded entry did not pass.
    """
    rule = "=" * 60
    failures = [(name, detail) for name, ok, detail in results if not ok]
    total = len(results)
    passed = total - len(failures)

    print(f"\n{rule}")
    print(f"结果: {passed}/{total} 通过")
    if failures:
        print("失败项:")
        for name, detail in failures:
            print(f" - {name}: {detail}")
    print(f"{rule}\n")
# ==================== TC-C02-CONFIG-001: CHAIN_CONFIG completeness ====================
# Verifies that CHAIN_CONFIG declares exactly the expected chain ids and that
# every chain entry carries the four required fields.
print("\n--- TC-C02-CONFIG-001: CHAIN_CONFIG 完整性 ---")
expected_chains = {
    "completeness", "timeliness", "reference",
    "sensitive", "semantic", "grammar", "professional",
}
actual_chains = set(CHAIN_CONFIG.keys())
check("7 个 chain_id 全量覆盖",
      actual_chains == expected_chains,
      f"实际: {actual_chains}")

# sorted(): iterating a set of strings has no stable order between runs, which
# would make the report order differ from one execution to the next.
for cid in sorted(expected_chains):
    if cid not in CHAIN_CONFIG:
        # A missing chain is recorded as a failure instead of raising KeyError,
        # so the remaining test cases and the final summary still run.
        check(f"{cid} 已在 CHAIN_CONFIG 中定义", False,
              "CHAIN_CONFIG 缺少该 chain_id")
        continue
    cfg = CHAIN_CONFIG[cid]
    for field in ("name", "reviewer_type", "prompt_name", "function_name"):
        check(f"{cid} 包含 {field} 字段", field in cfg)
# ==================== TC-C02-CONFIG-002: CHAIN_STEPS completeness ====================
# Verifies that every expected chain has a step list of the expected length
# (7 for "professional", 3 for every other chain) and that each step is named.
print("\n--- TC-C02-CONFIG-002: CHAIN_STEPS 完整性 ---")
# sorted(): keeps the report order stable across runs (set order is arbitrary).
for cid in sorted(expected_chains):
    steps = CHAIN_STEPS.get(cid)
    check(f"{cid} 有步骤定义", steps is not None, f"steps: {steps}")
    if not steps:
        continue
    expected_count = 7 if cid == "professional" else 3
    check(f"{cid} 步骤数正确 (直调=3, 专业=7)",
          len(steps) == expected_count,
          f"实际步骤数: {len(steps)}")
    for position, s in enumerate(steps):
        # Use .get() for both the label and the detail: a step missing "name"
        # or "index" must be reported as FAIL, not abort the script with
        # KeyError. The list position stands in for a missing index.
        step_index = s.get("index", position)
        step_name = s.get("name")
        check(f"{cid} step {step_index} 有合法名称",
              bool(step_name), str(step_name or ""))
# ==================== TC-C02-CONFIG-003: _STEP_DEPS completeness ====================
# Verifies that every expected chain has a dependency table and that the table
# has an entry for every step index listed in CHAIN_STEPS.
print("\n--- TC-C02-CONFIG-003: 步骤依赖完整性 ---")
# sorted(): keeps the report order stable across runs (set order is arbitrary).
for cid in sorted(expected_chains):
    deps = _STEP_DEPS.get(cid)
    check(f"{cid} 有依赖定义", deps is not None)
    # Test against None rather than truthiness: an empty table must still be
    # checked step by step so that its missing entries show up as failures.
    if deps is None:
        continue
    # Tolerate a chain without a step list; TC-C02-CONFIG-002 reports that case.
    for s in CHAIN_STEPS.get(cid) or []:
        step_index = s.get("index")
        check(f"{cid} step {step_index} 依赖已定义",
              step_index in deps)
# ==================== TC-C02-LOGIC-001: step dependency / skip logic ====================
print("\n--- TC-C02-LOGIC-001: 步骤依赖/跳过逻辑 ---")


def _dependency_blocked(dep, step_results):
    """Return True when the step numbered *dep* ended as "error" or "skipped".

    Args:
        dep: Index of the step being depended on, or None for no dependency.
        step_results: Iterable of objects exposing ``index`` and ``status``.
    """
    if dep is None:
        return False
    return any(
        prev.index == dep and prev.status in ("error", "skipped")
        for prev in step_results
    )


async def test_skip_logic():
    """Check that step 2 is skipped exactly when its dependency did not succeed.

    The skip decision is re-derived here from ``_STEP_DEPS`` and hand-built
    ``StepResult`` lists; no LLM call is made.
    NOTE(review): this mirrors the rule rather than calling the executor's own
    skip code, so it presumably only validates the dependency table -- confirm.
    """
    chain_id = "completeness"
    # .get() so that a missing chain is reported as FAIL by the check below
    # instead of aborting the whole script with KeyError.
    deps = _STEP_DEPS.get(chain_id) or {}
    dep = deps.get(2)

    # Failure scenario: step 0 succeeds, step 1 fails -> step 2 must be skipped.
    failed_run = [
        StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1),
        StepResult(index=1, name="LLM 调用", status="error", duration=0.1,
                   error="模拟超时"),
    ]
    should_skip = _dependency_blocked(dep, failed_run)
    check("步骤 1 失败 → 步骤 2 应跳过",
          should_skip is True,
          f"依赖: step {dep}, 上一步状态: error → should_skip={should_skip}")

    # Happy path: steps 0 and 1 both succeed -> step 2 must not be skipped.
    ok_run = [
        StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1),
        StepResult(index=1, name="LLM 调用", status="success", duration=0.1),
    ]
    should_skip_2 = _dependency_blocked(dep, ok_run)
    check("步骤 0+1 成功 → 步骤 2 不应跳过",
          should_skip_2 is False)


asyncio.run(test_skip_logic())
# ==================== TC-C02-LOGIC-002: result summary statistics ====================
# Feeds one success, one error and one skipped step to
# DebugExecutor._build_final_result and checks the four counters it returns.
print("\n--- TC-C02-LOGIC-002: 结果摘要统计 ---")
mixed_steps = [
    StepResult(index=0, name="A", status="success", duration=0.1),
    StepResult(index=1, name="B", status="error", duration=0.1,
               error="模拟错误"),
    StepResult(index=2, name="C", status="skipped", duration=0),
]
summary = DebugExecutor._build_final_result(mixed_steps)
for label, key, expected in (
    ("结果摘要包含总步骤数", "total_steps", 3),
    ("结果摘要成功计数正确", "success_count", 1),
    ("结果摘要错误计数正确", "error_count", 1),
    ("结果摘要跳过计数正确", "skipped_count", 1),
):
    check(label, summary[key] == expected)
# ==================== TC-C02-LOGIC-003: trace ID prefix isolation ====================
# Checks that a generated trace id starts with "debug_" and embeds the chain id.
print("\n--- TC-C02-LOGIC-003: Trace ID 生产隔离 ---")
from core.debug.executor import _make_trace_id

tid = _make_trace_id("completeness")
tid_detail = f"实际: {tid}"
check("trace_id 以 debug_ 开头", tid.startswith("debug_"), tid_detail)
check("trace_id 包含 chain_id", "completeness" in tid, tid_detail)
# ==================== TC-C02-LOGIC-004: record_id generation ====================
# Checks the "call-" prefix and the minimum length of a generated record id.
print("\n--- TC-C02-LOGIC-004: record_id 生成 ---")
from core.debug.executor import _make_record_id

rid = _make_record_id()
has_call_prefix = rid.startswith("call-")
check("record_id 以 call- 开头", has_call_prefix, f"实际: {rid}")
# NOTE(review): the label says the id contains a date, but only its length is
# tested here -- confirm the real id format before tightening this check.
rid_length = len(rid)
check("record_id 包含日期",
      rid_length > 20,
      f"实际长度: {rid_length}, value: {rid}")
# ==================== Summary ====================
print_summary()

# Exit status for calling scripts: 0 when every recorded check passed, else 1.
all_passed = all(ok for _, ok, _ in results)
sys.exit(0 if all_passed else 1)
|