test_executor_selfcheck.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. """C02 审查链路执行器 - 自测脚本"""
  2. import asyncio
  3. import json
  4. import sys
  5. import os
  6. sys.path.insert(0, os.path.abspath("."))
  7. from core.debug.executor import (
  8. DebugExecutor,
  9. CHAIN_CONFIG,
  10. CHAIN_STEPS,
  11. _STEP_DEPS,
  12. VALID_CHAIN_IDS,
  13. StepResult,
  14. )
  15. from views.debug.debug_api import DebugExecuteRequest
  16. # ==================== 测试结果记录 ====================
  17. results = []
  18. def check(name: str, passed: bool, detail: str = ""):
  19. status = "PASS" if passed else "FAIL"
  20. results.append((name, passed, detail))
  21. print(f" [{status}] {name}" + (f" -- {detail}" if detail else ""))
  22. def print_summary():
  23. print(f"\n{'='*60}")
  24. passed = sum(1 for _, p, _ in results if p)
  25. total = len(results)
  26. print(f"结果: {passed}/{total} 通过")
  27. if passed < total:
  28. print("失败项:")
  29. for name, p, detail in results:
  30. if not p:
  31. print(f" - {name}: {detail}")
  32. print(f"{'='*60}\n")
  33. # ==================== TC-C02-CONFIG-001: CHAIN_CONFIG 完整性 ====================
  34. print("\n--- TC-C02-CONFIG-001: CHAIN_CONFIG 完整性 ---")
  35. expected_chains = {
  36. "completeness", "timeliness", "reference",
  37. "sensitive", "semantic", "grammar", "professional",
  38. }
  39. check("7 个 chain_id 全量覆盖",
  40. set(CHAIN_CONFIG.keys()) == expected_chains,
  41. f"实际: {set(CHAIN_CONFIG.keys())}")
  42. for cid in expected_chains:
  43. cfg = CHAIN_CONFIG[cid]
  44. check(f"{cid} 包含 name 字段", "name" in cfg)
  45. check(f"{cid} 包含 reviewer_type 字段", "reviewer_type" in cfg)
  46. check(f"{cid} 包含 prompt_name 字段", "prompt_name" in cfg)
  47. check(f"{cid} 包含 function_name 字段", "function_name" in cfg)
  48. # ==================== TC-C02-CONFIG-002: CHAIN_STEPS 完整性 ====================
  49. print("\n--- TC-C02-CONFIG-002: CHAIN_STEPS 完整性 ---")
  50. for cid in expected_chains:
  51. steps = CHAIN_STEPS.get(cid)
  52. check(f"{cid} 有步骤定义", steps is not None, f"steps: {steps}")
  53. if steps:
  54. check(f"{cid} 步骤数正确 (直调=3, 专业=7)",
  55. len(steps) == (7 if cid == "professional" else 3),
  56. f"实际步骤数: {len(steps)}")
  57. for s in steps:
  58. check(f"{cid} step {s['index']} 有合法名称",
  59. bool(s.get("name")), s["name"])
  60. # ==================== TC-C02-CONFIG-003: _STEP_DEPS 完整性 ====================
  61. print("\n--- TC-C02-CONFIG-003: 步骤依赖完整性 ---")
  62. for cid in expected_chains:
  63. deps = _STEP_DEPS.get(cid)
  64. check(f"{cid} 有依赖定义", deps is not None)
  65. if deps:
  66. steps = CHAIN_STEPS[cid]
  67. for s in steps:
  68. check(f"{cid} step {s['index']} 依赖已定义",
  69. s["index"] in deps)
  70. # ==================== TC-C02-LOGIC-001: 步骤依赖/跳过逻辑 ====================
  71. print("\n--- TC-C02-LOGIC-001: 步骤依赖/跳过逻辑 ---")
  72. async def test_skip_logic():
  73. """模拟步骤 1 失败时步骤 2 被标记为 skipped"""
  74. # 使用 asyncio.Queue 但不上传真实 LLM,直接构造场景
  75. from core.debug.executor import (
  76. _STEP_DEPS, CHAIN_STEPS, CHAIN_CONFIG,
  77. )
  78. # 模拟直调链路:步骤 0 成功,步骤 1 失败 → 步骤 2 应跳过
  79. chain_id = "completeness"
  80. deps = _STEP_DEPS[chain_id]
  81. step_results = [
  82. StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1),
  83. StepResult(index=1, name="LLM 调用", status="error", duration=0.1,
  84. error="模拟超时"),
  85. ]
  86. # 手动推导步骤 2 的跳过逻辑
  87. dep = deps.get(2)
  88. should_skip = False
  89. if dep is not None:
  90. for prev in step_results:
  91. if prev.index == dep and prev.status in ("error", "skipped"):
  92. should_skip = True
  93. break
  94. check("步骤 1 失败 → 步骤 2 应跳过",
  95. should_skip is True,
  96. f"依赖: step {dep}, 上一步状态: error → should_skip={should_skip}")
  97. # 乐观场景:步骤 0 成功,步骤 1 成功 → 步骤 2 不应跳过
  98. step_results_ok = [
  99. StepResult(index=0, name="Prompt 渲染", status="success", duration=0.1),
  100. StepResult(index=1, name="LLM 调用", status="success", duration=0.1),
  101. ]
  102. dep = deps.get(2)
  103. should_skip_2 = False
  104. if dep is not None:
  105. for prev in step_results_ok:
  106. if prev.index == dep and prev.status in ("error", "skipped"):
  107. should_skip_2 = True
  108. break
  109. check("步骤 0+1 成功 → 步骤 2 不应跳过",
  110. should_skip_2 is False)
  111. asyncio.run(test_skip_logic())
  112. # ==================== TC-C02-LOGIC-002: 结果摘要统计 ====================
  113. print("\n--- TC-C02-LOGIC-002: 结果摘要统计 ---")
  114. mixed_steps = [
  115. StepResult(index=0, name="A", status="success", duration=0.1),
  116. StepResult(index=1, name="B", status="error", duration=0.1,
  117. error="模拟错误"),
  118. StepResult(index=2, name="C", status="skipped", duration=0),
  119. ]
  120. summary = DebugExecutor._build_final_result(mixed_steps)
  121. check("结果摘要包含总步骤数",
  122. summary["total_steps"] == 3)
  123. check("结果摘要成功计数正确",
  124. summary["success_count"] == 1)
  125. check("结果摘要错误计数正确",
  126. summary["error_count"] == 1)
  127. check("结果摘要跳过计数正确",
  128. summary["skipped_count"] == 1)
  129. # ==================== TC-C02-LOGIC-003: Trace ID 前缀隔离 ====================
  130. print("\n--- TC-C02-LOGIC-003: Trace ID 生产隔离 ---")
  131. from core.debug.executor import _make_trace_id
  132. tid = _make_trace_id("completeness")
  133. check("trace_id 以 debug_ 开头",
  134. tid.startswith("debug_"),
  135. f"实际: {tid}")
  136. check("trace_id 包含 chain_id",
  137. "completeness" in tid,
  138. f"实际: {tid}")
  139. # ==================== TC-C02-LOGIC-004: record_id 生成 ====================
  140. print("\n--- TC-C02-LOGIC-004: record_id 生成 ---")
  141. from core.debug.executor import _make_record_id
  142. rid = _make_record_id()
  143. check("record_id 以 call- 开头",
  144. rid.startswith("call-"),
  145. f"实际: {rid}")
  146. check("record_id 包含日期",
  147. len(rid) > 20,
  148. f"实际长度: {len(rid)}, value: {rid}")
  149. # ==================== 汇总 ====================
  150. print_summary()
  151. # 返回退出码供脚本调用
  152. sys.exit(0 if all(p for _, p, _ in results) else 1)