test_prompt_manager.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. """
  2. C04 PromptManager self-test script.
  3. Tests all test cases from TC-C04-API-001 through TC-C04-ERROR-003.
  4. Run from project root: python core/debug/test_prompt_manager.py
  5. """
  6. import sys
  7. import os
  8. import json
  9. import shutil
  10. import traceback
  11. PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  12. if PROJECT_ROOT not in sys.path:
  13. sys.path.insert(0, PROJECT_ROOT)
  14. from core.debug.prompt_manager import (
  15. PromptManager,
  16. PROMPT_DIR,
  17. VERSIONS_DIR,
  18. PROMPT_FILE_MAP,
  19. CHAINS,
  20. _extract_variables,
  21. _make_diff_lines,
  22. )
  23. PASS = 0
  24. FAIL = 0
  25. ERRORS = []
  26. def check(cond, msg):
  27. global PASS, FAIL
  28. if cond:
  29. PASS += 1
  30. print(f' [PASS] {msg}')
  31. else:
  32. FAIL += 1
  33. print(f' [FAIL] {msg}')
  34. ERRORS.append(msg)
  35. def section(title):
  36. print(f'\n{"=" * 60}')
  37. print(f' {title}')
  38. print(f'{"=" * 60}')
  39. # ================================================================
  40. # Setup: backup existing versions directory
  41. # ================================================================
  42. section('Setup test environment')
  43. versions_backup = None
  44. if os.path.exists(VERSIONS_DIR):
  45. import tempfile
  46. versions_backup = VERSIONS_DIR + '_backup_' + os.urandom(4).hex()
  47. shutil.copytree(VERSIONS_DIR, versions_backup)
  48. shutil.rmtree(VERSIONS_DIR)
  49. print(f' Backed up old versions dir to: {versions_backup}')
  50. manager = PromptManager()
  51. # Capture initial state for later verification (before any save/activate modifies it)
  52. INITIAL_CC_V1 = manager._load_version_file('completeness_check', 'v1.0')
  53. INITIAL_CC_MAIN = manager._read_current_from_main('completeness_check')
  54. print(f' PROMPT_DIR: {PROMPT_DIR}')
  55. print(f' VERSIONS_DIR: {VERSIONS_DIR}')
  56. print(f' Total prompts: {len(PROMPT_FILE_MAP)}')
  57. print(f' Chains: {CHAINS}')
  58. # ================================================================
  59. # TC-C04-API-001: Get prompt list with version info
  60. # ================================================================
  61. section('TC-C04-API-001: Get prompt list')
  62. try:
  63. all_prompts = manager.get_all_prompts()
  64. check(len(all_prompts) >= 8, f'items count >= 8 (actual: {len(all_prompts)})')
  65. if all_prompts:
  66. item = all_prompts[0]
  67. check('name' in item, 'items[0] has name')
  68. check('version' in item, 'items[0] has version')
  69. check('time' in item, 'items[0] has time')
  70. check('chain' in item, 'items[0] has chain')
  71. check('is_current' in item, 'items[0] has is_current')
  72. check('note' in item, 'items[0] has note')
  73. names = [i['name'] for i in all_prompts]
  74. check('completeness_check' in names, 'list contains completeness_check')
  75. check(len(CHAINS) >= 7, f'chains count {len(CHAINS)} >= 7')
  76. check('完整性' in CHAINS, 'chains contains 完整性')
  77. check('时效性' in CHAINS, 'chains contains 时效性')
  78. check('规范性' in CHAINS, 'chains contains 规范性')
  79. check('敏感词' in CHAINS, 'chains contains 敏感词')
  80. check('语义逻辑' in CHAINS, 'chains contains 语义逻辑')
  81. check('语法' in CHAINS, 'chains contains 语法')
  82. check('专业性' in CHAINS, 'chains contains 专业性')
  83. except Exception as e:
  84. print(f' [EXCEPTION] {e}')
  85. traceback.print_exc()
  86. FAIL += 1
  87. ERRORS.append(f'TC-C04-API-001 exception: {e}')
  88. # ================================================================
  89. # TC-C04-API-002: Get prompt detail
  90. # ================================================================
  91. section('TC-C04-API-002: Get prompt detail')
  92. try:
  93. detail = manager.get_prompt_detail('completeness_check')
  94. check(detail is not None, 'get_prompt_detail returns non-None')
  95. if detail:
  96. check('name' in detail and detail['name'] == 'completeness_check', 'detail has name')
  97. check('version' in detail, 'detail has version')
  98. check('time' in detail, 'detail has time')
  99. check('chain' in detail, 'detail has chain')
  100. check('is_current' in detail, 'detail has is_current')
  101. check('system_prompt' in detail and len(detail['system_prompt']) > 0, 'system_prompt not empty')
  102. check('user_prompt' in detail and len(detail['user_prompt']) > 0, 'user_prompt not empty')
  103. check('note' in detail, 'detail has note')
  104. check('variables' in detail, 'detail has variables')
  105. check('file_path' in detail, 'detail has file_path')
  106. variables = detail.get('variables', [])
  107. check(len(variables) > 0, f'variables has {len(variables)} items: {variables}')
  108. # review_content comes from user_prompt; review_references is in system_prompt only
  109. check('review_content' in variables, 'variables contains review_content')
  110. except Exception as e:
  111. print(f' [EXCEPTION] {e}')
  112. traceback.print_exc()
  113. FAIL += 1
  114. ERRORS.append(f'TC-C04-API-002 exception: {e}')
  115. # ================================================================
  116. # TC-C04-API-003: Save new version and set as current
  117. # ================================================================
  118. section('TC-C04-API-003: Save new version')
  119. try:
  120. detail_before = manager.get_prompt_detail('completeness_check')
  121. print(f' Version before save: {detail_before["version"] if detail_before else "N/A"}')
  122. new_system = '你是一个专业的施工方案完整性审查专家。测试新版本内容。'
  123. new_user = '请审查以下施工方案内容的完整性:\n方案内容:{review_content}\n参考依据:{review_references}'
  124. result = manager.save_new_version(
  125. 'completeness_check',
  126. system_prompt=new_system,
  127. user_prompt=new_user,
  128. note='测试保存新版本',
  129. set_current=True,
  130. )
  131. check(result is not None, 'save_new_version returns non-None')
  132. version = result.get('version', '')
  133. check(version.startswith('v'), f'version format: {version}')
  134. major = int(version.lstrip('v').split('.')[0])
  135. check(major >= 2, f'version incremented: {version} (major >= 2)')
  136. check('name' in result and result['name'] == 'completeness_check', 'result has name')
  137. import glob
  138. ver_files = glob.glob(os.path.join(VERSIONS_DIR, 'completeness_check', '*.yaml'))
  139. check(len(ver_files) >= 2, f'version files >= 2 (actual: {len(ver_files)})')
  140. detail_after = manager.get_prompt_detail('completeness_check')
  141. if detail_after:
  142. check(detail_after['system_prompt'] == new_system, 'main file system_prompt updated')
  143. check(detail_after['user_prompt'] == new_user, 'main file user_prompt updated')
  144. new_ver_data = manager._load_version_file('completeness_check', version)
  145. if new_ver_data:
  146. check(new_ver_data.get('is_current') == True, f'version file {version} is_current=true')
  147. print(f' Version after save: {version}')
  148. except Exception as e:
  149. print(f' [EXCEPTION] {e}')
  150. traceback.print_exc()
  151. FAIL += 1
  152. ERRORS.append(f'TC-C04-API-003 exception: {e}')
  153. # ================================================================
  154. # TC-C04-API-004: Version comparison (Diff)
  155. # ================================================================
  156. section('TC-C04-API-004: Version comparison (Diff)')
  157. try:
  158. versions = manager.get_versions('completeness_check')
  159. check(len(versions) >= 2, f'at least 2 versions (actual: {len(versions)})')
  160. if len(versions) >= 2:
  161. v1 = versions[-1]['version']
  162. v2 = versions[0]['version']
  163. diff_result = manager.compare_versions('completeness_check', v1, v2)
  164. check(diff_result is not None, 'compare_versions returns non-None')
  165. check('name' in diff_result, 'diff has name')
  166. check('base_version' in diff_result, 'diff has base_version')
  167. check('target_version' in diff_result, 'diff has target_version')
  168. check('diffs' in diff_result, 'diff has diffs')
  169. diffs = diff_result.get('diffs', [])
  170. check(len(diffs) == 2, f'diffs has 2 sections (actual: {len(diffs)})')
  171. for d in diffs:
  172. check('section' in d, 'diff item has section')
  173. check('type' in d, 'diff item has type')
  174. check('lines' in d, 'diff item has lines')
  175. check(d['type'] == 'text_diff', f'diff type is text_diff')
  176. sys_diff = [d for d in diffs if d['section'] == 'system_prompt']
  177. if sys_diff:
  178. lines = sys_diff[0]['lines']
  179. check(len(lines) > 0, f'system_prompt diff lines > 0 (actual: {len(lines)})')
  180. for line in lines[:3]:
  181. check('type' in line and line['type'] in ('add', 'del', 'ctx'),
  182. f'line has valid type ({line.get("type")})')
  183. check('text' in line, 'line has text')
  184. except Exception as e:
  185. print(f' [EXCEPTION] {e}')
  186. traceback.print_exc()
  187. FAIL += 1
  188. ERRORS.append(f'TC-C04-API-004 exception: {e}')
  189. # ================================================================
  190. # TC-C04-API-005: Activate version
  191. # ================================================================
  192. section('TC-C04-API-005: Activate version')
  193. try:
  194. versions = manager.get_versions('completeness_check')
  195. check(len(versions) >= 2, f'at least 2 versions (actual: {len(versions)})')
  196. if len(versions) >= 2:
  197. oldest_ver = versions[-1]['version']
  198. print(f' Activating version: {oldest_ver}')
  199. old_detail = manager.get_prompt_detail('completeness_check', version=oldest_ver)
  200. check(old_detail is not None, f'can read {oldest_ver} version')
  201. if old_detail:
  202. old_system = old_detail['system_prompt']
  203. act_result = manager.activate_version('completeness_check', oldest_ver)
  204. check(act_result.get('success') == True, 'activate_version returns success=true')
  205. check(act_result.get('name') == 'completeness_check', 'result has name')
  206. check(act_result.get('version') == oldest_ver, f'result version is {oldest_ver}')
  207. current = manager.get_prompt_detail('completeness_check')
  208. if current and old_detail:
  209. check(current['system_prompt'] == old_detail['system_prompt'],
  210. 'main file system_prompt matches activated version')
  211. if old_detail:
  212. ver_data = manager._load_version_file('completeness_check', oldest_ver)
  213. if ver_data:
  214. check(ver_data.get('is_current') == True,
  215. f'{oldest_ver} has is_current=true')
  216. except Exception as e:
  217. print(f' [EXCEPTION] {e}')
  218. traceback.print_exc()
  219. FAIL += 1
  220. ERRORS.append(f'TC-C04-API-005 exception: {e}')
  221. # ================================================================
  222. # TC-C04-API-006: Rollback version
  223. # ================================================================
  224. section('TC-C04-API-006: Rollback version')
  225. try:
  226. rollback_result = manager.rollback_version('completeness_check', 'v1.0')
  227. check(rollback_result.get('success') == True, 'rollback_version returns success=true')
  228. check(rollback_result.get('name') == 'completeness_check', 'result has name')
  229. check(rollback_result.get('version') == 'v1.0', 'result version is v1.0')
  230. current = manager.get_prompt_detail('completeness_check')
  231. v1_detail = manager.get_prompt_detail('completeness_check', version='v1.0')
  232. if current and v1_detail:
  233. check(current['system_prompt'] == v1_detail['system_prompt'],
  234. 'after rollback, main file system_prompt matches v1.0')
  235. check(current['user_prompt'] == v1_detail['user_prompt'],
  236. 'after rollback, main file user_prompt matches v1.0')
  237. except Exception as e:
  238. print(f' [EXCEPTION] {e}')
  239. traceback.print_exc()
  240. FAIL += 1
  241. ERRORS.append(f'TC-C04-API-006 exception: {e}')
  242. # ================================================================
  243. # TC-C04-EDGE-001: No version specified returns current
  244. # ================================================================
  245. section('TC-C04-EDGE-001: No version returns current')
  246. try:
  247. result = manager.save_new_version(
  248. 'completeness_check',
  249. system_prompt='EDGE-001 test new version content',
  250. user_prompt='Review content: {review_content}',
  251. note='EDGE-001 test',
  252. set_current=True,
  253. )
  254. print(f' Current version: {result["version"]}')
  255. detail_no_ver = manager.get_prompt_detail('completeness_check')
  256. if detail_no_ver:
  257. check(detail_no_ver['is_current'] == True, 'no version param: is_current=True')
  258. check(detail_no_ver['version'] == result['version'],
  259. f'returns current version ({detail_no_ver["version"]})')
  260. detail_with_ver = manager.get_prompt_detail('completeness_check', version='v1.0')
  261. if detail_with_ver:
  262. check(detail_with_ver['is_current'] == False,
  263. 'with v1.0 param: is_current=False (current is later version)')
  264. check(detail_with_ver['version'] == 'v1.0',
  265. f'with v1.0 param: version=v1.0')
  266. except Exception as e:
  267. print(f' [EXCEPTION] {e}')
  268. traceback.print_exc()
  269. FAIL += 1
  270. ERRORS.append(f'TC-C04-EDGE-001 exception: {e}')
  271. # ================================================================
  272. # TC-C04-EDGE-002: Special characters in prompt name
  273. # ================================================================
  274. section('TC-C04-EDGE-002: Special chars in prompt name')
  275. try:
  276. detail_special = manager.get_prompt_detail('non_parameter_compliance_check')
  277. check(detail_special is not None,
  278. 'get_prompt_detail("non_parameter_compliance_check") returns non-None')
  279. if detail_special:
  280. check(len(detail_special.get('system_prompt', '')) > 0, 'system_prompt not empty')
  281. check(len(detail_special.get('user_prompt', '')) > 0, 'user_prompt not empty')
  282. ver_files = manager._list_version_files('non_parameter_compliance_check')
  283. check(len(ver_files) >= 1,
  284. f'version files >= 1 (actual: {len(ver_files)})')
  285. expected_dir = os.path.join(VERSIONS_DIR, 'non_parameter_compliance_check')
  286. check(os.path.exists(expected_dir),
  287. f'version dir exists: {expected_dir}')
  288. if ver_files:
  289. ver_path = os.path.join(expected_dir, ver_files[0])
  290. check(os.path.exists(ver_path), f'version file exists: {ver_files[0]}')
  291. except Exception as e:
  292. print(f' [EXCEPTION] {e}')
  293. traceback.print_exc()
  294. FAIL += 1
  295. ERRORS.append(f'TC-C04-EDGE-002 exception: {e}')
  296. # ================================================================
  297. # TC-C04-ERROR-001: Empty system prompt handled by API layer
  298. # ================================================================
  299. section('TC-C04-ERROR-001: Empty system prompt')
  300. try:
  301. try:
  302. manager.save_new_version(
  303. name='nonexistent_prompt',
  304. system_prompt='test',
  305. user_prompt='test',
  306. note='',
  307. )
  308. check(False, 'nonexistent prompt should raise ValueError')
  309. except ValueError:
  310. check(True, 'nonexistent prompt raises ValueError')
  311. except Exception as e:
  312. print(f' [EXCEPTION] {e}')
  313. traceback.print_exc()
  314. FAIL += 1
  315. ERRORS.append(f'TC-C04-ERROR-001 exception: {e}')
  316. # ================================================================
  317. # TC-C04-ERROR-002: Non-existent prompt name returns None/empty
  318. # ================================================================
  319. section('TC-C04-ERROR-002: Non-existent prompt name')
  320. try:
  321. detail = manager.get_prompt_detail('nonexistent_check')
  322. check(detail is None, 'nonexistent prompt detail returns None')
  323. versions = manager.get_versions('nonexistent_check')
  324. check(isinstance(versions, list), 'nonexistent prompt versions returns list')
  325. check(len(versions) == 0, 'nonexistent prompt versions returns empty list')
  326. except Exception as e:
  327. print(f' [EXCEPTION] {e}')
  328. traceback.print_exc()
  329. FAIL += 1
  330. ERRORS.append(f'TC-C04-ERROR-002 exception: {e}')
  331. # ================================================================
  332. # TC-C04-ERROR-003: Corrupted version file gracefully skipped
  333. # ================================================================
  334. section('TC-C04-ERROR-003: Corrupted version file')
  335. try:
  336. completeness_dir = os.path.join(VERSIONS_DIR, 'completeness_check')
  337. bad_file = os.path.join(completeness_dir, 'corrupt.yaml')
  338. with open(bad_file, 'w', encoding='utf-8') as f:
  339. f.write('{invalid: yaml: content\n broken indent\n')
  340. try:
  341. versions = manager.get_versions('completeness_check')
  342. check(True, f'corrupted file skipped gracefully ({len(versions)} valid versions)')
  343. for v in versions:
  344. check(v['version'] != 'corrupt', 'corrupt file not in version list')
  345. except Exception as e:
  346. check(False, f'corrupted file should not raise exception: {e}')
  347. if os.path.exists(bad_file):
  348. os.remove(bad_file)
  349. except Exception as e:
  350. print(f' [EXCEPTION] {e}')
  351. traceback.print_exc()
  352. FAIL += 1
  353. ERRORS.append(f'TC-C04-ERROR-003 exception: {e}')
  354. # ================================================================
  355. # Extra: _extract_variables
  356. # ================================================================
  357. section('Extra: _extract_variables utility')
  358. try:
  359. vars = _extract_variables('Hello {name}, age is {age}')
  360. check('name' in vars and 'age' in vars, f'extracted: {vars}')
  361. vars_empty = _extract_variables('no variables')
  362. check(len(vars_empty) == 0, 'no variables returns empty list')
  363. vars_multi = _extract_variables('{a} {b} {c} {a}')
  364. check(len(set(vars_multi)) == 3, f'3 unique vars: {set(vars_multi)}')
  365. except Exception as e:
  366. print(f' [EXCEPTION] {e}')
  367. traceback.print_exc()
  368. FAIL += 1
  369. ERRORS.append(f'_extract_variables exception: {e}')
  370. # ================================================================
  371. # Extra: _make_diff_lines
  372. # ================================================================
  373. section('Extra: _make_diff_lines utility')
  374. try:
  375. a = 'line1\nline2\nline3'
  376. b = 'line1\nline2_modified\nline3'
  377. lines = _make_diff_lines(a, b)
  378. check(len(lines) > 0, f'Diff has {len(lines)} lines')
  379. has_ctx = any(l['type'] == 'ctx' for l in lines)
  380. has_del = any(l['type'] == 'del' for l in lines)
  381. has_add = any(l['type'] == 'add' for l in lines)
  382. check(has_ctx, 'Diff has ctx lines')
  383. check(has_del, 'Diff has del lines')
  384. check(has_add, 'Diff has add lines')
  385. same_diff = _make_diff_lines('abc', 'abc')
  386. check(len(same_diff) == 0, 'identical content => empty diff')
  387. except Exception as e:
  388. print(f' [EXCEPTION] {e}')
  389. traceback.print_exc()
  390. FAIL += 1
  391. ERRORS.append(f'_make_diff_lines exception: {e}')
  392. # ================================================================
  393. # Extra: get_all_prompts filtering
  394. # ================================================================
  395. section('Extra: get_all_prompts filtering')
  396. try:
  397. all_items = manager.get_all_prompts()
  398. completeness_items = manager.get_all_prompts(chain_filter='完整性')
  399. for item in completeness_items:
  400. check(item['chain'] == '完整性', f'filtered chain 完整性 (actual: {item["chain"]})')
  401. search_items = manager.get_all_prompts(search='completeness')
  402. for item in search_items:
  403. check('completeness' in item['name'].lower(),
  404. f'search result contains completeness (actual: {item["name"]})')
  405. except Exception as e:
  406. print(f' [EXCEPTION] {e}')
  407. traceback.print_exc()
  408. FAIL += 1
  409. ERRORS.append(f'Filtering exception: {e}')
  410. # ================================================================
  411. # Extra: First run initialization (run early, before state changes)
  412. # ================================================================
  413. section('Extra: First run initialization')
  414. try:
  415. for prompt_name in PROMPT_FILE_MAP:
  416. version_dir = os.path.join(VERSIONS_DIR, prompt_name)
  417. check(os.path.exists(version_dir), f'{prompt_name} version dir exists')
  418. v1_file = os.path.join(version_dir, 'v1.0.yaml')
  419. check(os.path.exists(v1_file), f'{prompt_name} v1.0.yaml exists')
  420. # Verify v1.0 matches main file at initialization time
  421. # (Note: run this BEFORE save/activate tests that modify main file)
  422. if INITIAL_CC_V1 and INITIAL_CC_MAIN:
  423. check(INITIAL_CC_V1['system_prompt'] == INITIAL_CC_MAIN['system_prompt'],
  424. 'v1.0 system_prompt matches main file at init')
  425. check(INITIAL_CC_V1['user_prompt_template'] == INITIAL_CC_MAIN['user_prompt'],
  426. 'v1.0 user_prompt matches main file at init')
  427. except Exception as e:
  428. print(f' [EXCEPTION] {e}')
  429. traceback.print_exc()
  430. FAIL += 1
  431. ERRORS.append(f'Initialization exception: {e}')
  432. # ================================================================
  433. # Summary
  434. # ================================================================
  435. section('Test Results')
  436. total = PASS + FAIL
  437. print(f' Total: {total} Passed: {PASS} Failed: {FAIL}')
  438. if ERRORS:
  439. print(f'\n Failed details:')
  440. for i, err in enumerate(ERRORS, 1):
  441. print(f' {i}. {err}')
  442. print(f'\n {"=" * 20} {"ALL PASSED!" if FAIL == 0 else "SOME FAILED!"} {"=" * 20}')
  443. # ================================================================
  444. # Cleanup: restore original versions directory
  445. # ================================================================
  446. if versions_backup and os.path.exists(versions_backup):
  447. if os.path.exists(VERSIONS_DIR):
  448. shutil.rmtree(VERSIONS_DIR)
  449. shutil.copytree(versions_backup, VERSIONS_DIR)
  450. shutil.rmtree(versions_backup)
  451. print(f'\n Restored original versions directory')
  452. else:
  453. print(f'\n Note: test-created version files left at {VERSIONS_DIR}')
  454. sys.exit(0 if FAIL == 0 else 1)