# test_exam_response_sanitizer.py - unit tests for chat._sanitize_exam_response.

  1. import importlib.util
  2. import json
  3. import unittest
  4. from pathlib import Path
  5. CHAT_PATH = Path(__file__).resolve().parents[1] / "routers" / "chat.py"
  6. spec = importlib.util.spec_from_file_location(
  7. "chat_under_test_exam", CHAT_PATH)
  8. chat = importlib.util.module_from_spec(spec)
  9. spec.loader.exec_module(chat)
  10. def exam_payload(title="桩基础施工技术考核"):
  11. return {
  12. "title": title,
  13. "totalScore": 100,
  14. "totalQuestions": 1,
  15. "singleChoice": {
  16. "scorePerQuestion": 2,
  17. "totalScore": 2,
  18. "count": 1,
  19. "questions": [
  20. {
  21. "text": "钻孔灌注桩清孔完成后应重点检查哪项指标?",
  22. "options": [
  23. {"key": "A", "text": "孔底沉渣厚度"},
  24. {"key": "B", "text": "施工便道宽度"},
  25. {"key": "C", "text": "钢筋棚颜色"},
  26. {"key": "D", "text": "围挡广告内容"},
  27. ],
  28. "answer": "A",
  29. "analysis": "孔底沉渣厚度直接影响桩端承载力。",
  30. }
  31. ],
  32. },
  33. "judge": {"scorePerQuestion": 3, "totalScore": 0, "count": 0, "questions": []},
  34. "multiple": {"scorePerQuestion": 5, "totalScore": 0, "count": 0, "questions": []},
  35. "short": {"scorePerQuestion": 10, "totalScore": 0, "count": 0, "questions": []},
  36. }
  37. class ExamResponseSanitizerTests(unittest.TestCase):
  38. def test_removes_thinking_process_prefix(self):
  39. raw = "Thinking Process:\n\n1. Analyze the Request.\n\n" + \
  40. json.dumps(exam_payload(), ensure_ascii=False)
  41. cleaned = chat._sanitize_exam_response(raw)
  42. parsed = json.loads(cleaned)
  43. self.assertEqual(parsed["title"], "桩基础施工技术考核")
  44. self.assertIn("singleChoice", parsed)
  45. self.assertNotIn("Thinking Process", cleaned)
  46. def test_extracts_json_from_markdown_code_block(self):
  47. raw = "下面是生成结果:\n```json\n" + \
  48. json.dumps(exam_payload("桥梁考试"), ensure_ascii=False) + "\n```"
  49. cleaned = chat._sanitize_exam_response(raw)
  50. parsed = json.loads(cleaned)
  51. self.assertEqual(parsed["title"], "桥梁考试")
  52. def test_prefers_exam_payload_over_other_json_noise(self):
  53. raw = (
  54. "Thinking Process:\n"
  55. '{"note":"not exam"}\n'
  56. "Final Answer:\n"
  57. + json.dumps(exam_payload("最终试卷"), ensure_ascii=False)
  58. )
  59. cleaned = chat._sanitize_exam_response(raw)
  60. parsed = json.loads(cleaned)
  61. self.assertEqual(parsed["title"], "最终试卷")
  62. self.assertIn("singleChoice", parsed)
  63. def test_extracts_exam_payload_when_reasoning_contains_quotes_and_examples(self):
  64. raw = (
  65. 'Thinking Process:\n'
  66. 'The output must contain "title", "totalScore", "singleChoice".\n'
  67. 'Use {"key": "A", "text": "..."} as the option shape example.\n'
  68. 'Section example: {"scorePerQuestion": 2, "totalScore": 20, "count": 10, "questions": [...]}.\n\n'
  69. + json.dumps(exam_payload("带说明的最终试卷"), ensure_ascii=False)
  70. )
  71. cleaned = chat._sanitize_exam_response(raw)
  72. parsed = json.loads(cleaned)
  73. self.assertEqual(parsed["title"], "带说明的最终试卷")
  74. self.assertIn("singleChoice", parsed)
  75. self.assertFalse(cleaned.startswith("Thinking Process"))
  76. def test_extracts_trailing_exam_json_after_think_suffix(self):
  77. raw = (
  78. "Thinking Process:\n"
  79. 'Use {"key": "A", "text": "..."} as example.\n'
  80. "</think>\n\n"
  81. + json.dumps(exam_payload("尾部试卷"), ensure_ascii=False)
  82. )
  83. cleaned = chat._sanitize_exam_response(raw)
  84. parsed = json.loads(cleaned)
  85. self.assertEqual(parsed["title"], "尾部试卷")
  86. self.assertEqual(parsed["totalQuestions"], 1)
  87. def test_repairs_unescaped_quotes_inside_string_values(self):
  88. payload = json.dumps(exam_payload("引号容错"), ensure_ascii=False)
  89. payload = payload.replace(
  90. "钻孔灌注桩清孔完成后应重点检查哪项指标?", '钻孔灌注桩必须实行"一炮三检"制度吗?')
  91. payload = payload.replace("孔底沉渣厚度直接影响桩端承载力。", '"一炮三检"是爆破作业的常见安全检查制度。')
  92. cleaned = chat._sanitize_exam_response(payload)
  93. parsed = json.loads(cleaned)
  94. self.assertEqual(parsed["title"], "引号容错")
  95. self.assertIn('"一炮三检"', parsed["singleChoice"]["questions"][0]["text"])
  96. if __name__ == "__main__":
  97. unittest.main()