exam.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. from routers.chat import _build_conversation_preview, _rag_search, _sanitize_exam_response
  2. from models.chat import AIConversation, AIMessage
  3. from database import SessionLocal
  4. from fastapi.responses import StreamingResponse
  5. import asyncio
  6. import time
  7. import json
  8. import re
  9. from fastapi import APIRouter, Depends, Request
  10. from sqlalchemy.orm import Session
  11. from sqlalchemy.exc import OperationalError
  12. from pydantic import BaseModel, Field
  13. from typing import Optional
  14. from database import get_db
  15. from services.qwen_service import qwen_service
  16. from utils.config import settings
  17. from utils.logger import logger
# Router that the exam endpoints in this module register themselves on
# via the @router.post(...) decorators.
router = APIRouter()
class QuestionTypeItem(BaseModel):
    """Configuration of a single question type inside an exam request.

    Every field has a default, so a partially filled item still validates.
    """

    # Question type label; build_exam_prompt falls back to `name` when empty.
    questionType: str = ""
    # Alternative label for the question type.
    name: str = ""
    # Number of questions; build_exam_prompt falls back to `questionCount`
    # when this is 0.
    count: int = 0
    # Alternative spelling of the question count.
    questionCount: int = 0
    # Points awarded per question.
    scorePerQuestion: int = 0
    # NOTE(review): not read anywhere in the visible part of this module;
    # presumably a section numeral used by the front end. Confirm.
    romanNumeral: str = ""
class BuildPromptRequest(BaseModel):
    """Request body of POST /exam/build_prompt."""

    # Echoed into the prompt as the generation mode ('未指定' when empty).
    mode: str = ""
    # Echoed into the prompt as the client name ('未指定' when empty).
    client: str = ""
    # Project type; echoed into the prompt and passed to the knowledge-search
    # query builder.
    projectType: str = ""
    # Explicit exam title; when empty the prompt asks the model to derive one.
    examTitle: str = ""
    # Total score of the exam, echoed into the prompt.
    totalScore: int = 0
    # Per-question-type configuration (counts and scores).
    questionTypes: list[QuestionTypeItem] = Field(default_factory=list)
    # Source material the questions must be based on. Short single-line values
    # are treated as keywords and trigger a knowledge-base lookup.
    pptContent: str = ""
    # When True, every generated question must carry a "basis" field.
    requireBasis: bool = False
  35. def _get_exam_section(payload: dict, question_type: str) -> Optional[dict]:
  36. if not isinstance(payload, dict):
  37. return None
  38. question_map = {
  39. "单选题": ("singleChoice", "single_choice", "single"),
  40. "判断题": ("judge",),
  41. "多选题": ("multiple", "multiple_choice"),
  42. "简答题": ("short", "short_answer"),
  43. }
  44. keys = question_map.get(question_type, ())
  45. questions_obj = payload.get("questions")
  46. for key in keys:
  47. section = payload.get(key)
  48. if isinstance(section, dict):
  49. return section
  50. if isinstance(questions_obj, dict):
  51. nested = questions_obj.get(key)
  52. if isinstance(nested, dict):
  53. return nested
  54. return None
  55. def _get_section_question_count(section: Optional[dict]) -> int:
  56. if not isinstance(section, dict):
  57. return 0
  58. questions = section.get("questions")
  59. if not isinstance(questions, list):
  60. return 0
  61. return len(questions)
  62. def _get_knowledge_search_api_url() -> str:
  63. aichat_config = getattr(settings, "aichat", None)
  64. aichat_base_url = getattr(aichat_config, "api_url", "").rstrip("/")
  65. if aichat_base_url:
  66. return f"{aichat_base_url}/knowledge/search"
  67. return "http://127.0.0.1:28002/api/v1/knowledge/search"
  68. def _save_exam_messages_with_fresh_session(
  69. conv_id: int,
  70. user_id: int,
  71. request_payload: dict,
  72. exam_payload: dict,
  73. ) -> None:
  74. last_error = None
  75. for attempt in range(2):
  76. save_db = SessionLocal()
  77. try:
  78. now = int(time.time())
  79. user_msg = AIMessage(
  80. ai_conversation_id=conv_id,
  81. user_id=user_id,
  82. type="user",
  83. content=json.dumps(request_payload, ensure_ascii=False),
  84. created_at=now,
  85. updated_at=now,
  86. is_deleted=0,
  87. )
  88. save_db.add(user_msg)
  89. save_db.flush()
  90. ai_msg = AIMessage(
  91. ai_conversation_id=conv_id,
  92. user_id=user_id,
  93. type="ai",
  94. content=json.dumps(exam_payload, ensure_ascii=False),
  95. prev_user_id=user_msg.id,
  96. created_at=now,
  97. updated_at=now,
  98. is_deleted=0,
  99. )
  100. save_db.add(ai_msg)
  101. save_db.commit()
  102. return
  103. except OperationalError as e:
  104. save_db.rollback()
  105. last_error = e
  106. logger.warning(
  107. f"[exam/generate_stream] 保存试卷时数据库连接异常,准备重试: "
  108. f"attempt={attempt + 1}/2, detail={repr(e)}")
  109. if attempt == 1:
  110. raise
  111. except Exception:
  112. save_db.rollback()
  113. raise
  114. finally:
  115. save_db.close()
  116. if last_error:
  117. raise last_error
  118. def _ensure_exam_conversation_with_fresh_session(
  119. user_id: int,
  120. exam_title: str,
  121. ai_conversation_id: Optional[int],
  122. ) -> int:
  123. last_error = None
  124. for attempt in range(2):
  125. db = SessionLocal()
  126. try:
  127. now = int(time.time())
  128. preview = _build_conversation_preview(
  129. exam_title or "智能生成试卷", limit=100)
  130. if not ai_conversation_id:
  131. conversation = AIConversation(
  132. user_id=user_id,
  133. content=preview,
  134. business_type=3,
  135. exam_name=exam_title,
  136. created_at=now,
  137. updated_at=now,
  138. is_deleted=0,
  139. )
  140. db.add(conversation)
  141. db.commit()
  142. db.refresh(conversation)
  143. return conversation.id
  144. db.query(AIConversation).filter(
  145. AIConversation.id == ai_conversation_id,
  146. AIConversation.user_id == user_id,
  147. ).update({
  148. "content": preview,
  149. "business_type": 3,
  150. "exam_name": exam_title,
  151. "updated_at": now,
  152. })
  153. db.commit()
  154. return ai_conversation_id
  155. except OperationalError as e:
  156. db.rollback()
  157. last_error = e
  158. logger.warning(
  159. f"[exam/generate_stream] 初始化试卷会话时数据库连接异常,准备重试: "
  160. f"attempt={attempt + 1}/2, detail={repr(e)}")
  161. if attempt == 1:
  162. raise
  163. except Exception:
  164. db.rollback()
  165. raise
  166. finally:
  167. db.close()
  168. if last_error:
  169. raise last_error
  170. raise RuntimeError("初始化试卷会话失败")
  171. def _fallback_exam_title(source_text: str) -> str:
  172. text = (source_text or "").strip()
  173. if not text:
  174. return "智能生成试卷"
  175. text = re.sub(r"用户指定的主题/关键词[::]\s*", "", text)
  176. text = re.sub(r"以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题[::]?\s*", "", text)
  177. text = re.sub(r"\s+", " ", text).strip()
  178. text = text.strip("`\"'“”‘’[]【】()(){}<>")
  179. for sep in ("。", ";", ";", ",", ",", "\n", ":", ":"):
  180. if sep in text:
  181. text = text.split(sep, 1)[0].strip()
  182. break
  183. text = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", text)
  184. if not text:
  185. return "智能生成试卷"
  186. return _refine_exam_title_candidate(text) or "智能生成试卷"
  187. def _refine_exam_title_candidate(candidate: str) -> str:
  188. text = re.sub(r"\s+", "", (candidate or "").strip())
  189. text = text.strip("`\"'“”‘’[]【】()(){}<>")
  190. text = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", text)
  191. if not text:
  192. return ""
  193. for marker in (
  194. "仅供内部交流学习",
  195. "仅供内部交流",
  196. "请勿外传",
  197. "讲授人",
  198. "授课人",
  199. "主讲人",
  200. "时间",
  201. "日期",
  202. "联系电话",
  203. "联系方式",
  204. ):
  205. idx = text.find(marker)
  206. if idx > 0:
  207. text = text[:idx]
  208. break
  209. for prefix in ("关于", "有关", "针对", "围绕", "基于", "依据", "结合", "开展", "组织"):
  210. if text.startswith(prefix) and len(text) - len(prefix) >= 4:
  211. text = text[len(prefix):]
  212. break
  213. suffixes = (
  214. "相关知识要点", "相关管理要求", "相关技术要求", "相关施工要求", "相关安全要求",
  215. "培训考试题库", "培训考试", "考试题库", "试题题库", "培训题库",
  216. "的培训考核", "的培训考试", "的考试题库", "的考试", "的考核", "的考查", "的测验", "的测试", "的练习",
  217. "相关内容", "主要内容", "核心内容", "培训内容", "培训要点", "知识要点", "基础知识",
  218. "管理要求", "技术要求", "施工要求", "安全要求", "作业要求", "实施要求",
  219. "工作要点", "控制要点", "操作要点", "注意事项", "质量要求", "验收要求",
  220. "考试内容", "试题内容", "考试试题", "考试重点", "培训重点",
  221. "管理规定", "技术规定", "施工规定", "安全规定",
  222. "工作方案", "专项方案", "管理办法", "技术措施",
  223. "相关知识", "基本要求", "有关要求", "总体要求",
  224. "考核", "考查", "测验", "测试", "练习", "试卷", "考试", "题库", "试题", "内容", "要求",
  225. )
  226. changed = True
  227. while changed and text:
  228. changed = False
  229. for suffix in suffixes:
  230. if text.endswith(suffix) and len(text) - len(suffix) >= 4:
  231. text = text[:-len(suffix)]
  232. changed = True
  233. break
  234. # 标题尽量收敛成名词短语,去掉“的”这类连接词
  235. text = re.sub(
  236. r"(?<=[\u4e00-\u9fa5A-Za-z0-9])的(?=[\u4e00-\u9fa5A-Za-z0-9])",
  237. "",
  238. text,
  239. )
  240. text = text.strip("的及与和等")
  241. if len(text) > 15:
  242. text = text[:15]
  243. return text if len(text) >= 2 else ""
  244. def _split_basis_sources(source_text: str) -> tuple[str, str]:
  245. text = _normalize_exam_text((source_text or "").strip())
  246. if not text:
  247. return "", ""
  248. user_match = re.search(
  249. r"用户输入依据[::]\s*([\s\S]*?)(?=\n\s*PPT提取内容[::]|\Z)",
  250. text,
  251. )
  252. ppt_match = re.search(r"PPT提取内容[::]\s*([\s\S]*)\Z", text)
  253. user_text = (user_match.group(1) if user_match else "").strip()
  254. ppt_text = (ppt_match.group(1) if ppt_match else "").strip()
  255. if user_text or ppt_text:
  256. return user_text, ppt_text
  257. return text, ""
  258. def _extract_basis_candidate_lines(source_text: str, max_lines: int = 8) -> list[str]:
  259. text = _normalize_exam_text((source_text or "").strip())
  260. if not text:
  261. return []
  262. candidates = []
  263. seen = set()
  264. metadata_patterns = (
  265. r"^PPT文件信息",
  266. r"^PPT第一页内容提取结果",
  267. r"^提取的文本内容",
  268. r"^WPS",
  269. r".*全屏显示.*",
  270. r".*幻灯片放映.*",
  271. r"^DocumentProperties$",
  272. r"^DocumentSecurity$",
  273. r"^DocumentProtection$",
  274. r"^文件名[::]",
  275. r"^文件大小[::]",
  276. r"^文件类型[::]",
  277. r"^修改时间[::]",
  278. r"^PPT正文提取失败",
  279. r"^请手动补充",
  280. r"^请手动描述",
  281. r"^您可以描述",
  282. r"^仅供内部交流",
  283. r"^请勿外传",
  284. r"^讲授人[::]",
  285. r"^授课人[::]",
  286. r"^主讲人[::]",
  287. r"^时间[::]",
  288. r"^日期[::]",
  289. r"^联系电话[::]",
  290. r"^联系方式[::]",
  291. r"^[0-9]+[.、]",
  292. )
  293. for raw_line in text.splitlines():
  294. line = raw_line.strip()
  295. if not line:
  296. continue
  297. line = re.sub(
  298. r"^(用户输入依据|PPT提取内容|出题依据内容|出题依据|培训主题|主题)[::]\s*", "", line)
  299. line = re.sub(r"^第\s*\d+\s*(页|张|部分|章|节)[::]?\s*", "", line)
  300. line = re.sub(r"\.(ppt|pptx)$", "", line, flags=re.IGNORECASE)
  301. if any(re.search(pattern, line, re.IGNORECASE) for pattern in metadata_patterns):
  302. continue
  303. line = re.sub(r"\s+", "", line)
  304. if len(line) < 2:
  305. continue
  306. if re.fullmatch(r"[0-9A-Za-z_-]+", line):
  307. continue
  308. if "文件名" in line or "PPT文件信息" in line or "WPS" in line:
  309. continue
  310. if line not in seen:
  311. candidates.append(line)
  312. seen.add(line)
  313. if len(candidates) >= max_lines:
  314. break
  315. return candidates
  316. def _build_knowledge_search_query(source_text: str, project_type: str = "") -> str:
  317. text = _normalize_exam_text((source_text or "").strip())
  318. if not text:
  319. return ""
  320. user_text, ppt_text = _split_basis_sources(text)
  321. candidates = []
  322. seen = set()
  323. title_candidate = _extract_exam_title_from_source(text, project_type)
  324. if title_candidate and title_candidate != "智能生成试卷":
  325. candidates.append(title_candidate)
  326. seen.add(title_candidate)
  327. for block in filter(None, [user_text, ppt_text, text]):
  328. for line in _extract_basis_candidate_lines(block, max_lines=6):
  329. refined = _refine_exam_title_candidate(line)
  330. if len(refined) < 2:
  331. continue
  332. if refined not in seen:
  333. candidates.append(refined)
  334. seen.add(refined)
  335. if len(candidates) >= 5:
  336. break
  337. if len(candidates) >= 5:
  338. break
  339. return " ".join(candidates[:5]).strip()
  340. def _extract_exam_title_from_source(source_text: str, project_type: str = "") -> str:
  341. text = (source_text or "").strip()
  342. if not text:
  343. return "智能生成试卷"
  344. text = _normalize_exam_text(text)
  345. text = re.sub(r"用户指定的主题/关键词[::]\s*", "", text)
  346. text = re.sub(r"^(出题依据内容|出题依据|培训主题|主题)[::]\s*", "", text)
  347. text = re.sub(r"以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题[::]?\s*", "", text)
  348. text = re.sub(r"[ \t]+", " ", text).strip()
  349. candidate_pool = []
  350. for line in _extract_basis_candidate_lines(text, max_lines=8):
  351. candidate_pool.append(line)
  352. for marker in ("\n\n", "\n", "。", ";", ";"):
  353. if marker in text:
  354. head = text.split(marker, 1)[0].strip()
  355. if head:
  356. candidate_pool.append(head)
  357. break
  358. stop_phrases = (
  359. "出题依据", "正文", "章节条款", "文件名", "答案解析", "要求", "规定",
  360. "内容", "相关", "进行", "采用", "包括", "本项目", "本工程", "本次",
  361. "施工", "安全", "管理", "技术", "规范", "标准", "方案", "办法", "措施",
  362. "PPT文件信息", "PPT第一页内容提取结果", "提取的文本内容",
  363. "WPS演示", "全屏显示", "幻灯片放映",
  364. "仅供内部交流学习", "仅供内部交流", "请勿外传", "讲授人", "授课人", "主讲人",
  365. "时间", "日期", "联系电话", "联系方式",
  366. )
  367. candidates = []
  368. for piece in candidate_pool + re.split(r"[,,、/()()\-::]", text):
  369. cleaned = re.sub(r"\s+", "", piece)
  370. cleaned = re.sub(r"^[0-9A-Za-z一二三四五六七八九十.]+$", "", cleaned)
  371. cleaned = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", cleaned)
  372. if len(cleaned) < 2:
  373. continue
  374. if cleaned in stop_phrases:
  375. continue
  376. candidates.append(cleaned)
  377. if candidates:
  378. def score(candidate: str) -> tuple[int, int]:
  379. keyword_bonus = sum(
  380. 2 for token in ("桥梁", "隧道", "桩基", "桩基础", "钢筋", "混凝土", "施工", "安全", "验收", "培训", "作业", "起重", "便桥")
  381. if token in candidate
  382. )
  383. return (keyword_bonus, min(len(candidate), 15))
  384. best = max(candidates, key=score)
  385. best = _refine_exam_title_candidate(best)
  386. if len(best) >= 2:
  387. return best
  388. prefix = _fallback_exam_title(text)
  389. if prefix != "智能生成试卷":
  390. return prefix
  391. project_prefix = re.sub(
  392. r"[^\u4e00-\u9fa5A-Za-z0-9]", "", (project_type or "").strip())
  393. if project_prefix:
  394. return f"{project_prefix[:8]}试卷"
  395. return "智能生成试卷"
  396. def _build_exam_section_example(
  397. question_type: str,
  398. count: int,
  399. score: int,
  400. basis_enabled: bool,
  401. ) -> str:
  402. basis_field = ', "basis": "文件名:...;章节条款:...;正文:..."' if basis_enabled else ""
  403. total_score = count * score
  404. if question_type == "单选题":
  405. return (
  406. f'{{"singleChoice": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  407. f'"count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], '
  408. f'"selectedAnswer": "A", "analysis": "解析"{basis_field}}}]}}}}'
  409. )
  410. if question_type == "多选题":
  411. return (
  412. f'{{"multiple": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  413. f'"count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], '
  414. f'"selectedAnswers": ["A", "B"], "analysis": "解析"{basis_field}}}]}}}}'
  415. )
  416. if question_type == "判断题":
  417. return (
  418. f'{{"judge": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  419. f'"count": {count}, "questions": [{{"text": "题干", "selectedAnswer": "正确", '
  420. f'"analysis": "解析"{basis_field}}}]}}}}'
  421. )
  422. return (
  423. f'{{"short": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  424. f'"count": {count}, "questions": [{{"text": "题干", "outline": {{"keyFactors": "答题要点"}}, '
  425. f'"analysis": "解析"{basis_field}}}]}}}}'
  426. )
  427. def _normalize_exam_text(value: str) -> str:
  428. text = (value or "").strip()
  429. if not text:
  430. return text
  431. text = text.replace("章节名/条款名", "章节条款")
  432. text = text.replace("\\sim", "~")
  433. text = text.replace("$", "")
  434. text = re.sub(r"\\mathrm\s*\{\s*([^{}]+?)\s*\}",
  435. lambda m: re.sub(r"\s+", "", m.group(1)), text)
  436. text = re.sub(r"([A-Za-z])\s*\^\s*\{\s*2\s*\}", r"\1²", text)
  437. text = re.sub(r"([A-Za-z])\s*\^\s*\{\s*3\s*\}", r"\1³", text)
  438. text = re.sub(r"(?<=\d)\s+(?=\d)", "", text)
  439. text = re.sub(r"(?<=\d)\s*\.\s*(?=\d)", ".", text)
  440. text = re.sub(r"(?<=\d)\s*~\s*(?=\d)", "~", text)
  441. text = re.sub(r"(?<=\d)\s*([a-zA-Zµμ%℃°²³]+)\b", r" \1", text)
  442. text = re.sub(r"\b([A-Za-z])\s+([²³])\b", r"\1\2", text)
  443. text = re.sub(r"\s+", " ", text)
  444. return text.strip()
  445. def _normalize_exam_payload_texts(value):
  446. if isinstance(value, dict):
  447. return {key: _normalize_exam_payload_texts(val) for key, val in value.items()}
  448. if isinstance(value, list):
  449. return [_normalize_exam_payload_texts(item) for item in value]
  450. if isinstance(value, str):
  451. return _normalize_exam_text(value)
  452. return value
  453. def _parse_exam_section_payload(raw_response: str, question_type: str) -> tuple[dict, int]:
  454. cleaned = _sanitize_exam_response(raw_response)
  455. parsed = _normalize_exam_payload_texts(json.loads(cleaned))
  456. section = _get_exam_section(parsed, question_type)
  457. actual_count = _get_section_question_count(section)
  458. return parsed, actual_count
  459. async def _repair_exam_section_payload(
  460. raw_response: str,
  461. question_type: str,
  462. count: int,
  463. score: int,
  464. basis_enabled: bool,
  465. ) -> Optional[tuple[dict, int]]:
  466. source_text = (raw_response or "").strip()
  467. if not source_text:
  468. return None
  469. repair_prompt = (
  470. f"下面是一段为【{question_type}】生成的原始内容,其中可能夹杂 Thinking Process、说明文字或不合法 JSON。\n"
  471. "任务:不要重新出题,只对已有内容做结构化整理,输出一个可被 json.loads 直接解析的 JSON 对象。\n"
  472. "要求:\n"
  473. f"1. 顶层只保留当前题型对应字段,count 应写为 {count}。\n"
  474. "2. 只整理原有题目内容,不要新增解释、前言、后记、markdown 代码块或推理过程。\n"
  475. "3. 如果原文中已经有 analysis、basis、options、selectedAnswer/selectedAnswers 等字段,尽量原样保留。\n"
  476. "4. 如果存在 basis 字段,必须保留其原始语言和原始含义;知识库原文是中文时,basis 中的文件名、章节条款、正文都必须保持中文,不得翻译成英文,不得改写为英文摘要。\n"
  477. "5. 如果原文内容本身不足指定题量,不要臆造新题,按已有内容整理即可。\n"
  478. "6. 最终回复必须以 { 开头、以 } 结尾。\n"
  479. f"目标 JSON 结构示例:{_build_exam_section_example(question_type, count, score, basis_enabled)}\n"
  480. f"原始内容如下:\n{source_text[:6000]}"
  481. )
  482. try:
  483. repaired_response = await qwen_service.chat(
  484. [{"role": "user", "content": repair_prompt}],
  485. disable_reasoning=True,
  486. )
  487. parsed, actual_count = _parse_exam_section_payload(
  488. repaired_response, question_type)
  489. logger.info(
  490. f"[exam/generate_stream] {question_type}轻量修复成功: repaired_count={actual_count}"
  491. )
  492. return parsed, actual_count
  493. except Exception as e:
  494. logger.warning(
  495. f"[exam/generate_stream] {question_type}轻量修复失败: {repr(e)}"
  496. )
  497. return None
  498. async def _resolve_exam_title(
  499. user_title: str,
  500. title_source: str,
  501. project_type: str = "",
  502. ) -> str:
  503. manual_title = (user_title or "").strip()
  504. if manual_title:
  505. return manual_title
  506. source_text = (title_source or "").strip()
  507. if not source_text:
  508. return "智能生成试卷"
  509. resolved = _extract_exam_title_from_source(source_text, project_type)
  510. logger.info(
  511. f"[exam/title] 基于用户输入出题依据提取试卷标题: source_len={len(source_text)}, title={resolved}"
  512. )
  513. return resolved
  514. async def _fetch_knowledge_docs(query_str: str, log_prefix: str) -> Optional[list[str]]:
  515. import httpx
  516. search_api_url = _get_knowledge_search_api_url()
  517. timeout = httpx.Timeout(20.0, connect=5.0)
  518. last_error = None
  519. for attempt in range(2):
  520. try:
  521. async with httpx.AsyncClient(timeout=timeout) as client:
  522. resp = await client.post(
  523. search_api_url,
  524. json={"query_str": query_str, "n": 20}
  525. )
  526. if resp.status_code != 200:
  527. logger.error(
  528. f"[{log_prefix}] 知识库检索API响应错误: url={search_api_url}, "
  529. f"status={resp.status_code}, body={resp.text}")
  530. return None
  531. search_data = resp.json()
  532. results = search_data.get("results")
  533. if results is None:
  534. results = search_data.get("data", [])
  535. if isinstance(results, dict):
  536. results = results.get("items", [])
  537. if not isinstance(results, list):
  538. results = []
  539. retrieved_docs = []
  540. for res in results:
  541. if not isinstance(res, dict):
  542. continue
  543. doc_text = res.get("document") or res.get(
  544. "content") or res.get("text")
  545. if doc_text:
  546. retrieved_docs.append(doc_text)
  547. return retrieved_docs
  548. except httpx.ReadTimeout as e:
  549. last_error = e
  550. if attempt == 0:
  551. logger.warning(
  552. f"[{log_prefix}] 知识库检索读取超时,准备重试: "
  553. f"url={search_api_url}, timeout=20s, attempt={attempt + 1}/2")
  554. continue
  555. logger.exception(
  556. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  557. f"exc_type={type(e).__name__}, detail={repr(e)}")
  558. return None
  559. except Exception as e:
  560. last_error = e
  561. logger.exception(
  562. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  563. f"exc_type={type(e).__name__}, detail={repr(e)}")
  564. return None
  565. if last_error:
  566. logger.exception(
  567. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  568. f"exc_type={type(last_error).__name__}, detail={repr(last_error)}")
  569. return None
  570. @router.post("/exam/build_prompt")
  571. async def build_exam_prompt(
  572. request: Request,
  573. data: BuildPromptRequest,
  574. db: Session = Depends(get_db)
  575. ):
  576. """根据前端考试工坊参数生成提示词"""
  577. user = request.state.user
  578. if not user:
  579. return {"statusCode": 401, "msg": "未授权"}
  580. question_desc = []
  581. total_count = 0
  582. for item in data.questionTypes:
  583. count = item.count or item.questionCount or 0
  584. score = item.scorePerQuestion or 0
  585. qtype = item.questionType or item.name or "未命名题型"
  586. total_count += count
  587. question_desc.append(f"{qtype}{count}道,每道{score}分")
  588. question_text = ";".join(question_desc) if question_desc else "题型未提供"
  589. question_schema_lines = []
  590. for item in data.questionTypes:
  591. count = item.count or item.questionCount or 0
  592. score = item.scorePerQuestion or 0
  593. qtype = item.questionType or item.name or "未命名题型"
  594. if count <= 0:
  595. continue
  596. question_schema_lines.append(f"- {qtype}: {count}道,每道{score}分")
  597. question_schema = "\n".join(
  598. question_schema_lines) if question_schema_lines else "- 未提供有效题型"
  599. ppt_content = (data.pptContent or "").strip()
  600. retrieval_query = _build_knowledge_search_query(
  601. ppt_content, data.projectType)
  602. combined_source_mode = "用户输入依据:" in ppt_content and "PPT提取内容:" in ppt_content
  603. # === 新增:如果出题依据不是真正的长文本,而是关键词,则调用远端知识库检索服务 ===
  604. # 启发式判断:如果文本长度小于 50 字,且不包含明显的段落换行,认为它是关键词,去查知识库
  605. if ppt_content and (
  606. (len(ppt_content) < 50 and "\n" not in ppt_content)
  607. or (combined_source_mode and retrieval_query)
  608. ):
  609. query_str = retrieval_query or ppt_content
  610. logger.info(
  611. f"[exam/build_prompt] 检测到可检索出题依据,尝试调用知识库检索API: query='{query_str}'")
  612. retrieved_docs = await _fetch_knowledge_docs(query_str, "exam/build_prompt")
  613. if retrieved_docs:
  614. retrieved_text = "\n\n---\n\n".join(retrieved_docs)
  615. logger.info(
  616. f"[exam/build_prompt] 知识库检索成功,拼接了 {len(retrieved_docs)} 个文档块,总长度 {len(retrieved_text)}")
  617. # 覆盖原来的 ppt_content,改为:用户关键词 + 检索到的真实知识库内容
  618. ppt_content = (
  619. f"用户指定的主题/关键词:{query_str}\n\n"
  620. f"原始出题依据:\n{text[:2000] if (text := (data.pptContent or '').strip()) else '无'}\n\n"
  621. "以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题:\n\n"
  622. f"{retrieved_text}"
  623. )
  624. elif retrieved_docs == []:
  625. logger.warning(
  626. f"[exam/build_prompt] 知识库中未检索到与 '{query_str}' 相关的文档块")
  627. ppt_content = f"(注:未能在知识库中检索到相关文档,请仅根据以下关键词及原始依据出题:{query_str}\n\n{data.pptContent or ''})"
  628. if ppt_content:
  629. max_chars = 12000
  630. if len(ppt_content) > max_chars:
  631. head_len = max_chars // 2
  632. tail_len = max_chars - head_len
  633. ppt_content = (
  634. ppt_content[:head_len]
  635. + "\n\n(出题依据内容过长,已截断,以下为结尾片段)\n\n"
  636. + ppt_content[-tail_len:]
  637. )
  638. logger.info(
  639. f"[exam/build_prompt] pptContent truncated: original_len={len(data.pptContent)} kept_len={len(ppt_content)}"
  640. )
  641. basis_field = ', "basis": "<文件名:...;章节条款:...;正文:...>"' if data.requireBasis else ''
  642. basis_instruction = (
  643. "【出题依据要求】:每道题必须附带一个 'basis' 字段。\n"
  644. "basis 必须严格按以下顺序组织:先写相关文件名,再写章节条款,最后写与题目直接相关的正文原文内容。\n"
  645. "推荐格式为:“文件名:xxx;章节条款:xxx;正文:xxx”。\n"
  646. "basis 只能填写知识库中的原文依据,不得改写成题目,不得出现题干句式,不得包含选项内容,不得直接写出正确答案、错误答案、解析结论或“应选A/应选B/正确/错误”等判断结果。\n"
  647. "basis 应尽量保持知识库原文原貌,不得做摘要、润色、优化、同义替换、翻译或重组;知识库原文如果是中文,basis 也必须保持中文原文,不得改写成英文或中英混杂表述;如果缺少文件名或章节条款,也必须保留固定标识位,分别写为“文件名:未标注”“章节条款:未标注”。\n"
  648. "答案解析请放在独立的 analysis 字段中,不要混入 basis。\n"
  649. ) if data.requireBasis else ""
  650. prompt = (
  651. "请根据以下要求直接生成一份完整试卷,并严格返回纯 JSON,不要输出 markdown 代码块、解释说明或额外文字。\n"
  652. f"生成模式:{data.mode or '未指定'}\n"
  653. f"客户端:{data.client or '未指定'}\n"
  654. f"项目类型:{data.projectType or '未指定'}\n"
  655. f"考试标题:{data.examTitle if data.examTitle else '未提供。请你仔细阅读出题依据内容,高度凝练其核心主题(不要生硬拼凑前缀),生成一个不超过15个字的贴切的试卷名称。特别注意:如果试卷名称中包含公司或组织名称,要么完全省略不写,要么必须使用完整的全称(例如:如果原内容是“蜀道矿业集团”,必须写“蜀道矿业集团”,绝不能擅自简写为“蜀道矿业”)'}\n"
  656. f"总分:{data.totalScore or 0}\n"
  657. f"总题量:{total_count}\n"
  658. f"题型要求:{question_text}\n"
  659. f"出题依据内容:{ppt_content or '无'}\n"
  660. "出题依据内容是本次试卷的核心来源,所有题目必须围绕该内容中的知识点、术语、流程、规范要求和场景展开。\n"
  661. "如果出题依据内容中出现了章节、条款、培训主题或专业术语,题目必须优先考查这些内容,不能偏离到无关知识。\n"
  662. "单选题、多选题、判断题和简答题的题干、选项、答案解析都要与出题依据内容直接相关,不能泛泛而谈。\n"
  663. "请结合出题依据内容、工程类型和题型要求,生成有具体内容、具体选项、具体答案、具体解析的试卷。\n"
  664. "凡是题型配置中 count 大于 0 的题型,必须返回对应数量的非空题目,不能返回空数组,不能少题。\n"
  665. "即使出题依据内容较短,也要优先围绕已有内容中的关键词、术语、场景和要求组织出题,不能因为信息少而返回空题目。\n"
  666. "如果某题型要求生成 3 道题,就必须生成 3 道完整可作答的题目,少于要求数量视为不合格。\n"
  667. "禁止输出“选项A”“题目1”“桥梁工程相关单选题1”“题目内容”“解析内容”这类占位内容,所有题目必须是可直接展示和作答的真实内容。\n"
  668. "【极度重要的多选题防作弊要求】:\n"
  669. "近期发现你生成的多选题中,正确答案总是偷懒按顺序排列(比如全都包含A、全都连号如AB、ABC、ABCD)!这在真实考试中是绝对不允许的。\n"
  670. "你必须强制打乱正确答案的字母组合,严格遵守以下分布规则:\n"
  671. " - 必须有至少 30% 的题目正确答案【完全不包含A】(如 BC, CD, BD, BCD)!\n"
  672. " - 必须有至少 30% 的题目正确答案【跳跃分布】(如 AC, AD, BD, ABD, ACD)!\n"
  673. " - 包含2个正确选项的题目占比应达到 40%\n"
  674. " - 包含3个正确选项的题目占比应达到 40%\n"
  675. " - 包含4个正确选项的题目(ABCD)绝对不能超过 20%!\n"
  676. "【答案随机性要求】:\n"
  677. "1. 单选题:提供4个选项(A/B/C/D),正确答案只能是其中1个,且正确答案必须在A、B、C、D中随机分布,绝不能所有题目的正确答案都相同。\n"
  678. "2. 多选题:提供4个选项(A/B/C/D),正确答案的个数在2~4个之间随机,且答案组合必须随机(例如:可以是AB、AC、AD、BC、BD、CD、ABC、ABD、BCD、ABCD等),绝不能都从A开始或全都是ABCD。\n"
  679. "3. 判断题:正确答案必须在“正确”和“错误”之间随机分布,绝不能所有判断题的答案全都是“正确”或全都是“错误”。\n"
  680. f"{basis_instruction}"
  681. "下面的 JSON 结构示例只用于说明字段格式,示例中的字符串不能原样照抄到最终结果中,最终返回的每个字符串都必须替换成结合出题依据生成的具体内容。\n"
  682. "JSON 输出结构必须符合以下格式:\n"
  683. "{\n"
  684. ' "title": "试卷标题",\n'
  685. ' "totalScore": 100,\n'
  686. ' "totalQuestions": 10,\n'
  687. f' "singleChoice": {{"scorePerQuestion": 2, "totalScore": 20, "count": 10, "questions": [{{"text": "<单选题题干>", "options": [{{"key": "A", "text": "<选项A具体内容>"}}, {{"key": "B", "text": "<选项B具体内容>"}}, {{"key": "C", "text": "<选项C具体内容>"}}, {{"key": "D", "text": "<选项D具体内容>"}}], "answer": "A", "analysis": "<解析内容>"{basis_field}}}]}},\n'
  688. f' "judge": {{"scorePerQuestion": 2, "totalScore": 0, "count": 0, "questions": [{{"text": "<判断题题干>", "answer": "正确", "analysis": "<解析内容>"{basis_field}}}]}},\n'
  689. f' "multiple": {{"scorePerQuestion": 3, "totalScore": 0, "count": 0, "questions": [{{"text": "<多选题题干>", "options": [{{"key": "A", "text": "<选项A具体内容>"}}, {{"key": "B", "text": "<选项B具体内容>"}}, {{"key": "C", "text": "<选项C具体内容>"}}, {{"key": "D", "text": "<选项D具体内容>"}}], "answers": ["A", "C"], "analysis": "<解析内容>"{basis_field}}}]}},\n'
  690. f' "short": {{"scorePerQuestion": 10, "totalScore": 0, "count": 0, "questions": [{{"text": "<简答题题干>", "outline": {{"keyFactors": "<答题要点>", "measures": "<参考措施>"}}{basis_field}}}]}}\n'
  691. "}\n"
  692. "请按下面的题型配置生成对应数量的题目,没有的题型 count 返回 0、questions 返回空数组:\n"
  693. f"{question_schema}"
  694. )
  695. return {
  696. "statusCode": 200,
  697. "msg": "success",
  698. "data": {"prompt": prompt}
  699. }
class BuildSinglePromptRequest(BaseModel):
    """Request body with three required string fields.

    NOTE(review): no endpoint in the visible part of this module uses this
    model; presumably it belongs to a single-question prompt builder.
    Confirm against the rest of the file.
    """

    question_type: str
    topic: str
    difficulty: str
class GenerateStreamRequest(BaseModel):
    """Request body of POST /exam/generate_stream.

    Carries the same fields as BuildPromptRequest plus the id of the
    conversation to continue.
    """

    mode: str = ""
    client: str = ""
    # Project type; passed to the knowledge-search query builder and to the
    # title resolution.
    projectType: str = ""
    # Explicit exam title; when blank the title is derived from pptContent.
    examTitle: str = ""
    totalScore: int = 0
    # Per-question-type configuration (counts and scores).
    questionTypes: list[QuestionTypeItem] = Field(default_factory=list)
    # Source material the questions must be based on.
    pptContent: str = ""
    # When True, every generated question must carry a "basis" field.
    requireBasis: bool = False
    # Existing conversation to update; a falsy value (0/None) makes the
    # endpoint create a new conversation.
    ai_conversation_id: Optional[int] = 0
  714. @router.post("/exam/generate_stream")
  715. async def generate_exam_stream(
  716. request: Request,
  717. data: GenerateStreamRequest,
  718. ):
  719. """
  720. 流式生成试卷(按题型分批输出)
  721. """
  722. user = request.state.user
  723. if not user:
  724. return {"statusCode": 401, "msg": "未授权"}
  725. async def event_generator():
  726. db = None
  727. try:
  728. yield f"data: {json.dumps({'type': 'progress', 'message': '正在检索知识库...', 'percent': 5}, ensure_ascii=False)}\n\n"
  729. # 2. 获取上下文
  730. raw_basis_content = (data.pptContent or "").strip()
  731. ppt_content = raw_basis_content
  732. retrieval_query = _build_knowledge_search_query(
  733. raw_basis_content, data.projectType)
  734. combined_source_mode = "用户输入依据:" in raw_basis_content and "PPT提取内容:" in raw_basis_content
  735. keyword_search_mode = bool(
  736. ppt_content and len(
  737. ppt_content) < 50 and "\n" not in ppt_content
  738. )
  739. retrieval_mode = bool(
  740. keyword_search_mode or (
  741. combined_source_mode and retrieval_query)
  742. )
  743. retrieval_succeeded = False
  744. if retrieval_mode:
  745. query_str = retrieval_query or ppt_content
  746. retrieved_docs = await _fetch_knowledge_docs(query_str, "exam/generate_stream")
  747. if retrieved_docs:
  748. logger.info(
  749. f"[exam/generate_stream] 知识库检索成功,拼接了 {len(retrieved_docs)} 个文档块")
  750. retrieval_succeeded = True
  751. original_basis = raw_basis_content[:
  752. 4000] if raw_basis_content else "无"
  753. ppt_content = f"用户指定的主题/关键词:{query_str}\n\n原始出题依据:\n{original_basis}\n\n以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题:\n\n" + "\n\n---\n\n".join(
  754. retrieved_docs)
  755. elif retrieved_docs == []:
  756. logger.warning(
  757. f"[exam/generate_stream] 知识库中未检索到与 '{query_str}' 相关的文档块")
  758. resolved_exam_title = await _resolve_exam_title(
  759. user_title=data.examTitle,
  760. title_source=raw_basis_content,
  761. project_type=data.projectType,
  762. )
  763. # 1. 创建或获取对话
  764. conv_id = _ensure_exam_conversation_with_fresh_session(
  765. user_id=user.user_id,
  766. exam_title=resolved_exam_title,
  767. ai_conversation_id=data.ai_conversation_id,
  768. )
  769. yield f"data: {json.dumps({'type': 'initial', 'ai_conversation_id': conv_id, 'title': resolved_exam_title}, ensure_ascii=False)}\n\n"
  770. if ppt_content and len(ppt_content) > 12000:
  771. head_len = 6000
  772. tail_len = 6000
  773. ppt_content = ppt_content[:head_len] + \
  774. "\n\n(已截断)\n\n" + ppt_content[-tail_len:]
  775. basis_enabled = bool(
  776. data.requireBasis and (
  777. not retrieval_mode or retrieval_succeeded)
  778. )
  779. if data.requireBasis and retrieval_mode and not retrieval_succeeded:
  780. logger.warning(
  781. "[exam/generate_stream] 联合关键词未成功检索到知识库原文,已禁用 basis 字段以避免模型虚构依据")
  782. basis_instruction = (
  783. "【出题依据要求】:本次未成功检索到可核验的知识库原文,因此禁止输出 'basis' 字段。\n"
  784. "禁止虚构规范名称、标准编号、章节条款、出处或依据内容。\n"
  785. )
  786. elif basis_enabled:
  787. basis_instruction = (
  788. "【出题依据要求】:每道题必须附带一个 'basis' 字段。\n"
  789. "basis 必须严格按以下顺序组织:先写相关文件名,再写章节条款,最后写与本题直接相关的正文原文内容。\n"
  790. "推荐格式为:“文件名:xxx;章节条款:xxx;正文:xxx”。\n"
  791. "basis 必须尽量保持知识库原文原貌,模型不得做摘要、润色、优化、改写、同义替换、翻译或重组,不得省略关键表述;知识库原文如果是中文,basis 也必须保持中文原文,不得改写成英文或中英混杂表述。\n"
  792. "如果检索结果中存在文件名或章节标题,必须显式写出;如果同一题涉及多处原文,也必须在每段原文前先写文件名,再写章节条款,最后再写正文。\n"
  793. "如果缺少文件名或章节名,也必须保留固定标识位,分别写为“文件名:未标注”“章节条款:未标注”。\n"
  794. "analysis 字段保留为独立解析字段,负责输出基于原文的答案解析;basis 本身只能是原文依据,不能混入模型总结。\n"
  795. )
  796. else:
  797. basis_instruction = ""
  798. # 过滤出需要生成的题型
  799. valid_types = []
  800. for item in data.questionTypes:
  801. count = item.count or item.questionCount or 0
  802. if count > 0:
  803. valid_types.append(item)
  804. total_types = len(valid_types)
  805. if total_types == 0:
  806. yield f"data: {json.dumps({'type': 'progress', 'message': '未配置任何题型', 'percent': 100}, ensure_ascii=False)}\n\n"
  807. yield "data: [DONE]\n\n"
  808. return
  809. full_exam_data = {
  810. "title": resolved_exam_title,
  811. "totalScore": data.totalScore
  812. }
  813. for index, qtype_item in enumerate(valid_types):
  814. count = qtype_item.count or qtype_item.questionCount
  815. score = qtype_item.scorePerQuestion
  816. name = qtype_item.questionType or qtype_item.name
  817. # 开始生成当前题型,进度区间起始点
  818. start_percent = 10 + int(80 * (index / total_types))
  819. yield f"data: {json.dumps({'type': 'progress', 'message': f'正在生成{name}({index+1}/{total_types})...', 'percent': start_percent}, ensure_ascii=False)}\n\n"
  820. prompt = (
  821. f"请根据以下要求直接生成【{name}】题目,并严格返回纯 JSON,不要输出 markdown 代码块、解释说明或额外文字。\n"
  822. f"生成模式:{data.mode or '未指定'}\n"
  823. f"客户端:{data.client or '未指定'}\n"
  824. f"项目类型:{data.projectType or '未指定'}\n"
  825. f"试卷标题:{resolved_exam_title}\n"
  826. f"出题依据内容:{ppt_content or '无'}\n"
  827. "出题依据内容是本次试题的核心来源,必须围绕该内容中的知识点、术语、流程、规范要求和场景展开。\n"
  828. f"你需要生成:{count}道【{name}】,每道{score}分。\n"
  829. f"{basis_instruction}\n"
  830. f"返回 JSON 中的 count 必须等于 {count},questions 数组必须恰好包含 {count} 个题目对象,不能只返回 1 个示例对象。\n"
  831. "下面的 JSON 仅用于展示字段结构,questions 内的对象格式按此扩展到要求数量。\n"
  832. "JSON 输出结构必须符合以下格式(根据题型返回单个字段):\n"
  833. )
  834. basis_field = ', "basis": "文件名:...;章节条款:...;正文:..."' if basis_enabled else ""
  835. total_score = count * score
  836. if name == "单选题":
  837. prompt += f'{{"singleChoice": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], "selectedAnswer": "A", "analysis": "解析"{basis_field}}}]}}}}\n'
  838. prompt += "注意:选项必须且只能是4个,固定为A、B、C、D,禁止出现E或更多选项。正确答案在A、B、C、D中随机。"
  839. elif name == "多选题":
  840. prompt += f'{{"multiple": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], "selectedAnswers": ["A", "B"], "analysis": "解析"{basis_field}}}]}}}}\n'
  841. prompt += (
  842. "注意:选项必须且只能是4个,固定为A、B、C、D,禁止出现E或更多选项。"
  843. "正确答案的个数必须在2~4个之间随机分布,且不能全部都为同一种数量。\n"
  844. "你必须严格遵守以下多选题正确答案分布规则:\n"
  845. f" - 在本次生成的 {count} 道多选题中,包含2个正确选项的题目占比应接近40%。\n"
  846. f" - 在本次生成的 {count} 道多选题中,包含3个正确选项的题目占比应接近40%。\n"
  847. f" - 在本次生成的 {count} 道多选题中,包含4个正确选项(ABCD)的题目占比不得超过20%。\n"
  848. "你必须强制打乱正确答案的字母组合,严格遵守以下规则:\n"
  849. " - 必须有至少30%的题目正确答案完全不包含A(如 BC、CD、BD、BCD)。\n"
  850. " - 必须有至少30%的题目正确答案采用跳跃分布(如 AC、AD、BD、ABD、ACD)。\n"
  851. " - 绝不能所有题目都从A开始,绝不能大量重复 ABC、ABD、ACD、BCD、ABCD 这类固定模式。\n"
  852. " - 正确答案组合必须在 AB、AC、AD、BC、BD、CD、ABC、ABD、ACD、BCD、ABCD 等形式之间充分打散。\n"
  853. "如果数量分布无法完全整除,也必须尽量逼近上述比例,绝不能出现全部都是3个正确选项或全部都是同一组合模式。"
  854. )
  855. elif name == "判断题":
  856. prompt += f'{{"judge": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "selectedAnswer": "正确", "analysis": "解析"{basis_field}}}]}}}}\n'
  857. prompt += (
  858. "注意:正确答案必须在“正确”和“错误”之间随机分布。\n"
  859. f"判断题必须一次性返回 {count} 道完整题目,questions 数组中必须实际展开为 {count} 个不同的题目对象,"
  860. "不能只给 1 个示例对象,不能让前端或调用方自行复制。"
  861. )
  862. elif name == "简答题":
  863. prompt += f'{{"short": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "outline": {{"keyFactors": "答题要点"}}, "analysis": "解析"{basis_field}}}]}}}}\n'
  864. try:
  865. qwen_response = ""
  866. parsed = None
  867. last_error = None
  868. for attempt in range(2):
  869. current_prompt = prompt
  870. if attempt == 1:
  871. current_prompt += (
  872. f"\n这是第2次重试,上一次生成的【{name}】结果不可用。\n"
  873. f"本次必须一次性完整返回 {count} 道【{name}】,"
  874. "不得少题、不得只返回示例题、不得返回 1 道占位题。\n"
  875. "严禁输出 Thinking Process、Reasoning、思考过程、解释说明、前言、后记、markdown 代码块或任何 JSON 之外的内容。\n"
  876. "你的最终回复必须以 { 开头、以 } 结尾,且整个回复只能是一个可被 json.loads 直接解析的 JSON 对象。"
  877. )
  878. if name == "判断题":
  879. current_prompt += (
  880. f"\n特别强调:你现在生成的是【判断题】。"
  881. f"questions 数组里必须真实返回 {count} 个判断题对象,"
  882. "每个对象都要有独立题干和答案,绝不能只返回 1 个对象作为模板。"
  883. )
  884. logger.warning(
  885. f"[exam/generate_stream] {name}首次生成结果不可用,开始第{attempt + 1}次重试")
  886. qwen_response = await qwen_service.chat(
  887. [{"role": "user", "content": current_prompt}],
  888. disable_reasoning=True,
  889. )
  890. try:
  891. parsed, actual_count = _parse_exam_section_payload(
  892. qwen_response, name)
  893. if actual_count == count:
  894. last_error = None
  895. break
  896. last_error = ValueError(
  897. f"{name}返回题量不完整,期望{count}道,实际{actual_count}道")
  898. logger.warning(
  899. f"[exam/generate_stream] {last_error}; attempt={attempt + 1}/2")
  900. except Exception as inner_error:
  901. last_error = inner_error
  902. repaired_payload = await _repair_exam_section_payload(
  903. raw_response=qwen_response,
  904. question_type=name,
  905. count=count,
  906. score=score,
  907. basis_enabled=basis_enabled,
  908. )
  909. if repaired_payload is not None:
  910. parsed, actual_count = repaired_payload
  911. if actual_count == count:
  912. last_error = None
  913. break
  914. last_error = ValueError(
  915. f"{name}轻量修复后题量仍不完整,期望{count}道,实际{actual_count}道")
  916. logger.warning(
  917. f"[exam/generate_stream] {last_error}; attempt={attempt + 1}/2")
  918. continue
  919. logger.warning(
  920. f"[exam/generate_stream] {name}结果解析失败,准备重试: "
  921. f"attempt={attempt + 1}/2, detail={inner_error!r}")
  922. if last_error is not None:
  923. raise last_error
  924. if resolved_exam_title and not parsed.get("title"):
  925. parsed["title"] = resolved_exam_title
  926. # 合并到完整试卷
  927. full_exam_data.update(parsed)
  928. # 当前题型生成完成,进度推到当前区间的终点
  929. end_percent = 10 + int(80 * ((index + 1) / total_types))
  930. yield f"data: {json.dumps({'type': 'progress', 'message': f'{name}生成完成...', 'percent': end_percent}, ensure_ascii=False)}\n\n"
  931. yield f"data: {json.dumps({'type': 'batch_data', 'questionType': name, 'data': parsed}, ensure_ascii=False)}\n\n"
  932. except Exception as e:
  933. logger.error(
  934. f"生成{name}失败: {e}; raw_snippet={(qwen_response or '')[:300]}")
  935. yield f"data: {json.dumps({'type': 'error', 'message': f'{name}生成失败,未保存残缺试卷,请重试。'}, ensure_ascii=False)}\n\n"
  936. return
  937. # 保存完整试卷到数据库
  938. yield f"data: {json.dumps({'type': 'progress', 'message': '正在保存试卷...', 'percent': 98}, ensure_ascii=False)}\n\n"
  939. request_payload = (
  940. data.model_dump()
  941. if hasattr(data, "model_dump")
  942. else data.dict()
  943. )
  944. _save_exam_messages_with_fresh_session(
  945. conv_id=conv_id,
  946. user_id=user.user_id,
  947. request_payload=request_payload,
  948. exam_payload=full_exam_data,
  949. )
  950. yield f"data: {json.dumps({'type': 'progress', 'message': '试卷生成完成', 'percent': 100}, ensure_ascii=False)}\n\n"
  951. except Exception as e:
  952. logger.error(f"[exam/generate_stream] 异常: {e}")
  953. yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
  954. finally:
  955. if db is not None:
  956. db.close()
  957. yield "data: [DONE]\n\n"
  958. return StreamingResponse(event_generator(), media_type="text/event-stream")
  959. @router.post("/exam/build_single_prompt")
  960. async def build_single_question_prompt(
  961. request: Request,
  962. data: BuildSinglePromptRequest,
  963. db: Session = Depends(get_db)
  964. ):
  965. """生成单题提示词 - 对齐Go版本函数名"""
  966. user = request.state.user
  967. if not user:
  968. return {"statusCode": 401, "msg": "未授权"}
  969. prompt = f"""请生成1道关于{data.topic}的{data.question_type},难度为{data.difficulty}。"""
  970. return {
  971. "statusCode": 200,
  972. "msg": "success",
  973. "data": {"prompt": prompt}
  974. }
  975. class ModifyQuestionRequest(BaseModel):
  976. ai_conversation_id: int
  977. content: str
  978. @router.post("/re_modify_question")
  979. async def re_modify_question(
  980. request: Request,
  981. data: ModifyQuestionRequest,
  982. db: Session = Depends(get_db)
  983. ):
  984. """修改考试题目 - 实际修改ai_message表"""
  985. user = request.state.user
  986. if not user:
  987. return {"statusCode": 401, "msg": "未授权"}
  988. # 修改ai_message表中type='ai'的content
  989. result = db.query(AIMessage).filter(
  990. AIMessage.ai_conversation_id == data.ai_conversation_id,
  991. AIMessage.type == 'ai'
  992. ).update({"content": data.content})
  993. if result == 0:
  994. return {"statusCode": 404, "msg": "消息不存在"}
  995. db.commit()
  996. return {"statusCode": 200, "msg": "success"}
  997. class ReproduceSingleQuestionRequest(BaseModel):
  998. message: str = ""
  999. ai_conversation_id: Optional[int] = None
  1000. regenerate_reason: str = ""
  1001. @router.post("/re_produce_single_question")
  1002. async def re_produce_single_question(
  1003. request: Request,
  1004. data: ReproduceSingleQuestionRequest,
  1005. db: Session = Depends(get_db)
  1006. ):
  1007. """重新生成单题"""
  1008. user = request.state.user
  1009. if not user:
  1010. return {"statusCode": 401, "msg": "未授权"}
  1011. prompt = (data.message or "").strip()
  1012. # 兼容旧版调用:未传 message 时,尝试根据会话和重生成原因构造提示词。
  1013. if not prompt and data.ai_conversation_id:
  1014. message = db.query(AIMessage).filter(
  1015. AIMessage.ai_conversation_id == data.ai_conversation_id,
  1016. AIMessage.type == 'ai'
  1017. ).first()
  1018. if not message:
  1019. return {"statusCode": 404, "msg": "消息不存在"}
  1020. prompt = (message.content or "").strip()
  1021. if data.regenerate_reason:
  1022. prompt = f"{prompt}\n\n请根据以下要求重新生成:{data.regenerate_reason}"
  1023. if not prompt:
  1024. return {"statusCode": 400, "msg": "缺少生成内容"}
  1025. try:
  1026. new_question = await qwen_service.chat([
  1027. {"role": "user", "content": prompt}
  1028. ])
  1029. except Exception as e:
  1030. return {"statusCode": 500, "msg": f"AI生成失败: {str(e)}"}
  1031. return {
  1032. "statusCode": 200,
  1033. "msg": "success",
  1034. "data": {
  1035. "ai_conversation_id": data.ai_conversation_id,
  1036. "new_question": new_question,
  1037. "reply": new_question,
  1038. "content": new_question,
  1039. "message": new_question
  1040. }
  1041. }