exam.py 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350
  1. from routers.chat import _build_conversation_preview, _rag_search, _sanitize_exam_response
  2. from models.chat import AIConversation, AIMessage
  3. from database import SessionLocal
  4. from fastapi.responses import StreamingResponse
  5. import asyncio
  6. import time
  7. import json
  8. import re
  9. from fastapi import APIRouter, Depends, Request
  10. from sqlalchemy.orm import Session
  11. from sqlalchemy.exc import OperationalError
  12. from pydantic import BaseModel, Field
  13. from typing import Optional
  14. from database import get_db
  15. from services.qwen_service import qwen_service
  16. from utils.config import settings
  17. from utils.logger import logger
# Router on which this module's exam endpoints (e.g. /exam/generate_title,
# /exam/build_prompt) are registered.
router = APIRouter()
# Per-question-type settings sent by the exam workshop front end.
class QuestionTypeItem(BaseModel):
    # Type label; build_exam_prompt falls back to `name` when this is empty.
    questionType: str = ""
    # Alternative label consulted when `questionType` is empty.
    name: str = ""
    # Number of questions; build_exam_prompt falls back to `questionCount`
    # when this is 0.
    count: int = 0
    # Alternative count field consulted when `count` is 0.
    questionCount: int = 0
    # Points awarded for each question of this type.
    scorePerQuestion: int = 0
    # Not read in the visible part of this file; presumably a section
    # numbering label for display - TODO confirm.
    romanNumeral: str = ""
# Request body of POST /exam/build_prompt.
class BuildPromptRequest(BaseModel):
    # Free-form values echoed into the generated prompt; an empty value is
    # rendered as "未指定".
    mode: str = ""
    client: str = ""
    projectType: str = ""
    # Caller-chosen exam title; when empty the prompt asks the model to
    # derive a title from the source material.
    examTitle: str = ""
    # Total score echoed into the prompt.
    totalScore: int = 0
    # Requested question types with their counts and per-question scores.
    questionTypes: list[QuestionTypeItem] = Field(default_factory=list)
    # Source material for the questions. Presumably combined by
    # _get_basis_content, which is defined outside this view - TODO confirm
    # which of the two fields takes precedence.
    pptContent: str = ""
    basisContent: str = ""
    # When true, the prompt requires every question to carry a "basis" field.
    requireBasis: bool = False
# Request body of POST /exam/generate_title.
class GenerateTitleRequest(BaseModel):
    # Project type forwarded to the title resolver as extra context.
    projectType: str = ""
    # Source text from which the exam title is derived.
    sourceContent: str = ""
  39. def _get_exam_section(payload: dict, question_type: str) -> Optional[dict]:
  40. if not isinstance(payload, dict):
  41. return None
  42. question_map = {
  43. "单选题": ("singleChoice", "single_choice", "single"),
  44. "判断题": ("judge",),
  45. "多选题": ("multiple", "multiple_choice"),
  46. "简答题": ("short", "short_answer"),
  47. }
  48. keys = question_map.get(question_type, ())
  49. questions_obj = payload.get("questions")
  50. for key in keys:
  51. section = payload.get(key)
  52. if isinstance(section, dict):
  53. return section
  54. if isinstance(questions_obj, dict):
  55. nested = questions_obj.get(key)
  56. if isinstance(nested, dict):
  57. return nested
  58. return None
  59. def _get_section_question_count(section: Optional[dict]) -> int:
  60. if not isinstance(section, dict):
  61. return 0
  62. questions = section.get("questions")
  63. if not isinstance(questions, list):
  64. return 0
  65. return len(questions)
  66. def _is_placeholder_text(value: object) -> bool:
  67. if value is None:
  68. return True
  69. text = str(value).strip()
  70. if not text:
  71. return True
  72. placeholder_patterns = (
  73. r"^\.\.\.$",
  74. r"^…+$",
  75. r"^题干$",
  76. r"^题目$",
  77. r"^题目内容$",
  78. r"^解析$",
  79. r"^解析内容$",
  80. r"^答案解析$",
  81. r"^答题要点$",
  82. r"^参考答案$",
  83. r"^未设置$",
  84. )
  85. return any(re.fullmatch(pattern, text) for pattern in placeholder_patterns)
  86. def _extract_short_outline_text(question: dict) -> str:
  87. outline = question.get("outline") or question.get(
  88. "answer_outline") or question.get("答题要点")
  89. if isinstance(outline, dict):
  90. key_factors = outline.get("keyFactors") or outline.get("key_factors")
  91. if isinstance(key_factors, list):
  92. return ";".join(str(item).strip() for item in key_factors if str(item).strip())
  93. return str(key_factors or "").strip()
  94. if isinstance(outline, list):
  95. return ";".join(str(item).strip() for item in outline if str(item).strip())
  96. if isinstance(outline, str):
  97. return outline.strip()
  98. return ""
  99. def _validate_section_questions(section: Optional[dict], question_type: str) -> tuple[bool, str]:
  100. if not isinstance(section, dict):
  101. return False, f"{question_type}缺少题型对象"
  102. questions = section.get("questions")
  103. if not isinstance(questions, list) or not questions:
  104. return False, f"{question_type}缺少题目列表"
  105. for index, question in enumerate(questions, start=1):
  106. if not isinstance(question, dict):
  107. return False, f"{question_type}第{index}题不是对象"
  108. text = str(
  109. question.get("text")
  110. or question.get("question_text")
  111. or question.get("question")
  112. or question.get("title")
  113. or question.get("content")
  114. or ""
  115. ).strip()
  116. if _is_placeholder_text(text):
  117. return False, f"{question_type}第{index}题题干是占位内容"
  118. if question_type == "简答题":
  119. outline_text = _extract_short_outline_text(question)
  120. if _is_placeholder_text(outline_text):
  121. return False, f"{question_type}第{index}题答题要点是占位内容"
  122. return True, ""
  123. def _get_knowledge_search_api_url() -> str:
  124. aichat_config = getattr(settings, "aichat", None)
  125. aichat_base_url = getattr(aichat_config, "api_url", "").rstrip("/")
  126. if aichat_base_url:
  127. return f"{aichat_base_url}/knowledge/search"
  128. return "http://127.0.0.1:28002/api/v1/knowledge/search"
  129. def _save_exam_messages_with_fresh_session(
  130. conv_id: int,
  131. user_id: int,
  132. request_payload: dict,
  133. exam_payload: dict,
  134. ) -> None:
  135. last_error = None
  136. for attempt in range(2):
  137. save_db = SessionLocal()
  138. try:
  139. now = int(time.time())
  140. user_msg = AIMessage(
  141. ai_conversation_id=conv_id,
  142. user_id=user_id,
  143. type="user",
  144. content=json.dumps(request_payload, ensure_ascii=False),
  145. created_at=now,
  146. updated_at=now,
  147. is_deleted=0,
  148. )
  149. save_db.add(user_msg)
  150. save_db.flush()
  151. ai_msg = AIMessage(
  152. ai_conversation_id=conv_id,
  153. user_id=user_id,
  154. type="ai",
  155. content=json.dumps(exam_payload, ensure_ascii=False),
  156. prev_user_id=user_msg.id,
  157. created_at=now,
  158. updated_at=now,
  159. is_deleted=0,
  160. )
  161. save_db.add(ai_msg)
  162. save_db.commit()
  163. return
  164. except OperationalError as e:
  165. save_db.rollback()
  166. last_error = e
  167. logger.warning(
  168. f"[exam/generate_stream] 保存试卷时数据库连接异常,准备重试: "
  169. f"attempt={attempt + 1}/2, detail={repr(e)}")
  170. if attempt == 1:
  171. raise
  172. except Exception:
  173. save_db.rollback()
  174. raise
  175. finally:
  176. save_db.close()
  177. if last_error:
  178. raise last_error
  179. def _ensure_exam_conversation_with_fresh_session(
  180. user_id: int,
  181. exam_title: str,
  182. ai_conversation_id: Optional[int],
  183. ) -> int:
  184. last_error = None
  185. for attempt in range(2):
  186. db = SessionLocal()
  187. try:
  188. now = int(time.time())
  189. preview = _build_conversation_preview(
  190. exam_title or "智能生成试卷", limit=100)
  191. if not ai_conversation_id:
  192. conversation = AIConversation(
  193. user_id=user_id,
  194. content=preview,
  195. business_type=3,
  196. exam_name=exam_title,
  197. created_at=now,
  198. updated_at=now,
  199. is_deleted=0,
  200. )
  201. db.add(conversation)
  202. db.commit()
  203. db.refresh(conversation)
  204. return conversation.id
  205. db.query(AIConversation).filter(
  206. AIConversation.id == ai_conversation_id,
  207. AIConversation.user_id == user_id,
  208. ).update({
  209. "content": preview,
  210. "business_type": 3,
  211. "exam_name": exam_title,
  212. "updated_at": now,
  213. })
  214. db.commit()
  215. return ai_conversation_id
  216. except OperationalError as e:
  217. db.rollback()
  218. last_error = e
  219. logger.warning(
  220. f"[exam/generate_stream] 初始化试卷会话时数据库连接异常,准备重试: "
  221. f"attempt={attempt + 1}/2, detail={repr(e)}")
  222. if attempt == 1:
  223. raise
  224. except Exception:
  225. db.rollback()
  226. raise
  227. finally:
  228. db.close()
  229. if last_error:
  230. raise last_error
  231. raise RuntimeError("初始化试卷会话失败")
  232. def _fallback_exam_title(source_text: str) -> str:
  233. text = (source_text or "").strip()
  234. if not text:
  235. return "智能生成试卷"
  236. text = re.sub(r"用户指定的主题/关键词[::]\s*", "", text)
  237. text = re.sub(r"以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题[::]?\s*", "", text)
  238. text = re.sub(r"\s+", " ", text).strip()
  239. text = text.strip("`\"'“”‘’[]【】()(){}<>")
  240. for sep in ("。", ";", ";", ",", ",", "\n", ":", ":"):
  241. if sep in text:
  242. text = text.split(sep, 1)[0].strip()
  243. break
  244. text = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", text)
  245. if not text:
  246. return "智能生成试卷"
  247. return _refine_exam_title_candidate(text) or "智能生成试卷"
  248. def _refine_exam_title_candidate(candidate: str) -> str:
  249. text = re.sub(r"\s+", "", (candidate or "").strip())
  250. text = text.strip("`\"'“”‘’[]【】()(){}<>")
  251. text = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", text)
  252. if not text:
  253. return ""
  254. for marker in (
  255. "仅供内部交流学习",
  256. "仅供内部交流",
  257. "请勿外传",
  258. "讲授人",
  259. "授课人",
  260. "主讲人",
  261. "时间",
  262. "日期",
  263. "联系电话",
  264. "联系方式",
  265. ):
  266. idx = text.find(marker)
  267. if idx > 0:
  268. text = text[:idx]
  269. break
  270. for prefix in ("关于", "有关", "针对", "围绕", "基于", "依据", "结合", "开展", "组织"):
  271. if text.startswith(prefix) and len(text) - len(prefix) >= 4:
  272. text = text[len(prefix):]
  273. break
  274. suffixes = (
  275. "相关知识要点", "相关管理要求", "相关技术要求", "相关施工要求", "相关安全要求",
  276. "培训考试题库", "培训考试", "考试题库", "试题题库", "培训题库",
  277. "的培训考核", "的培训考试", "的考试题库", "的考试", "的考核", "的考查", "的测验", "的测试", "的练习",
  278. "相关内容", "主要内容", "核心内容", "培训内容", "培训要点", "知识要点", "基础知识",
  279. "管理要求", "技术要求", "施工要求", "安全要求", "作业要求", "实施要求",
  280. "工作要点", "控制要点", "操作要点", "注意事项", "质量要求", "验收要求",
  281. "考试内容", "试题内容", "考试试题", "考试重点", "培训重点",
  282. "管理规定", "技术规定", "施工规定", "安全规定",
  283. "工作方案", "专项方案", "管理办法", "技术措施",
  284. "相关知识", "基本要求", "有关要求", "总体要求",
  285. "考核", "考查", "测验", "测试", "练习", "试卷", "考试", "题库", "试题", "内容", "要求",
  286. )
  287. changed = True
  288. while changed and text:
  289. changed = False
  290. for suffix in suffixes:
  291. if text.endswith(suffix) and len(text) - len(suffix) >= 4:
  292. text = text[:-len(suffix)]
  293. changed = True
  294. break
  295. # 标题尽量收敛成名词短语,去掉“的”这类连接词
  296. text = re.sub(
  297. r"(?<=[\u4e00-\u9fa5A-Za-z0-9])的(?=[\u4e00-\u9fa5A-Za-z0-9])",
  298. "",
  299. text,
  300. )
  301. text = text.strip("的及与和等")
  302. if len(text) > 15:
  303. text = text[:15]
  304. return text if len(text) >= 2 else ""
  305. def _split_basis_sources(source_text: str) -> tuple[str, str]:
  306. text = _normalize_exam_text((source_text or "").strip())
  307. if not text:
  308. return "", ""
  309. user_match = re.search(
  310. r"用户输入依据[::]\s*([\s\S]*?)(?=\n\s*PPT提取内容[::]|\Z)",
  311. text,
  312. )
  313. ppt_match = re.search(r"PPT提取内容[::]\s*([\s\S]*)\Z", text)
  314. user_text = (user_match.group(1) if user_match else "").strip()
  315. ppt_text = (ppt_match.group(1) if ppt_match else "").strip()
  316. if user_text or ppt_text:
  317. return user_text, ppt_text
  318. return text, ""
  319. def _extract_basis_candidate_lines(source_text: str, max_lines: int = 8) -> list[str]:
  320. text = _normalize_exam_text((source_text or "").strip())
  321. if not text:
  322. return []
  323. candidates = []
  324. seen = set()
  325. metadata_patterns = (
  326. r"^PPT文件信息",
  327. r"^PPT第一页内容提取结果",
  328. r"^提取的文本内容",
  329. r"^WPS",
  330. r".*全屏显示.*",
  331. r".*幻灯片放映.*",
  332. r"^DocumentProperties$",
  333. r"^DocumentSecurity$",
  334. r"^DocumentProtection$",
  335. r"^文件名[::]",
  336. r"^文件大小[::]",
  337. r"^文件类型[::]",
  338. r"^修改时间[::]",
  339. r"^PPT正文提取失败",
  340. r"^请手动补充",
  341. r"^请手动描述",
  342. r"^您可以描述",
  343. r"^仅供内部交流",
  344. r"^请勿外传",
  345. r"^讲授人[::]",
  346. r"^授课人[::]",
  347. r"^主讲人[::]",
  348. r"^时间[::]",
  349. r"^日期[::]",
  350. r"^联系电话[::]",
  351. r"^联系方式[::]",
  352. r"^[0-9]+[.、]",
  353. )
  354. for raw_line in text.splitlines():
  355. line = raw_line.strip()
  356. if not line:
  357. continue
  358. line = re.sub(
  359. r"^(用户输入依据|PPT提取内容|出题依据内容|出题依据|培训主题|主题)[::]\s*", "", line)
  360. line = re.sub(r"^第\s*\d+\s*(页|张|部分|章|节)[::]?\s*", "", line)
  361. line = re.sub(r"\.(ppt|pptx)$", "", line, flags=re.IGNORECASE)
  362. if any(re.search(pattern, line, re.IGNORECASE) for pattern in metadata_patterns):
  363. continue
  364. line = re.sub(r"\s+", "", line)
  365. if len(line) < 2:
  366. continue
  367. if re.fullmatch(r"[0-9A-Za-z_-]+", line):
  368. continue
  369. if "文件名" in line or "PPT文件信息" in line or "WPS" in line:
  370. continue
  371. if line not in seen:
  372. candidates.append(line)
  373. seen.add(line)
  374. if len(candidates) >= max_lines:
  375. break
  376. return candidates
  377. def _build_knowledge_search_query(source_text: str, project_type: str = "") -> str:
  378. text = _normalize_exam_text((source_text or "").strip())
  379. if not text:
  380. return ""
  381. user_text, ppt_text = _split_basis_sources(text)
  382. candidates = []
  383. seen = set()
  384. title_candidate = _extract_exam_title_from_source(text, project_type)
  385. if title_candidate and title_candidate != "智能生成试卷":
  386. candidates.append(title_candidate)
  387. seen.add(title_candidate)
  388. for block in filter(None, [user_text, ppt_text, text]):
  389. for line in _extract_basis_candidate_lines(block, max_lines=6):
  390. refined = _refine_exam_title_candidate(line)
  391. if len(refined) < 2:
  392. continue
  393. if refined not in seen:
  394. candidates.append(refined)
  395. seen.add(refined)
  396. if len(candidates) >= 5:
  397. break
  398. if len(candidates) >= 5:
  399. break
  400. return " ".join(candidates[:5]).strip()
  401. def _extract_exam_title_from_source(source_text: str, project_type: str = "") -> str:
  402. text = (source_text or "").strip()
  403. if not text:
  404. return "智能生成试卷"
  405. text = _normalize_exam_text(text)
  406. text = re.sub(r"用户指定的主题/关键词[::]\s*", "", text)
  407. text = re.sub(r"^(出题依据内容|出题依据|培训主题|主题)[::]\s*", "", text)
  408. text = re.sub(r"以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题[::]?\s*", "", text)
  409. text = re.sub(r"[ \t]+", " ", text).strip()
  410. candidate_pool = []
  411. for line in _extract_basis_candidate_lines(text, max_lines=8):
  412. candidate_pool.append(line)
  413. for marker in ("\n\n", "\n", "。", ";", ";"):
  414. if marker in text:
  415. head = text.split(marker, 1)[0].strip()
  416. if head:
  417. candidate_pool.append(head)
  418. break
  419. stop_phrases = (
  420. "出题依据", "正文", "章节条款", "文件名", "答案解析", "要求", "规定",
  421. "内容", "相关", "进行", "采用", "包括", "本项目", "本工程", "本次",
  422. "施工", "安全", "管理", "技术", "规范", "标准", "方案", "办法", "措施",
  423. "PPT文件信息", "PPT第一页内容提取结果", "提取的文本内容",
  424. "WPS演示", "全屏显示", "幻灯片放映",
  425. "仅供内部交流学习", "仅供内部交流", "请勿外传", "讲授人", "授课人", "主讲人",
  426. "时间", "日期", "联系电话", "联系方式",
  427. )
  428. candidates = []
  429. for piece in candidate_pool + re.split(r"[,,、/()()\-::]", text):
  430. cleaned = re.sub(r"\s+", "", piece)
  431. cleaned = re.sub(r"^[0-9A-Za-z一二三四五六七八九十.]+$", "", cleaned)
  432. cleaned = re.sub(r"[^\u4e00-\u9fa5A-Za-z0-9]", "", cleaned)
  433. if len(cleaned) < 2:
  434. continue
  435. if cleaned in stop_phrases:
  436. continue
  437. candidates.append(cleaned)
  438. if candidates:
  439. def score(candidate: str) -> tuple[int, int]:
  440. keyword_bonus = sum(
  441. 2 for token in ("桥梁", "隧道", "桩基", "桩基础", "钢筋", "混凝土", "施工", "安全", "验收", "培训", "作业", "起重", "便桥")
  442. if token in candidate
  443. )
  444. return (keyword_bonus, min(len(candidate), 15))
  445. best = max(candidates, key=score)
  446. best = _refine_exam_title_candidate(best)
  447. if len(best) >= 2:
  448. return best
  449. prefix = _fallback_exam_title(text)
  450. if prefix != "智能生成试卷":
  451. return prefix
  452. project_prefix = re.sub(
  453. r"[^\u4e00-\u9fa5A-Za-z0-9]", "", (project_type or "").strip())
  454. if project_prefix:
  455. return f"{project_prefix[:8]}试卷"
  456. return "智能生成试卷"
  457. def _build_exam_section_example(
  458. question_type: str,
  459. count: int,
  460. score: int,
  461. basis_enabled: bool,
  462. ) -> str:
  463. basis_field = ', "basis": "文件名:...;章节条款:...;正文:..."' if basis_enabled else ""
  464. total_score = count * score
  465. if question_type == "单选题":
  466. return (
  467. f'{{"singleChoice": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  468. f'"count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], '
  469. f'"selectedAnswer": "A", "analysis": "解析"{basis_field}}}]}}}}'
  470. )
  471. if question_type == "多选题":
  472. return (
  473. f'{{"multiple": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  474. f'"count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], '
  475. f'"selectedAnswers": ["A", "B"], "analysis": "解析"{basis_field}}}]}}}}'
  476. )
  477. if question_type == "判断题":
  478. return (
  479. f'{{"judge": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  480. f'"count": {count}, "questions": [{{"text": "题干", "selectedAnswer": "正确", '
  481. f'"analysis": "解析"{basis_field}}}]}}}}'
  482. )
  483. return (
  484. f'{{"short": {{"scorePerQuestion": {score}, "totalScore": {total_score}, '
  485. f'"count": {count}, "questions": [{{"text": "题干", "outline": {{"keyFactors": "答题要点"}}, '
  486. f'"analysis": "解析"{basis_field}}}]}}}}'
  487. )
  488. def _normalize_exam_text(value: str) -> str:
  489. text = (value or "").strip()
  490. if not text:
  491. return text
  492. text = text.replace("章节名/条款名", "章节条款")
  493. text = text.replace("\\sim", "~")
  494. text = text.replace("$", "")
  495. text = re.sub(r"\\mathrm\s*\{\s*([^{}]+?)\s*\}",
  496. lambda m: re.sub(r"\s+", "", m.group(1)), text)
  497. text = re.sub(r"([A-Za-z])\s*\^\s*\{\s*2\s*\}", r"\1²", text)
  498. text = re.sub(r"([A-Za-z])\s*\^\s*\{\s*3\s*\}", r"\1³", text)
  499. text = re.sub(r"(?<=\d)\s+(?=\d)", "", text)
  500. text = re.sub(r"(?<=\d)\s*\.\s*(?=\d)", ".", text)
  501. text = re.sub(r"(?<=\d)\s*~\s*(?=\d)", "~", text)
  502. text = re.sub(r"(?<=\d)\s*([a-zA-Zµμ%℃°²³]+)\b", r" \1", text)
  503. text = re.sub(r"\b([A-Za-z])\s+([²³])\b", r"\1\2", text)
  504. text = re.sub(r"\s+", " ", text)
  505. return text.strip()
  506. def _normalize_exam_payload_texts(value):
  507. if isinstance(value, dict):
  508. return {key: _normalize_exam_payload_texts(val) for key, val in value.items()}
  509. if isinstance(value, list):
  510. return [_normalize_exam_payload_texts(item) for item in value]
  511. if isinstance(value, str):
  512. return _normalize_exam_text(value)
  513. return value
  514. def _parse_exam_section_payload(raw_response: str, question_type: str) -> tuple[dict, int]:
  515. cleaned = _sanitize_exam_response(raw_response)
  516. parsed = _normalize_exam_payload_texts(json.loads(cleaned))
  517. section = _get_exam_section(parsed, question_type)
  518. actual_count = _get_section_question_count(section)
  519. return parsed, actual_count
  520. async def _repair_exam_section_payload(
  521. raw_response: str,
  522. question_type: str,
  523. count: int,
  524. score: int,
  525. basis_enabled: bool,
  526. ) -> Optional[tuple[dict, int]]:
  527. source_text = (raw_response or "").strip()
  528. if not source_text:
  529. return None
  530. repair_prompt = (
  531. f"下面是一段为【{question_type}】生成的原始内容,其中可能夹杂 Thinking Process、说明文字或不合法 JSON。\n"
  532. "任务:不要重新出题,只对已有内容做结构化整理,输出一个可被 json.loads 直接解析的 JSON 对象。\n"
  533. "要求:\n"
  534. f"1. 顶层只保留当前题型对应字段,count 应写为 {count}。\n"
  535. "2. 只整理原有题目内容,不要新增解释、前言、后记、markdown 代码块或推理过程。\n"
  536. "3. 如果原文中已经有 analysis、basis、options、selectedAnswer/selectedAnswers 等字段,尽量原样保留。\n"
  537. "4. 如果存在 basis 字段,必须保留其原始语言和原始含义;知识库原文是中文时,basis 中的文件名、章节条款、正文都必须保持中文,不得翻译成英文,不得改写为英文摘要。\n"
  538. "5. 如果原文内容本身不足指定题量,不要臆造新题,按已有内容整理即可。\n"
  539. "6. 最终回复必须以 { 开头、以 } 结尾。\n"
  540. f"目标 JSON 结构示例:{_build_exam_section_example(question_type, count, score, basis_enabled)}\n"
  541. f"原始内容如下:\n{source_text[:6000]}"
  542. )
  543. try:
  544. repaired_response = await qwen_service.chat(
  545. [{"role": "user", "content": repair_prompt}],
  546. disable_reasoning=True,
  547. )
  548. parsed, actual_count = _parse_exam_section_payload(
  549. repaired_response, question_type)
  550. logger.info(
  551. f"[exam/generate_stream] {question_type}轻量修复成功: repaired_count={actual_count}"
  552. )
  553. return parsed, actual_count
  554. except Exception as e:
  555. logger.warning(
  556. f"[exam/generate_stream] {question_type}轻量修复失败: {repr(e)}"
  557. )
  558. return None
  559. async def _resolve_exam_title(
  560. user_title: str,
  561. title_source: str,
  562. project_type: str = "",
  563. ) -> str:
  564. manual_title = (user_title or "").strip()
  565. if manual_title:
  566. return manual_title
  567. source_text = (title_source or "").strip()
  568. if not source_text:
  569. return "智能生成试卷"
  570. model_source = source_text[:6000]
  571. title_prompt = (
  572. "你是考试命名助手。请基于用户提供的出题依据、培训材料或 PPT 提取内容,"
  573. "提炼核心主题并生成一个简短、正式、适合试卷顶部展示的标题。\n"
  574. "要求:\n"
  575. "1. 只输出 JSON,不要输出 markdown 代码块或解释文字。\n"
  576. "2. 标题必须简短,控制在 6-15 个汉字内,避免口语化。\n"
  577. "3. 优先提炼正文主题,不要机械拼接“考试/考核/题库/试卷”等泛词,除非确有必要。\n"
  578. "4. 如果内容里出现公司或组织名称,要么完全省略,要么保留完整全称,禁止擅自简写。\n"
  579. "5. 如果同时包含用户输入依据和 PPT 内容,要综合两者,不要只取其一。\n"
  580. f"项目类型:{project_type or '未指定'}\n"
  581. f"出题依据内容:\n{model_source}\n\n"
  582. '请严格返回:{"title":"简短试卷名称"}'
  583. )
  584. try:
  585. model_response = await qwen_service.chat(
  586. [{"role": "user", "content": title_prompt}],
  587. disable_reasoning=True,
  588. )
  589. json_match = re.search(r"\{.*\}", model_response.strip(), re.DOTALL)
  590. if json_match:
  591. parsed = json.loads(json_match.group())
  592. model_title = _refine_exam_title_candidate(
  593. str(parsed.get("title") or "").strip()
  594. )
  595. if 2 <= len(model_title) <= 15:
  596. logger.info(
  597. f"[exam/title] 模型生成试卷标题成功: source_len={len(source_text)}, title={model_title}"
  598. )
  599. return model_title
  600. else:
  601. plain_title = _refine_exam_title_candidate(model_response.strip())
  602. if 2 <= len(plain_title) <= 15:
  603. logger.info(
  604. f"[exam/title] 模型纯文本试卷标题成功: source_len={len(source_text)}, title={plain_title}"
  605. )
  606. return plain_title
  607. except Exception as e:
  608. logger.warning(f"[exam/title] 模型生成试卷标题失败,回退规则提取: {repr(e)}")
  609. resolved = _extract_exam_title_from_source(source_text, project_type)
  610. logger.info(
  611. f"[exam/title] 基于用户输入出题依据提取试卷标题: source_len={len(source_text)}, title={resolved}"
  612. )
  613. return resolved
  614. @router.post("/exam/generate_title")
  615. async def generate_exam_title(
  616. request: Request,
  617. data: GenerateTitleRequest,
  618. ):
  619. user = request.state.user
  620. if not user:
  621. return {"statusCode": 401, "msg": "未授权"}
  622. resolved_title = await _resolve_exam_title(
  623. user_title="",
  624. title_source=data.sourceContent,
  625. project_type=data.projectType,
  626. )
  627. return {
  628. "statusCode": 200,
  629. "msg": "success",
  630. "data": {"title": resolved_title}
  631. }
  632. async def _fetch_knowledge_docs(query_str: str, log_prefix: str) -> Optional[list[str]]:
  633. import httpx
  634. search_api_url = _get_knowledge_search_api_url()
  635. timeout = httpx.Timeout(20.0, connect=5.0)
  636. last_error = None
  637. for attempt in range(2):
  638. try:
  639. async with httpx.AsyncClient(timeout=timeout) as client:
  640. resp = await client.post(
  641. search_api_url,
  642. json={"query_str": query_str, "n": 20}
  643. )
  644. if resp.status_code != 200:
  645. logger.error(
  646. f"[{log_prefix}] 知识库检索API响应错误: url={search_api_url}, "
  647. f"status={resp.status_code}, body={resp.text}")
  648. return None
  649. search_data = resp.json()
  650. results = search_data.get("results")
  651. if results is None:
  652. results = search_data.get("data", [])
  653. if isinstance(results, dict):
  654. results = results.get("items", [])
  655. if not isinstance(results, list):
  656. results = []
  657. retrieved_docs = []
  658. for res in results:
  659. if not isinstance(res, dict):
  660. continue
  661. doc_text = res.get("document") or res.get(
  662. "content") or res.get("text")
  663. if doc_text:
  664. retrieved_docs.append(doc_text)
  665. return retrieved_docs
  666. except httpx.ReadTimeout as e:
  667. last_error = e
  668. if attempt == 0:
  669. logger.warning(
  670. f"[{log_prefix}] 知识库检索读取超时,准备重试: "
  671. f"url={search_api_url}, timeout=20s, attempt={attempt + 1}/2")
  672. continue
  673. logger.exception(
  674. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  675. f"exc_type={type(e).__name__}, detail={repr(e)}")
  676. return None
  677. except Exception as e:
  678. last_error = e
  679. logger.exception(
  680. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  681. f"exc_type={type(e).__name__}, detail={repr(e)}")
  682. return None
  683. if last_error:
  684. logger.exception(
  685. f"[{log_prefix}] 知识库检索失败: url={search_api_url}, "
  686. f"exc_type={type(last_error).__name__}, detail={repr(last_error)}")
  687. return None
  688. @router.post("/exam/build_prompt")
  689. async def build_exam_prompt(
  690. request: Request,
  691. data: BuildPromptRequest,
  692. db: Session = Depends(get_db)
  693. ):
  694. """根据前端考试工坊参数生成提示词"""
  695. user = request.state.user
  696. if not user:
  697. return {"statusCode": 401, "msg": "未授权"}
  698. question_desc = []
  699. total_count = 0
  700. for item in data.questionTypes:
  701. count = item.count or item.questionCount or 0
  702. score = item.scorePerQuestion or 0
  703. qtype = item.questionType or item.name or "未命名题型"
  704. total_count += count
  705. question_desc.append(f"{qtype}{count}道,每道{score}分")
  706. question_text = ";".join(question_desc) if question_desc else "题型未提供"
  707. question_schema_lines = []
  708. for item in data.questionTypes:
  709. count = item.count or item.questionCount or 0
  710. score = item.scorePerQuestion or 0
  711. qtype = item.questionType or item.name or "未命名题型"
  712. if count <= 0:
  713. continue
  714. question_schema_lines.append(f"- {qtype}: {count}道,每道{score}分")
  715. question_schema = "\n".join(
  716. question_schema_lines) if question_schema_lines else "- 未提供有效题型"
  717. ppt_content = _get_basis_content(data)
  718. retrieval_query = _build_knowledge_search_query(
  719. ppt_content, data.projectType)
  720. combined_source_mode = "用户输入依据:" in ppt_content and "PPT提取内容:" in ppt_content
  721. # === 新增:如果出题依据不是真正的长文本,而是关键词,则调用远端知识库检索服务 ===
  722. # 启发式判断:如果文本长度小于 50 字,且不包含明显的段落换行,认为它是关键词,去查知识库
  723. if ppt_content and (
  724. (len(ppt_content) < 50 and "\n" not in ppt_content)
  725. or (combined_source_mode and retrieval_query)
  726. ):
  727. query_str = retrieval_query or ppt_content
  728. logger.info(
  729. f"[exam/build_prompt] 检测到可检索出题依据,尝试调用知识库检索API: query='{query_str}'")
  730. retrieved_docs = await _fetch_knowledge_docs(query_str, "exam/build_prompt")
  731. if retrieved_docs:
  732. retrieved_text = "\n\n---\n\n".join(retrieved_docs)
  733. logger.info(
  734. f"[exam/build_prompt] 知识库检索成功,拼接了 {len(retrieved_docs)} 个文档块,总长度 {len(retrieved_text)}")
  735. # 覆盖原来的 ppt_content,改为:用户关键词 + 检索到的真实知识库内容
  736. ppt_content = (
  737. f"用户指定的主题/关键词:{query_str}\n\n"
  738. f"原始出题依据:\n{text[:2000] if (text := _get_basis_content(data)) else '无'}\n\n"
  739. "以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题:\n\n"
  740. f"{retrieved_text}"
  741. )
  742. elif retrieved_docs == []:
  743. logger.warning(
  744. f"[exam/build_prompt] 知识库中未检索到与 '{query_str}' 相关的文档块")
  745. ppt_content = f"(注:未能在知识库中检索到相关文档,请仅根据以下关键词及原始依据出题:{query_str}\n\n{_get_basis_content(data)})"
  746. if ppt_content:
  747. max_chars = 12000
  748. if len(ppt_content) > max_chars:
  749. head_len = max_chars // 2
  750. tail_len = max_chars - head_len
  751. ppt_content = (
  752. ppt_content[:head_len]
  753. + "\n\n(出题依据内容过长,已截断,以下为结尾片段)\n\n"
  754. + ppt_content[-tail_len:]
  755. )
  756. logger.info(
  757. f"[exam/build_prompt] basis content truncated: original_len={len(_get_basis_content(data))} kept_len={len(ppt_content)}"
  758. )
  759. basis_field = ', "basis": "<文件名:...;章节条款:...;正文:...>"' if data.requireBasis else ''
  760. basis_instruction = (
  761. "【出题依据要求】:每道题必须附带一个 'basis' 字段。\n"
  762. "basis 必须严格按以下顺序组织:先写相关文件名,再写章节条款,最后写与题目直接相关的正文原文内容。\n"
  763. "推荐格式为:“文件名:xxx;章节条款:xxx;正文:xxx”。\n"
  764. "basis 只能填写知识库中的原文依据,不得改写成题目,不得出现题干句式,不得包含选项内容,不得直接写出正确答案、错误答案、解析结论或“应选A/应选B/正确/错误”等判断结果。\n"
  765. "basis 应尽量保持知识库原文原貌,不得做摘要、润色、优化、同义替换、翻译或重组;知识库原文如果是中文,basis 也必须保持中文原文,不得改写成英文或中英混杂表述;如果缺少文件名或章节条款,也必须保留固定标识位,分别写为“文件名:未标注”“章节条款:未标注”。\n"
  766. "答案解析请放在独立的 analysis 字段中,不要混入 basis。\n"
  767. ) if data.requireBasis else ""
  768. prompt = (
  769. "请根据以下要求直接生成一份完整试卷,并严格返回纯 JSON,不要输出 markdown 代码块、解释说明或额外文字。\n"
  770. f"生成模式:{data.mode or '未指定'}\n"
  771. f"客户端:{data.client or '未指定'}\n"
  772. f"项目类型:{data.projectType or '未指定'}\n"
  773. f"考试标题:{data.examTitle if data.examTitle else '未提供。请你仔细阅读出题依据内容,高度凝练其核心主题(不要生硬拼凑前缀),生成一个不超过15个字的贴切的试卷名称。特别注意:如果试卷名称中包含公司或组织名称,要么完全省略不写,要么必须使用完整的全称(例如:如果原内容是“蜀道矿业集团”,必须写“蜀道矿业集团”,绝不能擅自简写为“蜀道矿业”)'}\n"
  774. f"总分:{data.totalScore or 0}\n"
  775. f"总题量:{total_count}\n"
  776. f"题型要求:{question_text}\n"
  777. f"出题依据内容:{ppt_content or '无'}\n"
  778. "出题依据内容是本次试卷的核心来源,所有题目必须围绕该内容中的知识点、术语、流程、规范要求和场景展开。\n"
  779. "如果出题依据内容中出现了章节、条款、培训主题或专业术语,题目必须优先考查这些内容,不能偏离到无关知识。\n"
  780. "单选题、多选题、判断题和简答题的题干、选项、答案解析都要与出题依据内容直接相关,不能泛泛而谈。\n"
  781. "请结合出题依据内容、工程类型和题型要求,生成有具体内容、具体选项、具体答案、具体解析的试卷。\n"
  782. "凡是题型配置中 count 大于 0 的题型,必须返回对应数量的非空题目,不能返回空数组,不能少题。\n"
  783. "即使出题依据内容较短,也要优先围绕已有内容中的关键词、术语、场景和要求组织出题,不能因为信息少而返回空题目。\n"
  784. "如果某题型要求生成 3 道题,就必须生成 3 道完整可作答的题目,少于要求数量视为不合格。\n"
  785. "禁止输出“选项A”“题目1”“桥梁工程相关单选题1”“题目内容”“解析内容”这类占位内容,所有题目必须是可直接展示和作答的真实内容。\n"
  786. "【极度重要的多选题防作弊要求】:\n"
  787. "近期发现你生成的多选题中,正确答案总是偷懒按顺序排列(比如全都包含A、全都连号如AB、ABC、ABCD)!这在真实考试中是绝对不允许的。\n"
  788. "你必须强制打乱正确答案的字母组合,严格遵守以下分布规则:\n"
  789. " - 必须有至少 30% 的题目正确答案【完全不包含A】(如 BC, CD, BD, BCD)!\n"
  790. " - 必须有至少 30% 的题目正确答案【跳跃分布】(如 AC, AD, BD, ABD, ACD)!\n"
  791. " - 包含2个正确选项的题目占比应达到 40%\n"
  792. " - 包含3个正确选项的题目占比应达到 40%\n"
  793. " - 包含4个正确选项的题目(ABCD)绝对不能超过 20%!\n"
  794. "【答案随机性要求】:\n"
  795. "1. 单选题:提供4个选项(A/B/C/D),正确答案只能是其中1个,且正确答案必须在A、B、C、D中随机分布,绝不能所有题目的正确答案都相同。\n"
  796. "2. 多选题:提供4个选项(A/B/C/D),正确答案的个数在2~4个之间随机,且答案组合必须随机(例如:可以是AB、AC、AD、BC、BD、CD、ABC、ABD、BCD、ABCD等),绝不能都从A开始或全都是ABCD。\n"
  797. "3. 判断题:正确答案必须在“正确”和“错误”之间随机分布,绝不能所有判断题的答案全都是“正确”或全都是“错误”。\n"
  798. f"{basis_instruction}"
  799. "下面的 JSON 结构示例只用于说明字段格式,示例中的字符串不能原样照抄到最终结果中,最终返回的每个字符串都必须替换成结合出题依据生成的具体内容。\n"
  800. "JSON 输出结构必须符合以下格式:\n"
  801. "{\n"
  802. ' "title": "试卷标题",\n'
  803. ' "totalScore": 100,\n'
  804. ' "totalQuestions": 10,\n'
  805. f' "singleChoice": {{"scorePerQuestion": 2, "totalScore": 20, "count": 10, "questions": [{{"text": "<单选题题干>", "options": [{{"key": "A", "text": "<选项A具体内容>"}}, {{"key": "B", "text": "<选项B具体内容>"}}, {{"key": "C", "text": "<选项C具体内容>"}}, {{"key": "D", "text": "<选项D具体内容>"}}], "answer": "A", "analysis": "<解析内容>"{basis_field}}}]}},\n'
  806. f' "judge": {{"scorePerQuestion": 2, "totalScore": 0, "count": 0, "questions": [{{"text": "<判断题题干>", "answer": "正确", "analysis": "<解析内容>"{basis_field}}}]}},\n'
  807. f' "multiple": {{"scorePerQuestion": 3, "totalScore": 0, "count": 0, "questions": [{{"text": "<多选题题干>", "options": [{{"key": "A", "text": "<选项A具体内容>"}}, {{"key": "B", "text": "<选项B具体内容>"}}, {{"key": "C", "text": "<选项C具体内容>"}}, {{"key": "D", "text": "<选项D具体内容>"}}], "answers": ["A", "C"], "analysis": "<解析内容>"{basis_field}}}]}},\n'
  808. f' "short": {{"scorePerQuestion": 10, "totalScore": 0, "count": 0, "questions": [{{"text": "<简答题题干>", "outline": {{"keyFactors": "<答题要点>", "measures": "<参考措施>"}}{basis_field}}}]}}\n'
  809. "}\n"
  810. "请按下面的题型配置生成对应数量的题目,没有的题型 count 返回 0、questions 返回空数组:\n"
  811. f"{question_schema}"
  812. )
  813. return {
  814. "statusCode": 200,
  815. "msg": "success",
  816. "data": {"prompt": prompt}
  817. }
  818. class BuildSinglePromptRequest(BaseModel):
  819. question_type: str
  820. topic: str
  821. difficulty: str
  822. class GenerateStreamRequest(BaseModel):
  823. mode: str = ""
  824. client: str = ""
  825. projectType: str = ""
  826. examTitle: str = ""
  827. totalScore: int = 0
  828. questionTypes: list[QuestionTypeItem] = Field(default_factory=list)
  829. pptContent: str = ""
  830. basisContent: str = ""
  831. requireBasis: bool = False
  832. ai_conversation_id: Optional[int] = 0
  833. def _get_basis_content(data: BuildPromptRequest | GenerateStreamRequest) -> str:
  834. return ((getattr(data, "basisContent", "") or getattr(data, "pptContent", "")) or "").strip()
  835. @router.post("/exam/generate_stream")
  836. async def generate_exam_stream(
  837. request: Request,
  838. data: GenerateStreamRequest,
  839. ):
  840. """
  841. 流式生成试卷(按题型分批输出)
  842. """
  843. user = request.state.user
  844. if not user:
  845. return {"statusCode": 401, "msg": "未授权"}
  846. async def event_generator():
  847. db = None
  848. try:
  849. yield f"data: {json.dumps({'type': 'progress', 'message': '正在检索知识库...', 'percent': 5}, ensure_ascii=False)}\n\n"
  850. yield f"data: {json.dumps({'type': 'progress', 'message': '正在分析试卷标题...', 'percent': 8}, ensure_ascii=False)}\n\n"
  851. # 2. 获取上下文
  852. raw_basis_content = _get_basis_content(data)
  853. ppt_content = raw_basis_content
  854. retrieval_query = _build_knowledge_search_query(
  855. raw_basis_content, data.projectType)
  856. combined_source_mode = "用户输入依据:" in raw_basis_content and "PPT提取内容:" in raw_basis_content
  857. keyword_search_mode = bool(
  858. ppt_content and len(
  859. ppt_content) < 50 and "\n" not in ppt_content
  860. )
  861. retrieval_mode = bool(
  862. keyword_search_mode or (
  863. combined_source_mode and retrieval_query)
  864. )
  865. retrieval_succeeded = False
  866. if retrieval_mode:
  867. query_str = retrieval_query or ppt_content
  868. retrieved_docs = await _fetch_knowledge_docs(query_str, "exam/generate_stream")
  869. if retrieved_docs:
  870. logger.info(
  871. f"[exam/generate_stream] 知识库检索成功,拼接了 {len(retrieved_docs)} 个文档块")
  872. retrieval_succeeded = True
  873. original_basis = raw_basis_content[:
  874. 4000] if raw_basis_content else "无"
  875. ppt_content = f"用户指定的主题/关键词:{query_str}\n\n原始出题依据:\n{original_basis}\n\n以下是从知识库中检索到的相关原文片段,请严格基于这些原文片段出题:\n\n" + "\n\n---\n\n".join(
  876. retrieved_docs)
  877. elif retrieved_docs == []:
  878. logger.warning(
  879. f"[exam/generate_stream] 知识库中未检索到与 '{query_str}' 相关的文档块")
  880. resolved_exam_title = await _resolve_exam_title(
  881. user_title=data.examTitle,
  882. title_source=raw_basis_content,
  883. project_type=data.projectType,
  884. )
  885. # 1. 创建或获取对话
  886. conv_id = _ensure_exam_conversation_with_fresh_session(
  887. user_id=user.user_id,
  888. exam_title=resolved_exam_title,
  889. ai_conversation_id=data.ai_conversation_id,
  890. )
  891. yield f"data: {json.dumps({'type': 'initial', 'ai_conversation_id': conv_id, 'title': resolved_exam_title}, ensure_ascii=False)}\n\n"
  892. if ppt_content and len(ppt_content) > 12000:
  893. head_len = 6000
  894. tail_len = 6000
  895. ppt_content = ppt_content[:head_len] + \
  896. "\n\n(已截断)\n\n" + ppt_content[-tail_len:]
  897. basis_enabled = bool(
  898. data.requireBasis and (
  899. not retrieval_mode or retrieval_succeeded)
  900. )
  901. if data.requireBasis and retrieval_mode and not retrieval_succeeded:
  902. logger.warning(
  903. "[exam/generate_stream] 联合关键词未成功检索到知识库原文,已禁用 basis 字段以避免模型虚构依据")
  904. basis_instruction = (
  905. "【出题依据要求】:本次未成功检索到可核验的知识库原文,因此禁止输出 'basis' 字段。\n"
  906. "禁止虚构规范名称、标准编号、章节条款、出处或依据内容。\n"
  907. )
  908. elif basis_enabled:
  909. basis_instruction = (
  910. "【出题依据要求】:每道题必须附带一个 'basis' 字段。\n"
  911. "basis 必须严格按以下顺序组织:先写相关文件名,再写章节条款,最后写与本题直接相关的正文原文内容。\n"
  912. "推荐格式为:“文件名:xxx;章节条款:xxx;正文:xxx”。\n"
  913. "basis 必须尽量保持知识库原文原貌,模型不得做摘要、润色、优化、改写、同义替换、翻译或重组,不得省略关键表述;知识库原文如果是中文,basis 也必须保持中文原文,不得改写成英文或中英混杂表述。\n"
  914. "如果检索结果中存在文件名或章节标题,必须显式写出;如果同一题涉及多处原文,也必须在每段原文前先写文件名,再写章节条款,最后再写正文。\n"
  915. "如果缺少文件名或章节名,也必须保留固定标识位,分别写为“文件名:未标注”“章节条款:未标注”。\n"
  916. "analysis 字段保留为独立解析字段,负责输出基于原文的答案解析;basis 本身只能是原文依据,不能混入模型总结。\n"
  917. )
  918. else:
  919. basis_instruction = ""
  920. # 过滤出需要生成的题型
  921. valid_types = []
  922. for item in data.questionTypes:
  923. count = item.count or item.questionCount or 0
  924. if count > 0:
  925. valid_types.append(item)
  926. total_types = len(valid_types)
  927. if total_types == 0:
  928. yield f"data: {json.dumps({'type': 'progress', 'message': '未配置任何题型', 'percent': 100}, ensure_ascii=False)}\n\n"
  929. yield "data: [DONE]\n\n"
  930. return
  931. full_exam_data = {
  932. "title": resolved_exam_title,
  933. "totalScore": data.totalScore
  934. }
  935. for index, qtype_item in enumerate(valid_types):
  936. count = qtype_item.count or qtype_item.questionCount
  937. score = qtype_item.scorePerQuestion
  938. name = qtype_item.questionType or qtype_item.name
  939. # 开始生成当前题型,进度区间起始点
  940. start_percent = 10 + int(80 * (index / total_types))
  941. yield f"data: {json.dumps({'type': 'progress', 'message': f'正在生成{name}({index+1}/{total_types})...', 'percent': start_percent}, ensure_ascii=False)}\n\n"
  942. prompt = (
  943. f"请根据以下要求直接生成【{name}】题目,并严格返回纯 JSON,不要输出 markdown 代码块、解释说明或额外文字。\n"
  944. f"生成模式:{data.mode or '未指定'}\n"
  945. f"客户端:{data.client or '未指定'}\n"
  946. f"项目类型:{data.projectType or '未指定'}\n"
  947. f"试卷标题:{resolved_exam_title}\n"
  948. f"出题依据内容:{ppt_content or '无'}\n"
  949. "出题依据内容是本次试题的核心来源,必须围绕该内容中的知识点、术语、流程、规范要求和场景展开。\n"
  950. f"你需要生成:{count}道【{name}】,每道{score}分。\n"
  951. f"{basis_instruction}\n"
  952. f"返回 JSON 中的 count 必须等于 {count},questions 数组必须恰好包含 {count} 个题目对象,不能只返回 1 个示例对象。\n"
  953. "下面的 JSON 仅用于展示字段结构,questions 内的对象格式按此扩展到要求数量。\n"
  954. "JSON 输出结构必须符合以下格式(根据题型返回单个字段):\n"
  955. )
  956. basis_field = ', "basis": "文件名:...;章节条款:...;正文:..."' if basis_enabled else ""
  957. total_score = count * score
  958. if name == "单选题":
  959. prompt += f'{{"singleChoice": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], "selectedAnswer": "A", "analysis": "解析"{basis_field}}}]}}}}\n'
  960. prompt += "注意:选项必须且只能是4个,固定为A、B、C、D,禁止出现E或更多选项。正确答案在A、B、C、D中随机。"
  961. elif name == "多选题":
  962. prompt += f'{{"multiple": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "options": [{{"key": "A", "text": "内容A"}}], "selectedAnswers": ["A", "B"], "analysis": "解析"{basis_field}}}]}}}}\n'
  963. prompt += (
  964. "注意:选项必须且只能是4个,固定为A、B、C、D,禁止出现E或更多选项。"
  965. "正确答案的个数必须在2~4个之间随机分布,且不能全部都为同一种数量。\n"
  966. "你必须严格遵守以下多选题正确答案分布规则:\n"
  967. f" - 在本次生成的 {count} 道多选题中,包含2个正确选项的题目占比应接近40%。\n"
  968. f" - 在本次生成的 {count} 道多选题中,包含3个正确选项的题目占比应接近40%。\n"
  969. f" - 在本次生成的 {count} 道多选题中,包含4个正确选项(ABCD)的题目占比不得超过20%。\n"
  970. "你必须强制打乱正确答案的字母组合,严格遵守以下规则:\n"
  971. " - 必须有至少30%的题目正确答案完全不包含A(如 BC、CD、BD、BCD)。\n"
  972. " - 必须有至少30%的题目正确答案采用跳跃分布(如 AC、AD、BD、ABD、ACD)。\n"
  973. " - 绝不能所有题目都从A开始,绝不能大量重复 ABC、ABD、ACD、BCD、ABCD 这类固定模式。\n"
  974. " - 正确答案组合必须在 AB、AC、AD、BC、BD、CD、ABC、ABD、ACD、BCD、ABCD 等形式之间充分打散。\n"
  975. "如果数量分布无法完全整除,也必须尽量逼近上述比例,绝不能出现全部都是3个正确选项或全部都是同一组合模式。"
  976. )
  977. elif name == "判断题":
  978. prompt += f'{{"judge": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "selectedAnswer": "正确", "analysis": "解析"{basis_field}}}]}}}}\n'
  979. prompt += (
  980. "注意:正确答案必须在“正确”和“错误”之间随机分布。\n"
  981. f"判断题必须一次性返回 {count} 道完整题目,questions 数组中必须实际展开为 {count} 个不同的题目对象,"
  982. "不能只给 1 个示例对象,不能让前端或调用方自行复制。"
  983. )
  984. elif name == "简答题":
  985. prompt += f'{{"short": {{"scorePerQuestion": {score}, "totalScore": {total_score}, "count": {count}, "questions": [{{"text": "题干", "outline": {{"keyFactors": "答题要点"}}, "analysis": "解析"{basis_field}}}]}}}}\n'
  986. try:
  987. qwen_response = ""
  988. parsed = None
  989. last_error = None
  990. for attempt in range(2):
  991. current_prompt = prompt
  992. if attempt == 1:
  993. current_prompt += (
  994. f"\n这是第2次重试,上一次生成的【{name}】结果不可用。\n"
  995. f"本次必须一次性完整返回 {count} 道【{name}】,"
  996. "不得少题、不得只返回示例题、不得返回 1 道占位题。\n"
  997. "严禁输出 Thinking Process、Reasoning、思考过程、解释说明、前言、后记、markdown 代码块或任何 JSON 之外的内容。\n"
  998. "你的最终回复必须以 { 开头、以 } 结尾,且整个回复只能是一个可被 json.loads 直接解析的 JSON 对象。"
  999. )
  1000. if name == "判断题":
  1001. current_prompt += (
  1002. f"\n特别强调:你现在生成的是【判断题】。"
  1003. f"questions 数组里必须真实返回 {count} 个判断题对象,"
  1004. "每个对象都要有独立题干和答案,绝不能只返回 1 个对象作为模板。"
  1005. )
  1006. logger.warning(
  1007. f"[exam/generate_stream] {name}首次生成结果不可用,开始第{attempt + 1}次重试")
  1008. qwen_response = await qwen_service.chat(
  1009. [{"role": "user", "content": current_prompt}],
  1010. disable_reasoning=True,
  1011. )
  1012. try:
  1013. parsed, actual_count = _parse_exam_section_payload(
  1014. qwen_response, name)
  1015. section = _get_exam_section(parsed, name)
  1016. is_valid_content, invalid_reason = _validate_section_questions(
  1017. section, name)
  1018. if actual_count == count and is_valid_content:
  1019. last_error = None
  1020. break
  1021. if actual_count != count:
  1022. last_error = ValueError(
  1023. f"{name}返回题量不完整,期望{count}道,实际{actual_count}道")
  1024. else:
  1025. last_error = ValueError(
  1026. invalid_reason or f"{name}存在占位内容")
  1027. logger.warning(
  1028. f"[exam/generate_stream] {last_error}; attempt={attempt + 1}/2")
  1029. except Exception as inner_error:
  1030. last_error = inner_error
  1031. repaired_payload = await _repair_exam_section_payload(
  1032. raw_response=qwen_response,
  1033. question_type=name,
  1034. count=count,
  1035. score=score,
  1036. basis_enabled=basis_enabled,
  1037. )
  1038. if repaired_payload is not None:
  1039. parsed, actual_count = repaired_payload
  1040. section = _get_exam_section(parsed, name)
  1041. is_valid_content, invalid_reason = _validate_section_questions(
  1042. section, name)
  1043. if actual_count == count and is_valid_content:
  1044. last_error = None
  1045. break
  1046. if actual_count != count:
  1047. last_error = ValueError(
  1048. f"{name}轻量修复后题量仍不完整,期望{count}道,实际{actual_count}道")
  1049. else:
  1050. last_error = ValueError(
  1051. invalid_reason or f"{name}轻量修复后仍存在占位内容")
  1052. logger.warning(
  1053. f"[exam/generate_stream] {last_error}; attempt={attempt + 1}/2")
  1054. continue
  1055. logger.warning(
  1056. f"[exam/generate_stream] {name}结果解析失败,准备重试: "
  1057. f"attempt={attempt + 1}/2, detail={inner_error!r}")
  1058. if last_error is not None:
  1059. raise last_error
  1060. if resolved_exam_title and not parsed.get("title"):
  1061. parsed["title"] = resolved_exam_title
  1062. # 合并到完整试卷
  1063. full_exam_data.update(parsed)
  1064. # 当前题型生成完成,进度推到当前区间的终点
  1065. end_percent = 10 + int(80 * ((index + 1) / total_types))
  1066. yield f"data: {json.dumps({'type': 'progress', 'message': f'{name}生成完成...', 'percent': end_percent}, ensure_ascii=False)}\n\n"
  1067. yield f"data: {json.dumps({'type': 'batch_data', 'questionType': name, 'data': parsed}, ensure_ascii=False)}\n\n"
  1068. except Exception as e:
  1069. logger.error(
  1070. f"生成{name}失败: {e}; raw_snippet={(qwen_response or '')[:300]}")
  1071. yield f"data: {json.dumps({'type': 'error', 'message': f'{name}生成失败,未保存残缺试卷,请重试。'}, ensure_ascii=False)}\n\n"
  1072. return
  1073. # 保存完整试卷到数据库
  1074. yield f"data: {json.dumps({'type': 'progress', 'message': '正在保存试卷...', 'percent': 98}, ensure_ascii=False)}\n\n"
  1075. request_payload = (
  1076. data.model_dump()
  1077. if hasattr(data, "model_dump")
  1078. else data.dict()
  1079. )
  1080. _save_exam_messages_with_fresh_session(
  1081. conv_id=conv_id,
  1082. user_id=user.user_id,
  1083. request_payload=request_payload,
  1084. exam_payload=full_exam_data,
  1085. )
  1086. yield f"data: {json.dumps({'type': 'progress', 'message': '试卷生成完成', 'percent': 100}, ensure_ascii=False)}\n\n"
  1087. except Exception as e:
  1088. logger.error(f"[exam/generate_stream] 异常: {e}")
  1089. yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
  1090. finally:
  1091. if db is not None:
  1092. db.close()
  1093. yield "data: [DONE]\n\n"
  1094. return StreamingResponse(event_generator(), media_type="text/event-stream")
  1095. @router.post("/exam/build_single_prompt")
  1096. async def build_single_question_prompt(
  1097. request: Request,
  1098. data: BuildSinglePromptRequest,
  1099. db: Session = Depends(get_db)
  1100. ):
  1101. """生成单题提示词 - 对齐Go版本函数名"""
  1102. user = request.state.user
  1103. if not user:
  1104. return {"statusCode": 401, "msg": "未授权"}
  1105. prompt = f"""请生成1道关于{data.topic}的{data.question_type},难度为{data.difficulty}。"""
  1106. return {
  1107. "statusCode": 200,
  1108. "msg": "success",
  1109. "data": {"prompt": prompt}
  1110. }
  1111. class ModifyQuestionRequest(BaseModel):
  1112. ai_conversation_id: int
  1113. content: str
  1114. @router.post("/re_modify_question")
  1115. async def re_modify_question(
  1116. request: Request,
  1117. data: ModifyQuestionRequest,
  1118. db: Session = Depends(get_db)
  1119. ):
  1120. """修改考试题目 - 实际修改ai_message表"""
  1121. user = request.state.user
  1122. if not user:
  1123. return {"statusCode": 401, "msg": "未授权"}
  1124. # 修改ai_message表中type='ai'的content
  1125. result = db.query(AIMessage).filter(
  1126. AIMessage.ai_conversation_id == data.ai_conversation_id,
  1127. AIMessage.type == 'ai'
  1128. ).update({"content": data.content})
  1129. if result == 0:
  1130. return {"statusCode": 404, "msg": "消息不存在"}
  1131. db.commit()
  1132. return {"statusCode": 200, "msg": "success"}
  1133. class ReproduceSingleQuestionRequest(BaseModel):
  1134. message: str = ""
  1135. ai_conversation_id: Optional[int] = None
  1136. regenerate_reason: str = ""
  1137. @router.post("/re_produce_single_question")
  1138. async def re_produce_single_question(
  1139. request: Request,
  1140. data: ReproduceSingleQuestionRequest,
  1141. db: Session = Depends(get_db)
  1142. ):
  1143. """重新生成单题"""
  1144. user = request.state.user
  1145. if not user:
  1146. return {"statusCode": 401, "msg": "未授权"}
  1147. prompt = (data.message or "").strip()
  1148. # 兼容旧版调用:未传 message 时,尝试根据会话和重生成原因构造提示词。
  1149. if not prompt and data.ai_conversation_id:
  1150. message = db.query(AIMessage).filter(
  1151. AIMessage.ai_conversation_id == data.ai_conversation_id,
  1152. AIMessage.type == 'ai'
  1153. ).first()
  1154. if not message:
  1155. return {"statusCode": 404, "msg": "消息不存在"}
  1156. prompt = (message.content or "").strip()
  1157. if data.regenerate_reason:
  1158. prompt = f"{prompt}\n\n请根据以下要求重新生成:{data.regenerate_reason}"
  1159. if not prompt:
  1160. return {"statusCode": 400, "msg": "缺少生成内容"}
  1161. try:
  1162. new_question = await qwen_service.chat([
  1163. {"role": "user", "content": prompt}
  1164. ])
  1165. except Exception as e:
  1166. return {"statusCode": 500, "msg": f"AI生成失败: {str(e)}"}
  1167. return {
  1168. "statusCode": 200,
  1169. "msg": "success",
  1170. "data": {
  1171. "ai_conversation_id": data.ai_conversation_id,
  1172. "new_question": new_question,
  1173. "reply": new_question,
  1174. "content": new_question,
  1175. "message": new_question
  1176. }
  1177. }