# document_chat_retrieval.yaml (3.4 KB)

---
  # Human-readable summary and version string of this configuration.
  description: "文档编辑 AI 对话-RAG 质量优先检索配置"
  version: "1.0.0"
  # Retrieval settings: collection names, per-stage top-k sizes, score
  # thresholds, reference-length limits, and fusion weights/bonuses.
  retrieval:
    enabled: true

    # Names of the parent and child collections.
    parent_collection: "t_kngs_construction_plan_parent"
    child_collection: "t_kngs_construction_plan_child"

    # Top-k sizes per recall source and per later stage.
    # NOTE(review): presumably candidates narrow from recall_top_k (30) to
    # rerank_top_k (8) to submit_top_k (3) - confirm against the consumer.
    parent_recall_top_k: 30
    child_recall_top_k: 40
    tag_recall_top_k: 30
    chapter_recall_top_k: 15
    recall_top_k: 30
    rerank_top_k: 8
    submit_top_k: 3

    # Minimum scores and minimum number of qualifying results.
    min_vector_similarity: 0.45
    min_rerank_score: 0.70
    min_qualified_count: 1

    # Character limits for references (total and per single reference).
    max_reference_chars: 4000
    max_single_reference_chars: 1500

    # Both fallback switches are disabled.
    allow_vector_fallback: false
    allow_unscoped_search: false

    # Dense/sparse weights; each pair sums to 1.0.
    dense_weight: 0.7
    sparse_weight: 0.3
    child_dense_weight: 0.6
    child_sparse_weight: 0.4

    # NOTE(review): rrf_k is set while ranker_type is "weighted" - confirm
    # that rrf_k is still read by the consumer.
    ranker_type: "weighted"
    tag_recall_enabled: true
    tag_terms_limit: 8
    rrf_k: 60

    # Per-source weights.
    parent_vector_weight: 1.0
    child_locator_weight: 0.8
    tag_weight: 1.2
    chapter_similarity_weight: 0.5

    # Additive score bonuses.
    tag_exact_bonus: 0.08
    tag_partial_bonus: 0.03
    multi_source_bonus: 0.02
    scope_bonus: 0.03
  # Term lists that drive keyword extraction and tag retrieval.
  # NOTE(review): placed as a section parallel to `retrieval`; confirm the
  # nesting level the consumer expects.
  keyword_extraction:
    # Used to extract retrieval keywords from the user question, chapter
    # titles, chapter content, and chat history.
    domain_terms:
      - "工程概况"
      - "编制依据"
      - "施工部署"
      - "施工准备"
      - "资源配置"
      - "测量放线"
      - "临时用电"
      - "临时用水"
      - "交通组织"
      - "围挡"
      - "便道"
      - "排水"
      - "降水"
      - "土方"
      - "基坑"
      - "边坡"
      - "支护"
      - "地基"
      - "基础"
      - "模板"
      - "钢筋"
      - "混凝土"
      - "预应力"
      - "脚手架"
      - "支架"
      - "防水"
      - "装饰装修"
      - "验收"
      - "标准"
      - "规范"
      - "检查"
      - "检测"
      - "试验"
      - "安装"
      - "拆除"
      - "吊装"
      - "质量控制"
      - "安全文明施工"
      - "环境保护"
      - "水土保持"
      - "应急预案"
      - "成品保护"
      - "进度计划"
      - "机械设备"
      - "劳动力"
      - "材料计划"
      - "架桥机"
      - "龙门吊"
      - "吊车"
      - "塔吊"
      - "施工电梯"
      - "挂篮"
      - "台车"
      - "箱梁"
      - "T梁"
      - "梁板"
      - "钢丝绳"
      - "支座"
      - "安全装置"
      - "操作证"
      - "合格证"
      - "静载"
      - "动载"
      - "空载"

    # Used to extract "term + action/chapter-type" compound words, for
    # example "架桥机验收" (bridge girder erection machine acceptance) or
    # "模板安装要求" (formwork installation requirements).
    action_terms:
      - "验收"
      - "标准"
      - "规范"
      - "检查"
      - "检测"
      - "试验"
      - "安装"
      - "拆除"
      - "吊装"
      - "要求"
      - "控制"
      - "保护"
      - "预案"
      - "计划"

    # Overly generic words filtered out during tag retrieval so that the
    # set of hits does not become too broad.
    tag_generic_terms:
      - "验收"
      - "标准"
      - "规范"
      - "检查"
      - "检测"
      - "试验"
      - "安装"
      - "拆除"
      - "要求"
      - "安全"
      - "环保"
      - "质量"
      - "进度"
      - "交底"

    # Priority words for tag retrieval; typically highly discriminative
    # terms such as equipment, construction methods, or standard numbers.
    tag_priority_terms:
      - "架桥机"
      - "龙门吊"
      - "吊车"
      - "塔吊"
      - "施工电梯"
      - "挂篮"
      - "支架"
      - "台车"
  # Warning messages, one per condition key; each states that no
  # knowledge-base (vector store) content is cited in the answer.
  # NOTE(review): placed as a section parallel to `retrieval`; confirm the
  # nesting level the consumer expects.
  warnings:
    no_scope: "缺少可靠的知识库检索范围,本次未引用向量库内容。"
    no_recall: "未召回可信知识库内容,本次回答不引用向量库。"
    low_confidence: "未找到可信度足够的知识库片段,本次未引用向量库内容。"
    rerank_failed: "知识库片段重排不可用,本次未引用向量库内容。"