Browse Source

v0.0.3-构建大纲审查基础

WangXuMing 4 months ago
parent
commit
e516bcd8fd
3 changed files with 30 additions and 5 deletions
  1. 0 0
      __init__.py
  2. 25 0
      data_pipeline/training_data/test_rag.py
  3. 5 5
      views/test_views.py

+ 0 - 0
__init__.py


+ 25 - 0
data_pipeline/training_data/test_rag.py

@@ -0,0 +1,25 @@
+import os
+import sys
+current_script_path = os.path.abspath(__file__)
+script_dir = os.path.dirname(current_script_path)
+project_root = os.path.abspath(os.path.join(script_dir, "../../"))
+if project_root not in sys.path:
+    sys.path.insert(0, project_root)
+from foundation.ai.rag.retrieval.retrieval import retrieval_manager
+
+query = "实体:通风孔;背景:在箱梁腹板上设置通风孔,用于结构内部空气流通,需保证与预应力钢筋保护层满足间距要求,并水平设置"
+collection = "first_bfp_collection"  # your Milvus collection name
+
+# Two-stage recall: Milvus hybrid retrieval (vector + BM25) → BGE rerank
+results = retrieval_manager.multi_stage_recall(
+    collection_name=collection,
+    query_text=query,
+    hybrid_top_k=50,   # how many candidates to take in the first stage
+    top_k=3,          # number of results finally returned
+    ranker_type="weighted"  # or "rrf"
+)
+
+print(results)
+for item in results:
+    print(item["rerank_score"], item["text_content"])
+    # metadata is available in item["metadata"] (from the hybrid retrieval stage)

+ 5 - 5
views/test_views.py

@@ -808,13 +808,13 @@ async def bfp_search_endpoint(
         
         output = None
         # 初始化客户端(需提前设置环境变量 SILICONFLOW_API_KEY)
-        client = SiliconFlowAPI()
+        #client = SiliconFlowAPI()
         # 抽象测试
-        pg_vector_db = PGVectorDB(base_api_platform=client)
+        pg_vector_db = PGVectorDB()
         output = pg_vector_db.retriever(param={"table_name": "tv_basis_of_preparation"}, query_text=input_query , top_k=top_k)
         # 重排序处理
         content_list = [doc["text_content"] for doc in output]
-        output = client.rerank(input_query=input_query, documents=content_list , top_n=top_k)
+        #output = client.rerank(input_query=input_query, documents=content_list , top_n=top_k)
 
         # 返回字典格式的响应
         return JSONResponse(
@@ -853,9 +853,9 @@ async def bfp_search_endpoint(
         
         output = None
         # 初始化客户端(需提前设置环境变量 SILICONFLOW_API_KEY)
-        client = SiliconFlowAPI()
+        #client = SiliconFlowAPI()
         # 抽象测试
-        vector_db = MilvusVectorManager(base_api_platform=client)
+        vector_db = MilvusVectorManager()
         output = vector_db.retriever(param={"collection_name": "tv_basis_of_preparation"}, query_text=input_query , top_k=top_k)
 
         # 返回字典格式的响应