|
@@ -1,245 +0,0 @@
|
|
|
-#!/usr/bin/env python3
|
|
|
|
|
-"""
|
|
|
|
|
-测试 Milvus v2.6 混合搜索功能
|
|
|
|
|
-"""
|
|
|
|
|
-
|
|
|
|
|
-import sys
|
|
|
|
|
-import os
|
|
|
|
|
-
|
|
|
|
|
# Make the project root (the parent of this file's directory) importable so
# that the `foundation` package used by the tests below can be resolved.
_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _project_root)

# Banner printed as soon as the module is executed.
print("Milvus v2.6 混合搜索测试")
print("=" * 50)
|
|
|
|
|
-
|
|
|
|
|
def test_hybrid_search_v26():
    """Run the basic Milvus hybrid-search (dense + BM25 sparse) smoke test.

    Connects to the Milvus server, builds the ``test_hybrid_v26`` collection
    from five in-memory documents, then issues a weighted search, an RRF
    search and a default search, printing the hits of each one.

    Returns:
        bool: True when every step ran without raising; False otherwise, in
        which case the error message and its traceback are printed.
    """
    try:
        # Report the client library version.
        import pymilvus
        print(f"PyMilvus 版本: {pymilvus.__version__}")

        # Open the "default" connection, then report the server version.
        from pymilvus import connections, utility
        connections.connect(
            alias="default",
            host='192.168.92.61',
            port='19530',
            db_name="lq_db",
        )

        # A failed version lookup is reported but does not abort the test.
        try:
            print(f"Milvus 服务器版本: {utility.get_server_version()}")
        except Exception as version_error:
            print(f"获取服务器版本失败: {version_error}")

        # Third-party and project components needed for the hybrid store.
        from langchain_milvus import Milvus, BM25BuiltInFunction
        from langchain_core.documents import Document
        from foundation.ai.models.model_handler import model_handler

        print("✓ 导入成功")

        # Embedding model used for the dense vector field.
        embedder = model_handler._get_lq_qwen3_8b_emd()
        print("✓ 嵌入模型加载成功")

        # (text, category, type) triples turned into Document objects.
        doc_specs = (
            ("四川路桥建设集团专注于桥梁和隧道工程建设", "company", "construction"),
            ("高速公路桥梁建设技术包括预应力混凝土和钢结构", "technology", "highway"),
            ("隧道工程施工方法包括盾构法和钻爆法", "method", "tunnel"),
            ("人工智能在建筑行业应用于智能监控和自动化施工", "ai", "technology"),
            ("BIM技术在路桥工程中的数字化应用越来越普及", "bim", "digital"),
        )
        sample_docs = [
            Document(page_content=text, metadata={"category": category, "type": kind})
            for text, category, kind in doc_specs
        ]

        print(f"✓ 创建 {len(sample_docs)} 个测试文档")

        # Connection settings handed to the LangChain Milvus wrapper.
        store_connection = {
            "uri": "http://192.168.92.61:19530",
            "user": None,
            "db_name": "lq_db",
        }
        target_collection = "test_hybrid_v26"

        print("\n🚀 创建混合搜索向量存储...")
        store = Milvus.from_documents(
            documents=sample_docs,
            embedding=embedder,
            builtin_function=BM25BuiltInFunction(),
            vector_field=["dense", "sparse"],
            connection_args=store_connection,
            collection_name=target_collection,
            consistency_level="Strong",
            drop_old=True,
        )
        print("✅ 混合搜索向量存储创建成功!")

        def show_plain_hits(hits):
            # Print the hit count followed by a numbered, truncated listing.
            print(f" 找到 {len(hits)} 个结果:")
            for rank, hit in enumerate(hits, start=1):
                snippet = hit.page_content[:50]
                print(f" {rank}. {snippet}...")

        print("\n🔍 测试混合搜索功能...")

        # 1. Weighted ranker; this listing also shows each hit's category.
        print("\n1. 加权搜索 (dense=0.7, sparse=0.3):")
        weighted_hits = store.similarity_search(
            query="桥梁建设技术",
            k=3,
            ranker_type="weighted",
            ranker_params={"weights": [0.7, 0.3]},
        )
        print(f" 找到 {len(weighted_hits)} 个结果:")
        for rank, hit in enumerate(weighted_hits, start=1):
            snippet = hit.page_content[:50]
            label = hit.metadata.get('category', 'N/A')
            print(f" {rank}. {snippet}... (类别: {label})")

        # 2. RRF ranker.
        print("\n2. RRF 搜索:")
        show_plain_hits(
            store.similarity_search(
                query="人工智能应用",
                k=2,
                ranker_type="rrf",
                ranker_params={"k": 60},
            )
        )

        # 3. Search without any explicit ranker arguments.
        print("\n3. 默认搜索:")
        show_plain_hits(
            store.similarity_search(
                query="BIM技术应用",
                k=2,
            )
        )

        # NOTE: the test collection is left in place; no cleanup is
        # performed by this function.
        return True

    except Exception as failure:
        print(f"❌ 测试失败: {failure}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
-
|
|
|
|
|
def test_advanced_hybrid_features():
    """Exercise weighted hybrid search with several dense/sparse weightings.

    Builds the ``test_advanced_hybrid`` collection from three in-memory
    documents, runs the same query with three weight pairs and prints the
    number of hits for each, then drops the collection.

    The collection is dropped in a ``finally`` clause so that a failing
    search does not leave it behind. The cleanup opens the pymilvus
    "default" connection itself when it is missing, so this function also
    works when it is the only test run in the process.

    Returns:
        bool: True when the searches and the cleanup all succeeded; False
        otherwise, in which case the error is printed (with a traceback for
        test failures).
    """
    import traceback

    connection_args = {
        "uri": "http://192.168.92.61:19530",
        "user": None,
        "db_name": "lq_db",
    }
    collection_name = "test_advanced_hybrid"

    # Set right before the collection may be created, so that cleanup is
    # skipped when the test failed earlier (e.g. on an import error).
    creation_attempted = False
    success = False

    try:
        print("\n🎯 测试高级混合搜索功能...")

        from langchain_milvus import Milvus, BM25BuiltInFunction
        from langchain_core.documents import Document
        from foundation.ai.models.model_handler import model_handler

        emdmodel = model_handler._get_lq_qwen3_8b_emd()

        # Documents searched with each weight configuration below.
        docs = [
            Document(page_content="深度学习技术在图像识别中的应用", metadata={"domain": "ai", "type": "dl"}),
            Document(page_content="机器学习算法在数据挖掘中的实践", metadata={"domain": "ai", "type": "ml"}),
            Document(page_content="神经网络模型的优化方法研究", metadata={"domain": "ai", "type": "nn"}),
        ]

        # Create the hybrid (dense + BM25 sparse) vector store.
        creation_attempted = True
        vectorstore = Milvus.from_documents(
            documents=docs,
            embedding=emdmodel,
            builtin_function=BM25BuiltInFunction(),
            vector_field=["dense", "sparse"],
            connection_args=connection_args,
            collection_name=collection_name,
            consistency_level="Strong",
            drop_old=True,
        )

        print("✅ 高级混合搜索测试集创建成功")

        # Weight pairs are passed to the weighted ranker as-is; the names
        # label them as semantic-first, keyword-first and balanced.
        test_configs = [
            {"name": "语义优先", "weights": [0.9, 0.1]},
            {"name": "关键词优先", "weights": [0.1, 0.9]},
            {"name": "平衡配置", "weights": [0.5, 0.5]},
        ]

        for config in test_configs:
            results = vectorstore.similarity_search(
                query="深度学习模型",
                k=2,
                ranker_type="weighted",
                ranker_params={"weights": config["weights"]},
            )
            print(f" {config['name']} ({config['weights']}): {len(results)} 个结果")

        success = True

    except Exception as e:
        print(f"❌ 高级功能测试失败: {e}")
        traceback.print_exc()

    finally:
        if creation_attempted:
            try:
                from pymilvus import connections, utility

                # `utility` operates on the "default" connection alias;
                # open it here when no earlier code has done so.
                if not connections.has_connection("default"):
                    connections.connect(
                        alias="default",
                        uri=connection_args["uri"],
                        db_name=connection_args["db_name"],
                    )
                if utility.has_collection(collection_name):
                    utility.drop_collection(collection_name)
            except Exception as cleanup_error:
                # A failed cleanup still counts as a test failure.
                print(f"❌ 高级功能测试失败: {cleanup_error}")
                success = False

    return success
|
|
|
|
|
-
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run both tests, print a summary, and exit with a
    # non-zero status when anything failed so callers can detect it.
    print("开始 Milvus v2.6 混合搜索测试...")

    # Basic hybrid-search test.
    basic_success = test_hybrid_search_v26()

    # The advanced test only runs when the basic one passed.
    advanced_success = test_advanced_hybrid_features() if basic_success else False

    print("\n" + "=" * 50)
    print("测试结果总结:")
    # The leading icon follows the actual result instead of always being a
    # check mark, so a failed line is not shown as passed.
    basic_icon = "✅" if basic_success else "❌"
    advanced_icon = "✅" if advanced_success else "❌"
    print(f"{basic_icon} 基础混合搜索: {'成功' if basic_success else '失败'}")
    print(f"{advanced_icon} 高级混合搜索: {'成功' if advanced_success else '失败'}")

    if basic_success and advanced_success:
        print("\n🎉 恭喜!Milvus v2.6 混合搜索功能完全正常!")
        print("\n📝 可以在你的项目中使用以下功能:")
        print("- ✓ create_hybrid_collection() 方法")
        print("- ✓ hybrid_search() 方法")
        print("- ✓ 加权搜索 (ranker_type='weighted')")
        print("- ✓ RRF 搜索 (ranker_type='rrf')")
        print("- ✓ 自定义权重配置")
    else:
        print("\n❌ 仍有问题需要解决")
        # Signal the failure to the calling shell / CI job.
        sys.exit(1)
|
|
|