#!/usr/bin/env python3
"""Smoke test for Milvus v2.6 hybrid search through ``langchain_milvus``.

The script builds small scratch collections that combine a dense embedding
field with a BM25 sparse field, then runs weighted, RRF and default
similarity searches against them. It needs a reachable Milvus server and the
project's embedding model, so it is an integration check, not a unit test.

The server location defaults to the values the script was written against and
can be overridden with the ``MILVUS_HOST``, ``MILVUS_PORT`` and
``MILVUS_DB_NAME`` environment variables.

Exit status is 0 when both test stages succeed and 1 otherwise.
"""

import os
import sys
import traceback

# Add the project root to the import path so the ``foundation`` package
# resolves when this file is executed directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Single source of truth for the server location; the defaults are the values
# that used to be hard-coded in every function.
MILVUS_HOST = os.environ.get("MILVUS_HOST", "192.168.92.61")
MILVUS_PORT = os.environ.get("MILVUS_PORT", "19530")
MILVUS_DB_NAME = os.environ.get("MILVUS_DB_NAME", "lq_db")
MILVUS_ALIAS = "default"

print("Milvus v2.6 混合搜索测试")
print("=" * 50)


def _connection_args():
    """Return the connection settings handed to ``Milvus.from_documents``."""
    return {
        "uri": f"http://{MILVUS_HOST}:{MILVUS_PORT}",
        "user": None,
        "db_name": MILVUS_DB_NAME,
    }


def _ensure_default_connection():
    """Open the pymilvus ``default`` connection unless it already exists.

    The ``pymilvus.utility`` helpers used below operate on this alias, so every
    function that calls them goes through here instead of relying on another
    test having connected first.
    """
    from pymilvus import connections

    if not connections.has_connection(MILVUS_ALIAS):
        connections.connect(
            alias=MILVUS_ALIAS,
            host=MILVUS_HOST,
            port=MILVUS_PORT,
            db_name=MILVUS_DB_NAME,
        )


def _drop_collection_if_exists(collection_name):
    """Drop ``collection_name`` from the server when it is present."""
    from pymilvus import utility

    _ensure_default_connection()
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)


def _build_hybrid_store(documents, embedding, collection_name):
    """Create a dense + BM25 sparse vector store holding ``documents``.

    ``drop_old=True`` replaces any collection of the same name, so repeated
    runs always start from a fresh collection.
    """
    from langchain_milvus import BM25BuiltInFunction, Milvus

    return Milvus.from_documents(
        documents=documents,
        embedding=embedding,
        builtin_function=BM25BuiltInFunction(),
        vector_field=["dense", "sparse"],
        connection_args=_connection_args(),
        collection_name=collection_name,
        consistency_level="Strong",
        drop_old=True,
    )


def _print_results(results, show_category=False):
    """Print the hit count and the first 50 characters of every hit."""
    print(f" 找到 {len(results)} 个结果:")
    for rank, doc in enumerate(results, start=1):
        snippet = doc.page_content[:50]
        if show_category:
            category = doc.metadata.get("category", "N/A")
            print(f" {rank}. {snippet}... (类别: {category})")
        else:
            print(f" {rank}. {snippet}...")


def test_hybrid_search_v26():
    """Run the basic hybrid-search checks against a scratch collection.

    Connects to the server, reports the client and server versions, indexes
    five sample documents into ``test_hybrid_v26`` and runs a weighted, an RRF
    and a default similarity search, printing the hits of each.

    Returns:
        bool: ``True`` when every step completed without raising, ``False``
        otherwise (the error and its traceback are printed).
    """
    try:
        # Report the client version.
        import pymilvus

        print(f"PyMilvus 版本: {pymilvus.__version__}")

        # Connect, then report the server version.
        from pymilvus import utility

        _ensure_default_connection()

        # A failed version lookup is only informational; keep going.
        try:
            server_version = utility.get_server_version()
            print(f"Milvus 服务器版本: {server_version}")
        except Exception as e:
            print(f"获取服务器版本失败: {e}")

        # Import the remaining components up front so a missing dependency is
        # reported before any server work starts.
        from langchain_milvus import Milvus, BM25BuiltInFunction  # noqa: F401
        from langchain_core.documents import Document
        from foundation.ai.models.model_handler import model_handler

        print("✓ 导入成功")

        # Load the embedding model.
        emdmodel = model_handler._get_lq_qwen3_8b_emd()
        print("✓ 嵌入模型加载成功")

        # Sample documents.
        test_docs = [
            Document(
                page_content="四川路桥建设集团专注于桥梁和隧道工程建设",
                metadata={"category": "company", "type": "construction"},
            ),
            Document(
                page_content="高速公路桥梁建设技术包括预应力混凝土和钢结构",
                metadata={"category": "technology", "type": "highway"},
            ),
            Document(
                page_content="隧道工程施工方法包括盾构法和钻爆法",
                metadata={"category": "method", "type": "tunnel"},
            ),
            Document(
                page_content="人工智能在建筑行业应用于智能监控和自动化施工",
                metadata={"category": "ai", "type": "technology"},
            ),
            Document(
                page_content="BIM技术在路桥工程中的数字化应用越来越普及",
                metadata={"category": "bim", "type": "digital"},
            ),
        ]
        print(f"✓ 创建 {len(test_docs)} 个测试文档")

        collection_name = "test_hybrid_v26"

        print("\n🚀 创建混合搜索向量存储...")
        vectorstore = _build_hybrid_store(test_docs, emdmodel, collection_name)
        print("✅ 混合搜索向量存储创建成功!")

        # Exercise the different ranking strategies.
        print("\n🔍 测试混合搜索功能...")

        # 1. Weighted ranking.
        print("\n1. 加权搜索 (dense=0.7, sparse=0.3):")
        results = vectorstore.similarity_search(
            query="桥梁建设技术",
            k=3,
            ranker_type="weighted",
            ranker_params={"weights": [0.7, 0.3]},
        )
        _print_results(results, show_category=True)

        # 2. Reciprocal rank fusion.
        print("\n2. RRF 搜索:")
        rrf_results = vectorstore.similarity_search(
            query="人工智能应用",
            k=2,
            ranker_type="rrf",
            ranker_params={"k": 60},
        )
        _print_results(rrf_results)

        # 3. Library default ranking.
        print("\n3. 默认搜索:")
        default_results = vectorstore.similarity_search(
            query="BIM技术应用",
            k=2,
        )
        _print_results(default_results)

        # NOTE(review): the collection is left on the server, as before (the
        # cleanup here was disabled) - presumably so it can be inspected after
        # a run; confirm. ``drop_old=True`` recreates it on the next run.
        return True

    except Exception as e:
        print(f"❌ 测试失败: {e}")
        traceback.print_exc()
        return False


def test_advanced_hybrid_features():
    """Run weighted hybrid searches under several dense/sparse weightings.

    Indexes three sample documents into ``test_advanced_hybrid``, queries the
    collection once per weight configuration and prints the hit counts. The
    scratch collection is dropped afterwards, even when a search raises.

    Returns:
        bool: ``True`` when every step, including cleanup, completed without
        raising, ``False`` otherwise (the error and its traceback are
        printed).
    """
    try:
        print("\n🎯 测试高级混合搜索功能...")

        from langchain_core.documents import Document
        from foundation.ai.models.model_handler import model_handler

        emdmodel = model_handler._get_lq_qwen3_8b_emd()

        docs = [
            Document(
                page_content="深度学习技术在图像识别中的应用",
                metadata={"domain": "ai", "type": "dl"},
            ),
            Document(
                page_content="机器学习算法在数据挖掘中的实践",
                metadata={"domain": "ai", "type": "ml"},
            ),
            Document(
                page_content="神经网络模型的优化方法研究",
                metadata={"domain": "ai", "type": "nn"},
            ),
        ]

        collection_name = "test_advanced_hybrid"

        vectorstore = _build_hybrid_store(docs, emdmodel, collection_name)
        print("✅ 高级混合搜索测试集创建成功")

        # Weight pairs are [dense, sparse].
        test_configs = [
            {"name": "语义优先", "weights": [0.9, 0.1]},
            {"name": "关键词优先", "weights": [0.1, 0.9]},
            {"name": "平衡配置", "weights": [0.5, 0.5]},
        ]

        try:
            for config in test_configs:
                results = vectorstore.similarity_search(
                    query="深度学习模型",
                    k=2,
                    ranker_type="weighted",
                    ranker_params={"weights": config["weights"]},
                )
                print(
                    f" {config['name']} ({config['weights']}): "
                    f"{len(results)} 个结果"
                )
        finally:
            # Clean up on failure too, so a broken run does not leave the
            # scratch collection behind.
            _drop_collection_if_exists(collection_name)

        return True

    except Exception as e:
        print(f"❌ 高级功能测试失败: {e}")
        traceback.print_exc()
        return False


def main():
    """Run both test stages, print a summary and return the exit status.

    The advanced stage is skipped (and reported as failed) when the basic
    stage fails.

    Returns:
        int: 0 when both stages succeeded, 1 otherwise.
    """
    print("开始 Milvus v2.6 混合搜索测试...")

    basic_success = test_hybrid_search_v26()
    advanced_success = basic_success and test_advanced_hybrid_features()

    print("\n" + "=" * 50)
    print("测试结果总结:")
    summary = (
        ("基础混合搜索", basic_success),
        ("高级混合搜索", advanced_success),
    )
    for label, ok in summary:
        # The marker follows the outcome; a fixed check mark next to a
        # failure was misleading.
        print(f"{'✅' if ok else '❌'} {label}: {'成功' if ok else '失败'}")

    if basic_success and advanced_success:
        print("\n🎉 恭喜!Milvus v2.6 混合搜索功能完全正常!")
        print("\n📝 可以在你的项目中使用以下功能:")
        print("- ✓ create_hybrid_collection() 方法")
        print("- ✓ hybrid_search() 方法")
        print("- ✓ 加权搜索 (ranker_type='weighted')")
        print("- ✓ RRF 搜索 (ranker_type='rrf')")
        print("- ✓ 自定义权重配置")
        return 0

    print("\n❌ 仍有问题需要解决")
    return 1


if __name__ == "__main__":
    sys.exit(main())