# test_hybrid_v2.6.py (8.2 KB)
  1. #!/usr/bin/env python3
  2. """
  3. 测试 Milvus v2.6 混合搜索功能
  4. """
  5. import sys
  6. import os
  7. # 添加项目根目录到路径
  8. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  9. print("Milvus v2.6 混合搜索测试")
  10. print("=" * 50)
  11. def test_hybrid_search_v26():
  12. """测试 v2.6 混合搜索功能"""
  13. try:
  14. # 检查版本
  15. import pymilvus
  16. print(f"PyMilvus 版本: {pymilvus.__version__}")
  17. # 连接服务器并检查版本
  18. from pymilvus import connections, utility
  19. connections.connect(
  20. alias="default",
  21. host='192.168.92.61',
  22. port='19530',
  23. db_name="lq_db"
  24. )
  25. try:
  26. server_version = utility.get_server_version()
  27. print(f"Milvus 服务器版本: {server_version}")
  28. except Exception as e:
  29. print(f"获取服务器版本失败: {e}")
  30. # 导入必要组件
  31. from langchain_milvus import Milvus, BM25BuiltInFunction
  32. from langchain_core.documents import Document
  33. from foundation.ai.models.model_handler import model_handler
  34. print("✓ 导入成功")
  35. # 获取嵌入模型
  36. emdmodel = model_handler._get_lq_qwen3_8b_emd()
  37. print("✓ 嵌入模型加载成功")
  38. # 创建测试文档
  39. test_docs = [
  40. Document(
  41. page_content="四川路桥建设集团专注于桥梁和隧道工程建设",
  42. metadata={"category": "company", "type": "construction"}
  43. ),
  44. Document(
  45. page_content="高速公路桥梁建设技术包括预应力混凝土和钢结构",
  46. metadata={"category": "technology", "type": "highway"}
  47. ),
  48. Document(
  49. page_content="隧道工程施工方法包括盾构法和钻爆法",
  50. metadata={"category": "method", "type": "tunnel"}
  51. ),
  52. Document(
  53. page_content="人工智能在建筑行业应用于智能监控和自动化施工",
  54. metadata={"category": "ai", "type": "technology"}
  55. ),
  56. Document(
  57. page_content="BIM技术在路桥工程中的数字化应用越来越普及",
  58. metadata={"category": "bim", "type": "digital"}
  59. )
  60. ]
  61. print(f"✓ 创建 {len(test_docs)} 个测试文档")
  62. # 连接参数
  63. connection_args = {
  64. "uri": "http://192.168.92.61:19530",
  65. "user": None,
  66. "db_name": "lq_db"
  67. }
  68. collection_name = "test_hybrid_v26"
  69. print("\n🚀 创建混合搜索向量存储...")
  70. vectorstore = Milvus.from_documents(
  71. documents=test_docs,
  72. embedding=emdmodel,
  73. builtin_function=BM25BuiltInFunction(),
  74. vector_field=["dense", "sparse"],
  75. connection_args=connection_args,
  76. collection_name=collection_name,
  77. consistency_level="Strong",
  78. drop_old=True,
  79. )
  80. print("✅ 混合搜索向量存储创建成功!")
  81. # 测试不同的搜索策略
  82. print("\n🔍 测试混合搜索功能...")
  83. # 1. 加权搜索
  84. print("\n1. 加权搜索 (dense=0.7, sparse=0.3):")
  85. results = vectorstore.similarity_search(
  86. query="桥梁建设技术",
  87. k=3,
  88. ranker_type="weighted",
  89. ranker_params={"weights": [0.7, 0.3]}
  90. )
  91. print(f" 找到 {len(results)} 个结果:")
  92. for i, result in enumerate(results):
  93. content = result.page_content[:50]
  94. category = result.metadata.get('category', 'N/A')
  95. print(f" {i+1}. {content}... (类别: {category})")
  96. # 2. RRF 搜索
  97. print("\n2. RRF 搜索:")
  98. rrf_results = vectorstore.similarity_search(
  99. query="人工智能应用",
  100. k=2,
  101. ranker_type="rrf",
  102. ranker_params={"k": 60}
  103. )
  104. print(f" 找到 {len(rrf_results)} 个结果:")
  105. for i, result in enumerate(rrf_results):
  106. content = result.page_content[:50]
  107. print(f" {i+1}. {content}...")
  108. # 3. 默认搜索
  109. print("\n3. 默认搜索:")
  110. default_results = vectorstore.similarity_search(
  111. query="BIM技术应用",
  112. k=2
  113. )
  114. print(f" 找到 {len(default_results)} 个结果:")
  115. for i, result in enumerate(default_results):
  116. content = result.page_content[:50]
  117. print(f" {i+1}. {content}...")
  118. # # 清理
  119. # if utility.has_collection(collection_name):
  120. # utility.drop_collection(collection_name)
  121. # print(f"\n✅ 清理测试集合: {collection_name}")
  122. return True
  123. except Exception as e:
  124. print(f"❌ 测试失败: {e}")
  125. import traceback
  126. traceback.print_exc()
  127. return False
  128. def test_advanced_hybrid_features():
  129. """测试高级混合搜索功能"""
  130. try:
  131. print("\n🎯 测试高级混合搜索功能...")
  132. from langchain_milvus import Milvus, BM25BuiltInFunction
  133. from langchain_core.documents import Document
  134. from foundation.ai.models.model_handler import model_handler
  135. emdmodel = model_handler._get_lq_qwen3_8b_emd()
  136. # 测试多种权重配置
  137. docs = [
  138. Document(page_content="深度学习技术在图像识别中的应用", metadata={"domain": "ai", "type": "dl"}),
  139. Document(page_content="机器学习算法在数据挖掘中的实践", metadata={"domain": "ai", "type": "ml"}),
  140. Document(page_content="神经网络模型的优化方法研究", metadata={"domain": "ai", "type": "nn"}),
  141. ]
  142. connection_args = {
  143. "uri": "http://192.168.92.61:19530",
  144. "user": None,
  145. "db_name": "lq_db"
  146. }
  147. collection_name = "test_advanced_hybrid"
  148. # 创建向量存储
  149. vectorstore = Milvus.from_documents(
  150. documents=docs,
  151. embedding=emdmodel,
  152. builtin_function=BM25BuiltInFunction(),
  153. vector_field=["dense", "sparse"],
  154. connection_args=connection_args,
  155. collection_name=collection_name,
  156. consistency_level="Strong",
  157. drop_old=True,
  158. )
  159. print("✅ 高级混合搜索测试集创建成功")
  160. # 测试不同的权重组合
  161. test_configs = [
  162. {"name": "语义优先", "weights": [0.9, 0.1]},
  163. {"name": "关键词优先", "weights": [0.1, 0.9]},
  164. {"name": "平衡配置", "weights": [0.5, 0.5]},
  165. ]
  166. for config in test_configs:
  167. results = vectorstore.similarity_search(
  168. query="深度学习模型",
  169. k=2,
  170. ranker_type="weighted",
  171. ranker_params={"weights": config["weights"]}
  172. )
  173. print(f" {config['name']} ({config['weights']}): {len(results)} 个结果")
  174. # 清理
  175. from pymilvus import utility
  176. if utility.has_collection(collection_name):
  177. utility.drop_collection(collection_name)
  178. return True
  179. except Exception as e:
  180. print(f"❌ 高级功能测试失败: {e}")
  181. return False
  182. if __name__ == "__main__":
  183. print("开始 Milvus v2.6 混合搜索测试...")
  184. # 基础混合搜索测试
  185. basic_success = test_hybrid_search_v26()
  186. # 高级功能测试
  187. if basic_success:
  188. advanced_success = test_advanced_hybrid_features()
  189. else:
  190. advanced_success = False
  191. print("\n" + "=" * 50)
  192. print("测试结果总结:")
  193. print(f"✅ 基础混合搜索: {'成功' if basic_success else '失败'}")
  194. print(f"✅ 高级混合搜索: {'成功' if advanced_success else '失败'}")
  195. if basic_success and advanced_success:
  196. print("\n🎉 恭喜!Milvus v2.6 混合搜索功能完全正常!")
  197. print("\n📝 可以在你的项目中使用以下功能:")
  198. print("- ✓ create_hybrid_collection() 方法")
  199. print("- ✓ hybrid_search() 方法")
  200. print("- ✓ 加权搜索 (ranker_type='weighted')")
  201. print("- ✓ RRF 搜索 (ranker_type='rrf')")
  202. print("- ✓ 自定义权重配置")
  203. else:
  204. print("\n❌ 仍有问题需要解决")