#!/usr/bin/env python3
"""
Directly test the create_hybrid_collection and hybrid_search methods
defined in milvus_vector.py.
"""
import os
import sys

# Add the project root (the parent of this file's directory) to the import
# path so that the `foundation` package resolves when this file is run as a
# standalone script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Banner printed at import time, exactly as in the original script.
print("测试 MilvusVectorManager 的 create_hybrid_collection 和 hybrid_search 方法")
print("=" * 70)
  11. def test_milvus_vector_manager_methods():
  12. """直接测试 MilvusVectorManager 类的方法"""
  13. try:
  14. # 导入并初始化 MilvusVectorManager
  15. from foundation.database.base.vector.milvus_vector import MilvusVectorManager
  16. print("✓ 成功导入 MilvusVectorManager")
  17. # 初始化管理器
  18. manager = MilvusVectorManager()
  19. print("✓ MilvusVectorManager 初始化成功")
  20. # 测试数据
  21. test_documents = [
  22. {
  23. 'content': '四川路桥建设集团专注于桥梁和隧道工程建设',
  24. 'metadata': {'category': 'company', 'industry': 'construction', 'id': 1}
  25. },
  26. {
  27. 'content': '高速公路桥梁建设技术包括预应力混凝土桥梁和钢结构桥梁',
  28. 'metadata': {'category': 'technology', 'industry': 'highway', 'id': 2}
  29. },
  30. {
  31. 'content': '隧道工程施工方法包括盾构法、钻爆法和明挖法',
  32. 'metadata': {'category': 'method', 'industry': 'tunnel', 'id': 3}
  33. },
  34. {
  35. 'content': '人工智能在建筑行业的应用包括智能监控和自动化施工',
  36. 'metadata': {'category': 'ai', 'industry': 'technology', 'id': 4}
  37. },
  38. {
  39. 'content': 'BIM技术在路桥工程中的数字化应用越来越普及',
  40. 'metadata': {'category': 'bim', 'industry': 'digital', 'id': 5}
  41. }
  42. ]
  43. collection_name = "test_milvus_methods"
  44. print(f"\n🚀 测试 create_hybrid_collection 方法...")
  45. print(f" 准备创建集合: {collection_name}")
  46. print(f" 文档数量: {len(test_documents)}")
  47. # 调用 create_hybrid_collection 方法
  48. vectorstore = manager.create_hybrid_collection(
  49. collection_name=collection_name,
  50. documents=test_documents
  51. )
  52. print("✅ create_hybrid_collection 执行成功!")
  53. print(f" 返回的 vectorstore 类型: {type(vectorstore)}")
  54. # 等待索引创建完成
  55. import time
  56. time.sleep(3)
  57. print(f"\n🔍 测试 hybrid_search 方法...")
  58. # 测试参数
  59. param = {'collection_name': collection_name}
  60. # 1. 测试加权搜索
  61. print("\n 1. 测试加权混合搜索:")
  62. query1 = "桥梁建设技术"
  63. print(f" 查询: '{query1}'")
  64. results1 = manager.hybrid_search(
  65. param=param,
  66. query_text=query1,
  67. top_k=3,
  68. ranker_type="weighted",
  69. dense_weight=0.7,
  70. sparse_weight=0.3
  71. )
  72. print(f" 找到 {len(results1)} 个结果:")
  73. for i, result in enumerate(results1):
  74. content = result.get('text_content', '')[:50]
  75. similarity = result.get('similarity', 0)
  76. metadata = result.get('metadata', {})
  77. print(f" {i+1}. {content}... (相似度: {similarity:.4f})")
  78. print(f" 元数据: {metadata}")
  79. # 2. 测试RRF搜索
  80. print("\n 2. 测试RRF混合搜索:")
  81. query2 = "人工智能应用"
  82. print(f" 查询: '{query2}'")
  83. results2 = manager.hybrid_search(
  84. param=param,
  85. query_text=query2,
  86. top_k=2,
  87. ranker_type="rrf"
  88. )
  89. print(f" 找到 {len(results2)} 个结果:")
  90. for i, result in enumerate(results2):
  91. content = result.get('text_content', '')
  92. metadata = result.get('metadata', {})
  93. print(f" {i+1}. {content}")
  94. print(f" 元数据: {metadata}")
  95. # 3. 测试不同权重配置
  96. print("\n 3. 测试不同权重配置:")
  97. query3 = "路桥工程"
  98. weight_configs = [
  99. {"dense": 0.8, "sparse": 0.2, "name": "语义优先"},
  100. {"dense": 0.2, "sparse": 0.8, "name": "关键词优先"},
  101. {"dense": 0.5, "sparse": 0.5, "name": "平衡配置"}
  102. ]
  103. for config in weight_configs:
  104. print(f" {config['name']} (dense={config['dense']}, sparse={config['sparse']}):")
  105. results3 = manager.hybrid_search(
  106. param=param,
  107. query_text=query3,
  108. top_k=2,
  109. ranker_type="weighted",
  110. dense_weight=config["dense"],
  111. sparse_weight=config["sparse"]
  112. )
  113. print(f" 返回 {len(results3)} 个结果")
  114. if results3:
  115. best_content = results3[0].get('text_content', '')[:50]
  116. print(f" 最佳匹配: {best_content}...")
  117. # 清理测试集合
  118. print(f"\n🧹 清理测试集合...")
  119. try:
  120. from pymilvus import utility
  121. if utility.has_collection(collection_name):
  122. utility.drop_collection(collection_name)
  123. print(f"✅ 成功清理集合: {collection_name}")
  124. except Exception as e:
  125. print(f"⚠️ 清理集合失败: {e}")
  126. return True
  127. except Exception as e:
  128. print(f"❌ 测试失败: {e}")
  129. import traceback
  130. traceback.print_exc()
  131. return False
  132. def test_method_signatures():
  133. """测试方法签名和基本功能"""
  134. try:
  135. from foundation.database.base.vector.milvus_vector import MilvusVectorManager
  136. print("\n📋 方法签名检查:")
  137. # 检查 create_hybrid_collection 方法
  138. import inspect
  139. create_sig = inspect.signature(MilvusVectorManager.create_hybrid_collection)
  140. print(f" create_hybrid_collection{create_sig}")
  141. # 检查 hybrid_search 方法
  142. hybrid_sig = inspect.signature(MilvusVectorManager.hybrid_search)
  143. print(f" hybrid_search{hybrid_sig}")
  144. # 检查方法是否存在
  145. methods = dir(MilvusVectorManager())
  146. has_create = 'create_hybrid_collection' in methods
  147. has_hybrid = 'hybrid_search' in methods
  148. print(f"\n 方法存在性检查:")
  149. print(f" create_hybrid_collection: {'✓' if has_create else '✗'}")
  150. print(f" hybrid_search: {'✓' if has_hybrid else '✗'}")
  151. return has_create and has_hybrid
  152. except Exception as e:
  153. print(f"❌ 方法签名检查失败: {e}")
  154. return False
  155. if __name__ == "__main__":
  156. print("开始测试 MilvusVectorManager 的核心方法...")
  157. # 方法签名检查
  158. signature_ok = test_method_signatures()
  159. if signature_ok:
  160. # 核心功能测试
  161. function_ok = test_milvus_vector_manager_methods()
  162. else:
  163. function_ok = False
  164. print("\n" + "=" * 70)
  165. print("测试结果总结:")
  166. print(f"✅ 方法签名检查: {'通过' if signature_ok else '失败'}")
  167. print(f"✅ 核心功能测试: {'通过' if function_ok else '失败'}")
  168. if signature_ok and function_ok:
  169. print("\n🎉 所有测试通过!")
  170. print("\n📝 MilvusVectorManager 核心方法完全可用:")
  171. print(" ✓ create_hybrid_collection() - 混合集合创建")
  172. print(" ✓ hybrid_search() - 混合搜索")
  173. print(" ✓ 加权搜索 (ranker_type='weighted')")
  174. print(" ✓ RRF搜索 (ranker_type='rrf')")
  175. print(" ✓ 自定义权重配置")
  176. print(" ✓ 完整的错误处理和回退机制")
  177. else:
  178. print("\n❌ 部分测试失败,请检查实现")