| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212 |
#!/usr/bin/env python3
"""
Directly test the create_hybrid_collection and hybrid_search methods
defined in milvus_vector.py.
"""
import os
import sys

# Put the project root (the parent of this script's directory) at the front
# of the import path so the `foundation` package can be imported below.
_script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_script_dir))

# Banner printed as soon as the script starts.
print("测试 MilvusVectorManager 的 create_hybrid_collection 和 hybrid_search 方法")
print("=" * 70)
def _drop_test_collection(collection_name):
    """Best-effort removal of the Milvus collection used by the test.

    Any failure is reported on stdout and swallowed, so a cleanup problem
    can never change the outcome of the test that triggered it.

    Args:
        collection_name: Name of the collection to drop if it exists.
    """
    print("\n🧹 清理测试集合...")
    try:
        from pymilvus import utility

        if utility.has_collection(collection_name):
            utility.drop_collection(collection_name)
            print(f"✅ 成功清理集合: {collection_name}")
    except Exception as e:
        print(f"⚠️ 清理集合失败: {e}")


def test_milvus_vector_manager_methods():
    """Exercise create_hybrid_collection and hybrid_search end to end.

    Creates a hybrid collection from five sample documents, then runs a
    weighted search, an RRF search and a sweep over three dense/sparse
    weight configurations, printing the results of each step.

    The test collection is dropped in a ``finally`` clause, so it is removed
    even when one of the search steps raises.

    Returns:
        bool: True if every step completed without raising, False otherwise.
    """
    collection_name = "test_milvus_methods"
    # Flipped right before the create call: from that point on something may
    # exist on the server, so cleanup must run no matter how the test ends.
    cleanup_needed = False

    try:
        # Import and initialise the manager under test.
        from foundation.database.base.vector.milvus_vector import MilvusVectorManager
        print("✓ 成功导入 MilvusVectorManager")

        manager = MilvusVectorManager()
        print("✓ MilvusVectorManager 初始化成功")

        # Sample documents inserted into the test collection.
        test_documents = [
            {
                'content': '四川路桥建设集团专注于桥梁和隧道工程建设',
                'metadata': {'category': 'company', 'industry': 'construction', 'id': 1},
            },
            {
                'content': '高速公路桥梁建设技术包括预应力混凝土桥梁和钢结构桥梁',
                'metadata': {'category': 'technology', 'industry': 'highway', 'id': 2},
            },
            {
                'content': '隧道工程施工方法包括盾构法、钻爆法和明挖法',
                'metadata': {'category': 'method', 'industry': 'tunnel', 'id': 3},
            },
            {
                'content': '人工智能在建筑行业的应用包括智能监控和自动化施工',
                'metadata': {'category': 'ai', 'industry': 'technology', 'id': 4},
            },
            {
                'content': 'BIM技术在路桥工程中的数字化应用越来越普及',
                'metadata': {'category': 'bim', 'industry': 'digital', 'id': 5},
            },
        ]

        print("\n🚀 测试 create_hybrid_collection 方法...")
        print(f" 准备创建集合: {collection_name}")
        print(f" 文档数量: {len(test_documents)}")

        cleanup_needed = True
        vectorstore = manager.create_hybrid_collection(
            collection_name=collection_name,
            documents=test_documents,
        )
        print("✅ create_hybrid_collection 执行成功!")
        print(f" 返回的 vectorstore 类型: {type(vectorstore)}")

        # Wait for index creation to finish before searching.
        import time
        time.sleep(3)

        print("\n🔍 测试 hybrid_search 方法...")
        param = {'collection_name': collection_name}

        # 1. Weighted hybrid search.
        print("\n 1. 测试加权混合搜索:")
        query1 = "桥梁建设技术"
        print(f" 查询: '{query1}'")
        results1 = manager.hybrid_search(
            param=param,
            query_text=query1,
            top_k=3,
            ranker_type="weighted",
            dense_weight=0.7,
            sparse_weight=0.3,
        )
        print(f" 找到 {len(results1)} 个结果:")
        for rank, result in enumerate(results1, start=1):
            content = result.get('text_content', '')[:50]
            similarity = result.get('similarity', 0)
            metadata = result.get('metadata', {})
            print(f" {rank}. {content}... (相似度: {similarity:.4f})")
            print(f" 元数据: {metadata}")

        # 2. RRF hybrid search.
        print("\n 2. 测试RRF混合搜索:")
        query2 = "人工智能应用"
        print(f" 查询: '{query2}'")
        results2 = manager.hybrid_search(
            param=param,
            query_text=query2,
            top_k=2,
            ranker_type="rrf",
        )
        print(f" 找到 {len(results2)} 个结果:")
        for rank, result in enumerate(results2, start=1):
            content = result.get('text_content', '')
            metadata = result.get('metadata', {})
            print(f" {rank}. {content}")
            print(f" 元数据: {metadata}")

        # 3. Same query under several dense/sparse weightings.
        print("\n 3. 测试不同权重配置:")
        query3 = "路桥工程"
        weight_configs = [
            {"dense": 0.8, "sparse": 0.2, "name": "语义优先"},
            {"dense": 0.2, "sparse": 0.8, "name": "关键词优先"},
            {"dense": 0.5, "sparse": 0.5, "name": "平衡配置"},
        ]
        for config in weight_configs:
            print(f" {config['name']} (dense={config['dense']}, sparse={config['sparse']}):")
            results3 = manager.hybrid_search(
                param=param,
                query_text=query3,
                top_k=2,
                ranker_type="weighted",
                dense_weight=config["dense"],
                sparse_weight=config["sparse"],
            )
            print(f" 返回 {len(results3)} 个结果")
            if results3:
                best_content = results3[0].get('text_content', '')[:50]
                print(f" 最佳匹配: {best_content}...")

        return True
    except Exception as e:
        # Top-level boundary of the script-style test: report and signal
        # failure through the return value instead of propagating.
        print(f"❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Runs on success and on failure, so a crashed run does not leave
        # the test collection behind for the next run.
        if cleanup_needed:
            _drop_test_collection(collection_name)
def test_method_signatures():
    """Check that MilvusVectorManager exposes the hybrid methods.

    Presence of ``create_hybrid_collection`` and ``hybrid_search`` is looked
    up on the class itself *before* any introspection, so a missing method
    is reported with a ✗ mark rather than aborting the whole check with an
    AttributeError. Signatures are printed only for methods that exist.
    No MilvusVectorManager instance is created by this check.

    Returns:
        bool: True if both names are callable attributes of
        MilvusVectorManager; False otherwise, including when the import
        or the introspection raises.
    """
    required = ("create_hybrid_collection", "hybrid_search")
    try:
        import inspect

        from foundation.database.base.vector.milvus_vector import MilvusVectorManager

        # Resolve each required name once; None marks a missing method.
        found = {name: getattr(MilvusVectorManager, name, None) for name in required}

        print("\n📋 方法签名检查:")
        for name, method in found.items():
            if callable(method):
                print(f" {name}{inspect.signature(method)}")

        print("\n 方法存在性检查:")
        for name, method in found.items():
            print(f" {name}: {'✓' if callable(method) else '✗'}")

        return all(callable(method) for method in found.values())
    except Exception as e:
        print(f"❌ 方法签名检查失败: {e}")
        # Same diagnostics as the functional test: show where it failed.
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Script entry point: run the signature check, then the functional test,
    # print a summary and report the outcome through the exit status.
    print("开始测试 MilvusVectorManager 的核心方法...")

    # The functional test needs both methods, so it is skipped (and counted
    # as failed) when the signature check does not pass.
    signature_ok = test_method_signatures()
    function_ok = test_milvus_vector_manager_methods() if signature_ok else False

    print("\n" + "=" * 70)
    print("测试结果总结:")
    # The icon follows the actual result; a failed step must not show ✅.
    print(f"{'✅' if signature_ok else '❌'} 方法签名检查: {'通过' if signature_ok else '失败'}")
    print(f"{'✅' if function_ok else '❌'} 核心功能测试: {'通过' if function_ok else '失败'}")

    all_ok = signature_ok and function_ok
    if all_ok:
        print("\n🎉 所有测试通过!")
        print("\n📝 MilvusVectorManager 核心方法完全可用:")
        print(" ✓ create_hybrid_collection() - 混合集合创建")
        print(" ✓ hybrid_search() - 混合搜索")
        print(" ✓ 加权搜索 (ranker_type='weighted')")
        print(" ✓ RRF搜索 (ranker_type='rrf')")
        print(" ✓ 自定义权重配置")
        print(" ✓ 完整的错误处理和回退机制")
    else:
        print("\n❌ 部分测试失败,请检查实现")

    # Non-zero exit status lets shells and CI detect a failed run.
    sys.exit(0 if all_ok else 1)
|