# Original file: milvus入库脚本.py ("Milvus ingestion script"), 3.2 KB
  1. #!/usr/bin/env python3
  2. """
  3. 测试修复后的 Milvus 向量实现
  4. """
  5. import sys
  6. import os
  7. # 添加项目根目录到路径
  8. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  9. print("测试修复后的 Milvus 向量实现")
  10. print("=" * 50)
  11. def test_basic_functionality():
  12. """测试基本功能"""
  13. try:
  14. # 导入并初始化 MilvusVectorManager
  15. from foundation.database.base.vector.milvus_vector import MilvusVectorManager
  16. print("成功导入 MilvusVectorManager")
  17. # 初始化管理器
  18. manager = MilvusVectorManager()
  19. print("MilvusVectorManager 初始化成功")
  20. # 测试 text_to_vector 方法
  21. test_text = "桥梁建设技术"
  22. vector = manager.text_to_vector(test_text)
  23. print(f"text_to_vector 测试成功,向量维度: {len(vector)}")
  24. # 简单测试文档
  25. test_documents = [
  26. {
  27. 'content': '四川路桥建设集团专注于桥梁和隧道工程建设',
  28. 'metadata': {'category': 'company', 'type': 'construction'}
  29. },
  30. {
  31. 'content': '高速公路桥梁建设技术包括预应力混凝土和钢结构',
  32. 'metadata': {'category': 'technology', 'type': 'highway'}
  33. }
  34. ]
  35. collection_name = "test_fix_validation"
  36. print(f"\n测试 create_hybrid_collection 方法...")
  37. vectorstore = manager.create_hybrid_collection(
  38. collection_name=collection_name,
  39. documents=test_documents
  40. )
  41. print("create_hybrid_collection 执行成功!")
  42. print(f"返回的 vectorstore 类型: {type(vectorstore)}")
  43. # 等待索引创建完成
  44. import time
  45. time.sleep(5)
  46. print(f"\n测试 hybrid_search 方法...")
  47. param = {'collection_name': collection_name}
  48. # 测试加权搜索
  49. results = manager.hybrid_search(
  50. param=param,
  51. query_text="桥梁建设",
  52. top_k=2,
  53. ranker_type="weighted",
  54. dense_weight=0.7,
  55. sparse_weight=0.3
  56. )
  57. print(f"Hybrid search 执行成功,返回 {len(results)} 个结果")
  58. for i, result in enumerate(results):
  59. content = result.get('text_content', '')[:50]
  60. print(f" {i+1}. {content}...")
  61. # 清理测试集合
  62. print(f"\n清理测试集合...")
  63. try:
  64. from pymilvus import utility
  65. if utility.has_collection(collection_name):
  66. utility.drop_collection(collection_name)
  67. print(f"成功清理集合: {collection_name}")
  68. except Exception as e:
  69. print(f"清理集合失败: {e}")
  70. return True
  71. except Exception as e:
  72. print(f"测试失败: {e}")
  73. import traceback
  74. traceback.print_exc()
  75. return False
  76. if __name__ == "__main__":
  77. success = test_basic_functionality()
  78. print("\n" + "=" * 50)
  79. print(f"测试结果: {'成功' if success else '失败'}")
  80. if success:
  81. print("修复验证成功!")
  82. print("- text_to_vector 方法正常工作")
  83. print("- create_hybrid_collection 方法正常工作")
  84. print("- hybrid_search 方法正常工作")