debug_collection.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. #!/usr/bin/env python3
  2. """
  3. 调试 LangChain Milvus 创建的集合字段结构
  4. """
  5. import sys
  6. import os
  7. # 添加项目根目录到路径
  8. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  9. print("调试 LangChain Milvus 集合字段结构")
  10. print("=" * 50)
  11. def debug_collection_structure():
  12. """调试集合字段结构"""
  13. try:
  14. from langchain_milvus import Milvus, BM25BuiltInFunction
  15. from langchain_core.documents import Document
  16. from foundation.ai.models.model_handler import model_handler
  17. # 连接参数
  18. connection_args = {
  19. "uri": "http://192.168.92.61:19530",
  20. "user": None,
  21. "db_name": "lq_db"
  22. }
  23. collection_name = "debug_collection_fields"
  24. # 获取嵌入模型
  25. emdmodel = model_handler._get_lq_qwen3_8b_emd()
  26. # 创建测试文档
  27. test_docs = [
  28. Document(page_content="测试文档内容", metadata={"category": "test"})
  29. ]
  30. print("1. 创建 LangChain Milvus 混合搜索集合...")
  31. vectorstore = Milvus.from_documents(
  32. documents=test_docs,
  33. embedding=emdmodel,
  34. builtin_function=BM25BuiltInFunction(),
  35. vector_field=["dense", "sparse"],
  36. connection_args=connection_args,
  37. collection_name=collection_name,
  38. consistency_level="Strong",
  39. drop_old=True,
  40. )
  41. print("✓ 集合创建成功")
  42. # 等待索引创建
  43. import time
  44. time.sleep(3)
  45. print("\n2. 检查集合结构...")
  46. from pymilvus import Collection, utility
  47. if utility.has_collection(collection_name):
  48. collection = Collection(collection_name)
  49. # 获取集合信息
  50. print(f"集合名称: {collection.name}")
  51. print(f"集合描述: {collection.description}")
  52. print(f"集合数量: {collection.num_entities}")
  53. # 获取字段信息
  54. schema = collection.schema
  55. print(f"\n字段结构:")
  56. for field in schema.fields:
  57. print(f" - 字段名: {field.name}")
  58. print(f" 类型: {field.dtype}")
  59. print(f" 是否主键: {field.is_primary}")
  60. print(f" 是否自动ID: {field.auto_id}")
  61. if hasattr(field, 'max_length'):
  62. print(f" 最大长度: {field.max_length}")
  63. if hasattr(field, 'dim'):
  64. print(f" 维度: {field.dim}")
  65. print()
  66. # 获取索引信息
  67. print("索引信息:")
  68. indexes = collection.indexes
  69. for index in indexes:
  70. print(f" - 索引字段: {index.field_name}")
  71. print(f" 索引类型: {index.index_type}")
  72. print(f" 索引参数: {index.params}")
  73. print()
  74. # 尝试搜索
  75. print("3. 测试搜索...")
  76. try:
  77. results = vectorstore.similarity_search(
  78. query="测试查询",
  79. k=1,
  80. ranker_type="weighted",
  81. ranker_params={"weights": [0.7, 0.3]}
  82. )
  83. print(f"✓ 搜索成功,返回 {len(results)} 个结果")
  84. for result in results:
  85. print(f" 内容: {result.page_content}")
  86. print(f" 元数据: {result.metadata}")
  87. except Exception as e:
  88. print(f"✗ 搜索失败: {e}")
  89. # 清理
  90. if utility.has_collection(collection_name):
  91. utility.drop_collection(collection_name)
  92. print(f"\n✓ 清理测试集合: {collection_name}")
  93. return True
  94. except Exception as e:
  95. print(f"调试失败: {e}")
  96. import traceback
  97. traceback.print_exc()
  98. return False
  99. if __name__ == "__main__":
  100. debug_collection_structure()