#!/usr/bin/env python3
"""
Debug the field structure of a collection created by LangChain Milvus.

The script creates a throw-away hybrid-search (dense + BM25 sparse)
collection, prints its schema and indexes, runs one weighted hybrid search
and finally drops the collection again.
"""

import os
import sys
import time
import traceback

# Add the project root directory to the import path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

print("调试 LangChain Milvus 集合字段结构")
print("=" * 50)


def _print_fields(schema):
    """Print name, dtype, primary-key / auto-id flags of every schema field."""
    print("\n字段结构:")
    for field in schema.fields:
        print(f" - 字段名: {field.name}")
        print(f" 类型: {field.dtype}")
        print(f" 是否主键: {field.is_primary}")
        print(f" 是否自动ID: {field.auto_id}")
        if hasattr(field, 'max_length'):
            print(f" 最大长度: {field.max_length}")
        if hasattr(field, 'dim'):
            print(f" 维度: {field.dim}")
        print()


def _print_indexes(collection):
    """Print field name, index type and parameters of every collection index."""
    print("索引信息:")
    for index in collection.indexes:
        print(f" - 索引字段: {index.field_name}")
        print(f" 索引类型: {index.index_type}")
        print(f" 索引参数: {index.params}")
        print()


def _try_hybrid_search(vectorstore):
    """Run one weighted hybrid search; report a failure instead of raising."""
    print("3. 测试搜索...")
    try:
        results = vectorstore.similarity_search(
            query="测试查询",
            k=1,
            ranker_type="weighted",
            ranker_params={"weights": [0.7, 0.3]},
        )
        print(f"✓ 搜索成功,返回 {len(results)} 个结果")
        for result in results:
            print(f" 内容: {result.page_content}")
            print(f" 元数据: {result.metadata}")
    except Exception as e:
        # Best effort: a failing search is itself useful debugging output.
        print(f"✗ 搜索失败: {e}")


def _drop_test_collection(collection_name, alias):
    """Drop the test collection if it exists; return False if cleanup failed."""
    try:
        from pymilvus import utility

        if utility.has_collection(collection_name, using=alias):
            utility.drop_collection(collection_name, using=alias)
            print(f"\n✓ 清理测试集合: {collection_name}")
        return True
    except Exception as e:
        # Cleanup runs from a ``finally`` clause, so it must never raise.
        print(f"调试失败: {e}")
        traceback.print_exc()
        return False


def debug_collection_structure():
    """Create a test hybrid-search collection, dump its structure, clean up.

    Steps:
      1. Build a collection from one test document via ``Milvus.from_documents``
         with a dense embedding field and a BM25 sparse field.
      2. Print collection info, field schema and index definitions.
      3. Run a weighted hybrid similarity search.
      4. Drop the test collection (also when step 2 or 3 fails).

    Returns:
        bool: True when the run and the cleanup completed without an
        unexpected exception, False otherwise (the error and its traceback
        are printed).
    """
    collection_name = "debug_collection_fields"
    vectorstore = None
    alias = "default"
    succeeded = False

    try:
        # Imported lazily so a missing dependency is reported like any other
        # failure instead of aborting the script at import time.
        from langchain_milvus import Milvus, BM25BuiltInFunction
        from langchain_core.documents import Document
        from foundation.ai.models.model_handler import model_handler

        # Connection parameters
        connection_args = {
            "uri": "http://192.168.92.61:19530",
            "user": None,
            "db_name": "lq_db"
        }

        # Embedding model
        emdmodel = model_handler._get_lq_qwen3_8b_emd()

        # Test documents
        test_docs = [
            Document(page_content="测试文档内容", metadata={"category": "test"})
        ]

        print("1. 创建 LangChain Milvus 混合搜索集合...")
        vectorstore = Milvus.from_documents(
            documents=test_docs,
            embedding=emdmodel,
            builtin_function=BM25BuiltInFunction(),
            vector_field=["dense", "sparse"],
            connection_args=connection_args,
            collection_name=collection_name,
            consistency_level="Strong",
            drop_old=True,
        )
        print("✓ 集合创建成功")

        # NOTE(review): the vector store is expected to register its pymilvus
        # connection under ``vectorstore.alias`` rather than "default";
        # fall back to "default" if the attribute is missing.
        alias = getattr(vectorstore, "alias", None) or "default"

        # Wait for index creation
        time.sleep(3)

        print("\n2. 检查集合结构...")
        from pymilvus import Collection, utility

        if utility.has_collection(collection_name, using=alias):
            collection = Collection(collection_name, using=alias)

            # Collection information
            print(f"集合名称: {collection.name}")
            print(f"集合描述: {collection.description}")
            print(f"集合数量: {collection.num_entities}")

            _print_fields(collection.schema)
            _print_indexes(collection)
            _try_hybrid_search(vectorstore)

        succeeded = True
    except Exception as e:
        print(f"调试失败: {e}")
        traceback.print_exc()
    finally:
        # Clean up even when inspection failed, so the test collection is not
        # left behind. Skipped when the store (and its connection) was never
        # created.
        if vectorstore is not None:
            if not _drop_test_collection(collection_name, alias):
                succeeded = False

    return succeeded


if __name__ == "__main__":
    debug_collection_structure()