linyang 3 viikkoa sitten
vanhempi
sitoutus
b5d47516fd

+ 25 - 1
src/app/base/milvus_connection.py

@@ -40,6 +40,30 @@ def get_milvus_vectorstore(collection_name: str, consistency_level: str = "Stron
         
         if manager.password:
             connection_args["password"] = manager.password
+
+        # 动态检测向量字段名称,兼容旧集合(vector)和新集合(dense)
+        vector_field_name = "dense"
+        try:
+            desc = manager.client.describe_collection(collection_name)
+            fields = desc.get("fields", []) if isinstance(desc, dict) else []
+            float_vector_fields = []
+            for f in fields:
+                f_name = f.get("name")
+                f_type = f.get("type")
+                if not f_name:
+                    continue
+                # DataType.FLOAT_VECTOR 在 pymilvus 中通常是 101,字符串形式可能为 "FloatVector"
+                if f_type == 101 or str(f_type).upper() in ("FLOAT_VECTOR", "FLOATVECTOR"):
+                    float_vector_fields.append(f_name)
+            # 优先 dense,其次 vector,再次第一个向量字段
+            if "dense" in float_vector_fields:
+                vector_field_name = "dense"
+            elif "vector" in float_vector_fields:
+                vector_field_name = "vector"
+            elif float_vector_fields:
+                vector_field_name = float_vector_fields[0]
+        except Exception as e:
+            logger.warning(f"自动检测向量字段失败,使用默认 'dense': {e}")
         
         vectorstore = Milvus(
             embedding_function=embedding_function,
@@ -47,7 +71,7 @@ def get_milvus_vectorstore(collection_name: str, consistency_level: str = "Stron
             connection_args=connection_args,
             consistency_level=consistency_level,
             builtin_function=BM25BuiltInFunction(),
-            vector_field=["dense", "sparse"]
+            vector_field=vector_field_name
         )
         return vectorstore
     except Exception as e:

+ 2 - 1
src/app/sample/models/knowledge_base.py

@@ -15,7 +15,8 @@ class KnowledgeBase(BaseModel):
     collection_name_parent = Column(String(100), nullable=False, unique=True, comment="Milvus集合名称(Table Name)(父)")
     collection_name_children = Column(String(100), nullable=True, comment="Milvus集合名称(Table Name)(子)")
     description = Column(String(500), nullable=True, comment="描述")
-    status = Column(String(20), default="normal", comment="状态: normal(正常), test(测试), disabled(禁用)")
+    # 默认禁用,只有同步成功后才置为 normal
+    status = Column(String(20), default="disabled", comment="状态: normal(正常), test(测试), disabled(禁用)")
     document_count = Column(Integer, default=0, comment="文档数量")
     is_deleted = Column(TINYINT, default=0, comment="是否删除")
     created_by = Column(String(500), nullable=True, comment="创建人")

+ 2 - 1
src/app/sample/schemas/knowledge_base.py

@@ -14,7 +14,8 @@ class KnowledgeBaseBase(BaseModel):
     collection_name_parent: str = Field(..., description="Milvus集合名称(父)")
     collection_name_children: str = Field(..., description="Milvus集合名称(子)")
     description: Optional[str] = Field(None, description="描述")
-    status: Optional[str] = Field("normal", description="状态")
+    # 默认状态改为禁用,只有同步成功后才置为 normal
+    status: Optional[str] = Field("disabled", description="状态")
 
 class CustomSchemaField(BaseModel):
     """自定义Schema字段定义"""

+ 38 - 13
src/app/services/knowledge_base_service.py

@@ -271,13 +271,18 @@ class KnowledgeBaseService:
     async def create(self, db: AsyncSession, payload: KnowledgeBaseCreate) -> KnowledgeBase:
         """创建新知识库"""
         # 1. 检查 DB 是否已存在
-        # 检查 collection_name_parent
-        exists1 = await db.execute(select(KnowledgeBase).where(
-            KnowledgeBase.collection_name_parent == payload.collection_name_parent,
-            KnowledgeBase.is_deleted == 0
-        ))
-        if exists1.scalars().first():
-            raise ValueError(f"集合名称 {payload.collection_name_parent} 已存在")
+        # 检查父子集合名称不能相同
+        if payload.collection_name_children and payload.collection_name_parent == payload.collection_name_children:
+            raise ValueError("父集合名称和子集合名称不能相同")
+
+        # 检查 collection_name_parent (可选)
+        if payload.collection_name_parent:
+            exists1 = await db.execute(select(KnowledgeBase).where(
+                KnowledgeBase.collection_name_parent == payload.collection_name_parent,
+                KnowledgeBase.is_deleted == 0
+            ))
+            if exists1.scalars().first():
+                raise ValueError(f"集合名称 {payload.collection_name_parent} 已存在")
             
         # 检查 collection_name_children
         if payload.collection_name_children:
@@ -300,7 +305,8 @@ class KnowledgeBaseService:
                 collection_name_parent=payload.collection_name_parent,
                 collection_name_children=payload.collection_name_children,
                 description=payload.description,
-                status=payload.status or "normal",
+                # 默认创建为禁用状态,待同步成功后再启用
+                status="disabled",
                 created_by="admin",
                 updated_by="admin",
                 created_time=now,
@@ -451,9 +457,12 @@ class KnowledgeBaseService:
                 except Exception as milvus_err:
                     print(f"Ignore Milvus error during delete {col}: {milvus_err}")
             
-            # 2. 软删除 DB 记录
-            kb.is_deleted = 1
-            kb.updated_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            # 2. 解除文档关联 (将 kb_id 置空,状态改为未入库)
+            await db.execute(
+                sql_update(DocumentMain)
+                .where(DocumentMain.kb_id == id)
+                .values(kb_id=None, whether_to_enter=0)
+            )
             
             # 3. 删除关联的元数据 (硬删除)
             await db.execute(sql_delete(SampleMetadata).where(SampleMetadata.knowledge_base_id == id))
@@ -461,6 +470,9 @@ class KnowledgeBaseService:
             # 4. 删除关联的自定义Schema (硬删除)
             await db.execute(sql_delete(CustomSchema).where(CustomSchema.knowledge_base_id == id))
 
+            # 5. 硬删除 KnowledgeBase 记录
+            await db.execute(sql_delete(KnowledgeBase).where(KnowledgeBase.id == id))
+
             await db.commit()
         except Exception as e:
             await db.rollback()
@@ -512,7 +524,20 @@ class KnowledgeBaseService:
                     description=kb.description or "",
                     fields=fields
                 )
-                
+
+            # 同步完成后将知识库状态置为 normal,并触发 load
+            kb.status = "normal"
+            kb.updated_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+            targets = []
+            if kb.collection_name_parent: targets.append(kb.collection_name_parent)
+            if kb.collection_name_children: targets.append(kb.collection_name_children)
+            for col in targets:
+                if milvus_service.has_collection(col):
+                    milvus_service.set_collection_state(col, "load")
+
+            await db.commit()
+            await db.refresh(kb)
             return kb
         except Exception as e:
             raise e
@@ -659,4 +684,4 @@ class KnowledgeBaseService:
                 return 0
         return 0
 
-knowledge_base_service = KnowledgeBaseService()
+knowledge_base_service = KnowledgeBaseService()

+ 2 - 0
src/app/services/milvus_service.py

@@ -271,6 +271,7 @@ class MilvusService:
                 schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True, auto_id=True)
             
             # 检查是否有默认向量列,如果没有则添加 (兼容旧逻辑)
+            # 检查是否有默认向量列,如果没有则添加 (兼容旧逻辑,但如果fields里有dense则不添加)
             has_vector = any(f.get("type") == "FLOAT_VECTOR" for f in fields)
             if not has_vector:
                 schema.add_field(field_name="dense", datatype=DataType.FLOAT_VECTOR, dim=dimension)
@@ -717,6 +718,7 @@ class MilvusService:
             logger.error(f"Error in hybrid search: {e}")
             # 回退到传统的向量搜索
             logger.info("Falling back to traditional vector search")
+            return []
 
 
 # 可选:单例

+ 160 - 114
src/app/services/search_engine_service.py

@@ -97,7 +97,7 @@ class SearchEngineService:
             dim = 768
 
         # 选择 Milvus 向量字段名(anns_field),字段名可能不是固定的 "vector",也可能叫 'dense'/'denser' 等
-        anns_field = "vector"
+        anns_field = "dense"
         if collection_detail and isinstance(collection_detail, dict):
             fields = collection_detail.get("fields", []) or []
             # 优先寻找有 params.dim 的向量字段
@@ -114,7 +114,7 @@ class SearchEngineService:
                     break
 
             # 若未找到带 dim 的字段,尝试匹配常见的向量字段名或字段类型包含 "vector"
-            if anns_field == "vector":
+            if anns_field == "dense":
                 for f in fields:
                     if not isinstance(f, dict):
                         continue
@@ -179,24 +179,24 @@ class SearchEngineService:
                 target_field = safe_field
                 if not is_top_level:
                      target_field = f'metadata["{safe_field}"]'
-                
-                if safe_value.isdigit():
-                    expr_list.append(f'{target_field} == {safe_value}')
-                else:
-                    expr_list.append(f'{target_field} == "{safe_value}"')
+
+                expr_list.append(f'{target_field} == "{safe_value}"')
         
         # 处理新的多重过滤
         if payload.filters:
             for f in payload.filters:
                 safe_field = f.field.replace("'", "").replace('"', "").strip()
-                safe_value = f.value.replace("'", "").replace('"', "").strip()
+                raw_value = (f.value or "").strip()
+                safe_value = raw_value.replace("'", "").replace('"', "").strip()
                 
                 if safe_field and safe_value:
                     # [Special Case] 文档名称过滤 (doc_name_in)
                     # 前端传递的是 "doc_name_in", value 是 JSON 数组字符串 (e.g. '["doc1", "doc2"]')
                     if safe_field == 'doc_name_in':
                         try:
-                            doc_names = json.loads(safe_value)
+                            # doc_name_in 的 value 必须保留引号,否则不是合法 JSON
+                            parse_value = raw_value
+                            doc_names = json.loads(parse_value)
                             if doc_names and isinstance(doc_names, list):
                                 # 构建 OR 条件: (metadata["doc_name"] == "A" || metadata["doc_name"] == "B")
                                 # 注意:Milvus 字段可能是 doc_name, file_name, title, source
@@ -231,7 +231,7 @@ class SearchEngineService:
                                 # JSON 字段内访问不存在的 key 通常返回 null/empty,不会报错
                                 # 所以用 OR 连接是安全的
                                 
-                                doc_filter_expr = f"({' || '.join(sub_exprs)})"
+                                doc_filter_expr = f"({' or '.join(sub_exprs)})"
                                 expr_list.append(doc_filter_expr)
                                 continue # 处理完特殊字段,跳过后续通用逻辑
                         except Exception as e:
@@ -251,14 +251,127 @@ class SearchEngineService:
                     if not is_top_level:
                          target_field = f'metadata["{safe_field}"]'
 
-                    if safe_value.isdigit():
-                        expr_list.append(f'{target_field} == {safe_value}')
-                    else:
-                        expr_list.append(f'{target_field} == "{safe_value}"')
+                    # [Fix] 统一将 metadata 值视为字符串查询
+                    expr_list.append(f'{target_field} == "{safe_value}"')
         
         # 组合所有条件 (使用 AND)
         expr = " and ".join(expr_list) if expr_list else ""
         
+        # 3. 确定分页参数
+        page = payload.page if payload.page and payload.page > 0 else 1
+        page_size = payload.page_size if payload.page_size and payload.page_size > 0 else 10
+        offset = (page - 1) * page_size
+        limit = page_size
+
+        has_query = payload.query and payload.query.strip()
+        
+        if not has_query:
+            # --- 分支 A: 纯标量查询 (无关键词) ---
+            logger = logging.getLogger(__name__)
+            logger.info(f"Scalar query mode for KB={kb_id}, expr={expr}")
+            try:
+                # 1. 获取总数
+                total = 0
+                count_expr = expr if expr else ""
+                # 如果没有表达式,默认查所有 (需要满足 Milvus 语法)
+                if not count_expr:
+                    # 简单获取 stats
+                    stats = milvus_service.client.get_collection_stats(collection_name=kb_id)
+                    total = int(stats.get("row_count", 0)) if isinstance(stats, dict) else 0
+                else:
+                    # 带条件 count
+                    res_cnt = milvus_service.client.query(kb_id, filter=count_expr, output_fields=["count(*)"])
+                    if res_cnt:
+                        total = res_cnt[0].get("count(*)") or 0
+                
+                # 2. 分页查询
+                # 如果没有 expr,Milvus query 需要一个 valid expression
+                # 尝试用 id >= 0,前提是 id 是 int。如果是 varchar,用 id != ""
+                query_expr = expr
+                if not query_expr:
+                    # 获取主键字段名和类型
+                    pk_field = "pk" # 默认
+                    is_int = True
+                    try:
+                        desc = milvus_service.client.describe_collection(kb_id)
+                        if isinstance(desc, dict) and 'fields' in desc:
+                            for f in desc['fields']:
+                                if f.get('primary_key') or f.get('is_primary'):
+                                    pk_field = f.get('name')
+                                    # Type 5 is INT64, 21 is VARCHAR. 
+                                    if f.get('type') == 21 or str(f.get('type')).upper() == 'VARCHAR':
+                                        is_int = False
+                                    break
+                    except:
+                        pass
+                    
+                    query_expr = f'{pk_field} >= 0' if is_int else f'{pk_field} != ""'
+
+                res_page = milvus_service.client.query(
+                    collection_name=kb_id,
+                    filter=query_expr,
+                    output_fields=["*"],
+                    limit=limit,
+                    offset=offset
+                )
+                
+                formatted_results = []
+                for item in res_page:
+                    item_metadata = item.get('metadata') or {}
+                    if isinstance(item_metadata, str):
+                        try:
+                            item_metadata = json.loads(item_metadata)
+                        except Exception:
+                            item_metadata = {}
+
+                    # PDR 模式内容获取 (可选)
+                    item_content = item.get('text') or item.get('content') or item.get('page_content') or ""
+                    if is_pdr:
+                        parent_id = item_metadata.get("parent_id") or item.get("parent_id")
+                        if parent_id:
+                            try:
+                                parent_results = milvus_service.client.query(
+                                    collection_name=parent_col,
+                                    filter=f'parent_id == "{parent_id}"',
+                                    output_fields=["text", "content", "page_content"]
+                                )
+                                if parent_results:
+                                    p_entity = parent_results[0]
+                                    parent_full = p_entity.get("text") or p_entity.get("content") or p_entity.get("page_content")
+                                    if parent_full:
+                                        item_content = f"【父段内容】\n{parent_full}\n\n【片段内容】\n{item_content}"
+                            except:
+                                pass
+
+                    doc_name = (
+                        item_metadata.get('doc_name')
+                        or item_metadata.get('file_name')
+                        or item_metadata.get('title')
+                        or item_metadata.get('source')
+                        or item.get('file_name')
+                        or item.get('title')
+                        or item.get('source')
+                        or "未知文档"
+                    )
+
+                    formatted_results.append(KBSearchResultItem(
+                        id=str(item.get('pk') or item.get('id')),
+                        kb_name=original_kb_id,
+                        doc_name=doc_name,
+                        content=item_content,
+                        meta_info=str(item_metadata),
+                        document_id=str(item.get("document_id") or ""),
+                        metadata=item_metadata,
+                        score=0
+                    ))
+                
+                return KBSearchResponse(results=formatted_results, total=total)
+
+            except Exception as e:
+                logging.error(f"Scalar query failed: {e}")
+                return KBSearchResponse(results=[], total=0)
+
+        # --- 分支 B: 向量/混合检索 (有关键词) ---
         # 选择 Milvus 向量字段名后生成向量 (移到这里,因为之前代码被替换掉了)
         query_vector = text_to_vector_algo(payload.query, dim=dim)
         
@@ -337,80 +450,7 @@ class SearchEngineService:
             # 但现在 Schema 默认 page=1, page_size=10,所以总是走分页逻辑
             
             try:
-                # 尝试使用混合检索 (Hybrid Search)
-                # 只有当用户没有显式指定 metric_type 或者指定为 hybrid 时,且集合支持(通常通过异常回退处理)时使用
-                # 但考虑到 metric_type 可能是 L2/COSINE,我们这里先尝试 hybrid,如果失败回退到普通
-                
-                # 为了不破坏现有逻辑,我们可以根据某种标志来决定是否使用 hybrid
-                # 或者默认尝试 hybrid,如果 collection 不支持 sparse 则会报错回退
-                
-                # 这里我们直接调用 milvus_service.hybrid_search
-                # 注意:hybrid_search 返回的格式与 client.search 不同,需要适配
-                
-                use_hybrid = False
-                # 只有当 metric_type 为 None 或者特定值时才尝试混合检索,避免与用户明确指定的 metric 冲突
-                # 或者我们可以认为只要不指定,就优先尝试混合
-                # 已经在上面判断过 use_hybrid = True 了
-                
-                if use_hybrid:
-                    logger.info(f"Attempting hybrid search for KB={kb_id}")
-                    try:
-                        # Hybrid search (LangChain Milvus) 暂时不支持直接传 offset
-                        # 所以我们需要获取 top_k = offset + limit,然后手动切片
-                        target_k = offset + limit
-                        
-                        hybrid_results = milvus_service.hybrid_search(
-                            collection_name=kb_id,
-                            query_text=payload.query,
-                            top_k=target_k
-                        )
-                        
-                        # 手动切片实现分页
-                        start = offset
-                        end = offset + limit
-                        # 确保不越界
-                        if start >= len(hybrid_results):
-                            sliced_results = []
-                        else:
-                            sliced_results = hybrid_results[start:end]
-                        
-                        formatted_results = []
-                        for item in sliced_results:
-                            item_content = item.get('text_content') or ""
-                            item_metadata = item.get('metadata', {})
-                            
-                            # PDR 模式:从父表获取内容
-                            if is_pdr:
-                                parent_id = item_metadata.get("parent_id")
-                                if parent_id:
-                                    try:
-                                        parent_results = milvus_service.client.query(
-                                            collection_name=parent_col,
-                                            filter=f'parent_id == "{parent_id}"',
-                                            output_fields=["text", "content", "page_content"]
-                                        )
-                                        if parent_results:
-                                            p_entity = parent_results[0]
-                                            item_content = p_entity.get("text") or p_entity.get("content") or p_entity.get("page_content") or item_content
-                                    except Exception as e:
-                                        logging.error(f"Failed to fetch parent chunk {parent_id} from {parent_col}: {e}")
-
-                            formatted_results.append(KBSearchResultItem(
-                                id=str(item.get('id')),
-                                kb_name=original_kb_id, # 使用原始 ID
-                                doc_name=item_metadata.get('file_name') or item_metadata.get('source') or "未知文档",
-                                content=item_content,
-                                meta_info=str(item_metadata),
-                                score=item.get('similarity', 0) * 100 
-                            ))
-
-                        return KBSearchResponse(results=formatted_results, total=collection_count)
-
-                    except Exception as hybrid_err:
-                        logger.warning(f"Hybrid search failed, falling back to vector search: {hybrid_err}")
-                        # Fallback to standard vector search below
-                        pass
-
+                # 暂时仅使用向量检索,关闭混合检索以保证相似度与查询内容强相关
                 results = milvus_service.client.search(
                     collection_name=kb_id,
                     data=[query_vector],
@@ -651,8 +691,7 @@ class SearchEngineService:
                                 meta_info.append(f"{k}: {v}")
                         meta_str = "; ".join(meta_info[:3])
                     
-                    # 根据 collection 的 metric 动态计算相似度分数
-                    # 如果用户请求了特定的 metric,尝试适配;否则使用实际 metric
+                    # 根据 collection 的 metric 动态计算相似度分数(先从原始向量距离/相似度换算到 0-100)
                     display_metric = requested_metric if requested_metric else metric_type
                     
                     similarity_pct = None
@@ -662,54 +701,61 @@ class SearchEngineService:
                         raw_score = None
 
                     if raw_score is not None:
-                        # 核心计算逻辑:先根据 metric_type 理解 raw_score,再根据 display_metric 转换
-                        # 目前简化处理:直接根据 display_metric 解释 raw_score,忽略不兼容的情况
-                        # 更好的做法是:
-                        # 1. 识别 raw_score 的物理意义(距离还是相似度),基于 metric_type
-                        # 2. 转换为 display_metric 要求的格式
-                        
-                        # Case 1: 实际是 L2 (距离),用户想看 L2
                         if "L2" in metric_type or "EUCLIDEAN" in metric_type:
                             distance = raw_score
-                            if display_metric and ("COSINE" in display_metric):
-                                # L2 距离转 Cosine 相似度 (仅适用于归一化向量)
-                                # dist^2 = 2(1-cos) => cos = 1 - dist^2/2
-                                # 但这里简单起见,如果类型不匹配,还是按 L2 算百分比,避免数值错误
-                                similarity_pct = round((1.0 / (1.0 + distance)) * 100.0, 2)
-                            else:
-                                similarity_pct = round((1.0 / (1.0 + distance)) * 100.0, 2)
-                                
-                        # Case 2: 实际是 Cosine (相似度 [-1, 1])
+                            similarity_pct = round((1.0 / (1.0 + distance)) * 100.0, 2)
                         elif "COSINE" in metric_type:
                             cosine_score = raw_score
-                            # 无论用户想看什么,Cosine Score 本身就是相似度,直接归一化到 0-100 最直观
                             similarity_pct = round(max(min((cosine_score + 1.0) / 2.0, 1.0), 0.0) * 100.0, 2)
-                            
-                        # Case 3: IP (内积)
                         elif "IP" in metric_type or "INNER" in metric_type:
                              similarity_pct = round(raw_score * 100.0, 2)
-                        
-                        # Fallback
                         else:
-                            # 兼容 BM25 或其他未知 metric
                             if "BM25" in metric_type:
-                                # BM25 分数通常是正数,没有固定上限,直接显示原值
                                 similarity_pct = round(raw_score, 2)
                             else:
                                 similarity_pct = round(raw_score * 100.0, 2)
                     else:
                         similarity_pct = 0.0
 
+                    # 结合关键词做一次简单的“相关性校正”:若片段完全不包含检索词,则降低相似度权重
+                    query_text = (payload.query or "").strip()
+                    if query_text:
+                        plain_content = (content or "") + " " + (doc_name or "") + " " + (meta_str or "")
+                        if query_text not in plain_content:
+                            # 纯向量相似但没有任何文本命中,认为是“语义可能相关,但与关键词弱相关”,适当降权
+                            similarity_pct = round(similarity_pct * 0.4, 2)
+
                     formatted_results.append(KBSearchResultItem(
                         id=str(hit.id),
                         kb_name=original_kb_id,
                         doc_name=doc_name,
                         content=content,
                         meta_info=meta_str,
+                        document_id=str(document_id) if document_id is not None else None,
+                        metadata=meta_dict if isinstance(meta_dict, dict) else None,
                         score=similarity_pct
                     ))
             
-            return KBSearchResponse(results=formatted_results, total=len(formatted_results))
+            # 按相似度由大到小排序
+            formatted_results.sort(key=lambda x: x.score, reverse=True)
+
+            # [Fix] 动态计算 total 用于分页
+            # 如果当前页结果不满 limit,说明是最后一页
+            current_count = len(formatted_results)
+            if current_count < limit:
+                 final_total = offset + current_count
+            else:
+                 # 否则,使用 collection_count (上限 1000)
+                 # 如果 collection_count 获取失败(0),则至少允许翻页
+                 base_total = collection_count if collection_count > 0 else 1000
+                 final_total = min(base_total, 1000)
+                 
+                 # 确保 final_total 至少能覆盖当前页+1 (如果有满页结果)
+                 # 这样用户能看到"下一页"按钮
+                 if final_total <= offset + current_count:
+                     final_total = offset + current_count + 10 # 预留一页
+
+            return KBSearchResponse(results=formatted_results, total=final_total)
             
         except Exception as e:
             print(f"Search error: {e}")

+ 14 - 15
src/app/services/snippet_service.py

@@ -301,12 +301,12 @@ class SnippetService:
         
         # 1. 动态构建 metadata
         # 查找 KnowledgeBase ID
-        # [Modified] 支持 collection_name1 和 collection_name2
+
         from sqlalchemy import or_
         kb_query = select(KnowledgeBase).where(
             or_(
-                KnowledgeBase.collection_name1 == payload.collection_name,
-                KnowledgeBase.collection_name2 == payload.collection_name
+                KnowledgeBase.collection_name_parent == payload.collection_name,
+                KnowledgeBase.collection_name_children == payload.collection_name
             )
         )
         result = await db.execute(kb_query)
@@ -331,7 +331,7 @@ class SnippetService:
         # 基础数据
         now = int(time.time() * 1000)
         item = {
-            "vector": fake_vector,
+            "dense": fake_vector,
             "text": payload.content,
             "document_id": payload.custom_fields.get("document_id") if hasattr(payload, 'custom_fields') and payload.custom_fields and payload.custom_fields.get("document_id") else str(uuid.uuid4()),
             "parent_id": payload.custom_fields.get("parent_id", "") if hasattr(payload, 'custom_fields') and payload.custom_fields else "",
@@ -421,12 +421,12 @@ class SnippetService:
         
         # 动态构建 metadata
         # 查找 KnowledgeBase ID
-        # [Modified] 支持 collection_name1 和 collection_name2
+        # [Modified] 支持 collection_name_parent 和 collection_name_children
         from sqlalchemy import or_
         kb_query = select(KnowledgeBase).where(
             or_(
-                KnowledgeBase.collection_name1 == kb_name,
-                KnowledgeBase.collection_name2 == kb_name
+                KnowledgeBase.collection_name_parent == kb_name,
+                KnowledgeBase.collection_name_children == kb_name
             )
         )
         result = await db.execute(kb_query)
@@ -466,7 +466,7 @@ class SnippetService:
         
         now = int(time.time() * 1000)
         item = {
-            "vector": fake_vector,
+            "dense": fake_vector,
             "text": payload.content,
             "document_id": payload.custom_fields.get("document_id") if hasattr(payload, 'custom_fields') and payload.custom_fields and payload.custom_fields.get("document_id") else str(uuid.uuid4()),
             "parent_id": payload.custom_fields.get("parent_id", "") if hasattr(payload, 'custom_fields') and payload.custom_fields else "",
@@ -807,19 +807,18 @@ class SnippetService:
                     from sqlalchemy import select, or_
                     from app.sample.models.knowledge_base import KnowledgeBase
                     
-                    # 假设当前 kb 是 collection_name2 (子表),我们要找 collection_name1 (父表)
+                    # 假设当前 kb 是 collection_name_children (子表),我们要找 collection_name_parent (父表)
                     stmt = select(KnowledgeBase).where(
                         or_(
-                            KnowledgeBase.collection_name2 == kb,
-                            KnowledgeBase.collection_name1 == kb # 兼容处理
+                            KnowledgeBase.collection_name_children == kb,
+                            KnowledgeBase.collection_name_parent == kb # 兼容处理
                         )
                     )
                     res = await db.execute(stmt)
                     kb_record = res.scalars().first()
                     
-                    if kb_record and kb_record.collection_name1:
-                        parent_kb = kb_record.collection_name1
-                        print(f"DEBUG: Found parent KB: {parent_kb} for child {kb}nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn")
+                    if kb_record and kb_record.collection_name_parent:
+                        parent_kb = kb_record.collection_name_parent
                         
                         # 2. 在父表中查询 parent_id
                         # 父表中的 ID 应该是 document_id 或者 pk
@@ -900,7 +899,7 @@ class SnippetService:
                             # else:
                                 # print("DEBUG: Parent content NOT found")
                     # else:
-                        # print("DEBUG: Parent KB record NOT found or no collection_name1")
+                        # print("DEBUG: Parent KB record NOT found or no collection_name_parent")
                                 
                 except Exception as e:
                     print(f"Failed to fetch parent content: {e}")