Przeglądaj źródła

v0.0.3-新增功能
- RAG链路基础更新
- launch_review接口,增加13部分工程方案类型字段

WangXuMing 2 miesięcy temu
rodzic
commit
56cae1a801

+ 1 - 1
database/repositories/bus_data_query.py

@@ -1,7 +1,7 @@
 from typing import List, Tuple, Any, Optional, Dict
 from foundation.observability.logger.loggering import server_logger
 from foundation.utils.common import handler_err
-from foundation.database.sql.mysql.async_mysql_base_dao import AsyncBaseDAO
+from foundation.database.base.sql.async_mysql_base_dao import AsyncBaseDAO
 
 
 class BasisOfPreparationDAO(AsyncBaseDAO):

+ 0 - 121
test/debug_collection.py

@@ -1,121 +0,0 @@
-#!/usr/bin/env python3
-"""
-调试 LangChain Milvus 创建的集合字段结构
-"""
-
-import sys
-import os
-
-# 添加项目根目录到路径
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-print("调试 LangChain Milvus 集合字段结构")
-print("=" * 50)
-
-def debug_collection_structure():
-    """调试集合字段结构"""
-    try:
-        from langchain_milvus import Milvus, BM25BuiltInFunction
-        from langchain_core.documents import Document
-        from foundation.ai.models.model_handler import model_handler
-
-        # 连接参数
-        connection_args = {
-            "uri": "http://192.168.92.61:19530",
-            "user": None,
-            "db_name": "lq_db"
-        }
-
-        collection_name = "debug_collection_fields"
-
-        # 获取嵌入模型
-        emdmodel = model_handler._get_lq_qwen3_8b_emd()
-
-        # 创建测试文档
-        test_docs = [
-            Document(page_content="测试文档内容", metadata={"category": "test"})
-        ]
-
-        print("1. 创建 LangChain Milvus 混合搜索集合...")
-        vectorstore = Milvus.from_documents(
-            documents=test_docs,
-            embedding=emdmodel,
-            builtin_function=BM25BuiltInFunction(),
-            vector_field=["dense", "sparse"],
-            connection_args=connection_args,
-            collection_name=collection_name,
-            consistency_level="Strong",
-            drop_old=True,
-        )
-
-        print("✓ 集合创建成功")
-
-        # 等待索引创建
-        import time
-        time.sleep(3)
-
-        print("\n2. 检查集合结构...")
-        from pymilvus import Collection, utility
-
-        if utility.has_collection(collection_name):
-            collection = Collection(collection_name)
-
-            # 获取集合信息
-            print(f"集合名称: {collection.name}")
-            print(f"集合描述: {collection.description}")
-            print(f"集合数量: {collection.num_entities}")
-
-            # 获取字段信息
-            schema = collection.schema
-            print(f"\n字段结构:")
-            for field in schema.fields:
-                print(f"  - 字段名: {field.name}")
-                print(f"    类型: {field.dtype}")
-                print(f"    是否主键: {field.is_primary}")
-                print(f"    是否自动ID: {field.auto_id}")
-                if hasattr(field, 'max_length'):
-                    print(f"    最大长度: {field.max_length}")
-                if hasattr(field, 'dim'):
-                    print(f"    维度: {field.dim}")
-                print()
-
-            # 获取索引信息
-            print("索引信息:")
-            indexes = collection.indexes
-            for index in indexes:
-                print(f"  - 索引字段: {index.field_name}")
-                print(f"    索引类型: {index.index_type}")
-                print(f"    索引参数: {index.params}")
-                print()
-
-            # 尝试搜索
-            print("3. 测试搜索...")
-            try:
-                results = vectorstore.similarity_search(
-                    query="测试查询",
-                    k=1,
-                    ranker_type="weighted",
-                    ranker_params={"weights": [0.7, 0.3]}
-                )
-                print(f"✓ 搜索成功,返回 {len(results)} 个结果")
-                for result in results:
-                    print(f"  内容: {result.page_content}")
-                    print(f"  元数据: {result.metadata}")
-            except Exception as e:
-                print(f"✗ 搜索失败: {e}")
-
-        # 清理
-        if utility.has_collection(collection_name):
-            utility.drop_collection(collection_name)
-            print(f"\n✓ 清理测试集合: {collection_name}")
-
-        return True
-
-    except Exception as e:
-        print(f"调试失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-if __name__ == "__main__":
-    debug_collection_structure()

+ 0 - 66
test/debug_import.py

@@ -1,66 +0,0 @@
-#!/usr/bin/env python3
-"""
-调试循环导入问题
-"""
-
-import sys
-import os
-
-# 添加项目根目录到路径
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-print("开始调试循环导入问题...")
-
-# 逐步导入依赖,查看在哪里出现问题
-try:
-    print("1. 导入 pymilvus...")
-    from pymilvus import connections
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("2. 导入 langchain_milvus...")
-    from langchain_milvus import Milvus, BM25BuiltInFunction
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("3. 导入 config_handler...")
-    from foundation.infrastructure.config.config import config_handler
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("4. 导入 server_logger...")
-    from foundation.observability.logger.loggering import server_logger
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("5. 导入 base_vector...")
-    from foundation.database.base.vector.base_vector import BaseVectorDB
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("6. 导入 model_handler...")
-    from foundation.ai.models.model_handler import model_handler
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-
-try:
-    print("7. 导入 milvus_vector (完整)...")
-    from foundation.database.base.vector.milvus_vector import MilvusVectorManager
-    print("   成功")
-except Exception as e:
-    print(f"   失败: {e}")
-    import traceback
-    traceback.print_exc()
-
-print("\n调试完成")

+ 1 - 1
test/test_multi_stage_recall.py

@@ -85,7 +85,7 @@ def create_test_collection():
         }
     ]
 
-    collection_name = "test_multi_stage_recall"
+    collection_name = "first_bfp_collection"
 
     try:
         # 使用MilvusVectorManager创建混合搜索集合

+ 0 - 203
test/test_rerank_api_curl.py

@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""
-使用curl测试重排序API
-"""
-
-import subprocess
-import json
-import sys
-import os
-
-def test_rerank_api_with_curl():
-    """
-    使用curl命令测试重排序API
-    """
-    print("=== 使用curl测试重排序API ===")
-
-    # 构建curl命令
-    curl_command = [
-        "curl",
-        "--location",
-        "http://192.168.91.253:9005/v1/rerank",
-        "--header", "Content-Type: application/json",
-        "--data", json.dumps({
-            "model": "bge-reranker-v2-m3",
-            "query": "乔布斯是谁?",
-            "candidates": [
-                "大模型是一类具有大量参数的人工智能模型。",
-                "苹果是一家科技公司",
-                "大模型用于深度学习任务"
-            ]
-        })
-    ]
-
-    try:
-        print("执行命令:")
-        print(" ".join(curl_command))
-        print()
-
-        # 执行curl命令
-        result = subprocess.run(
-            curl_command,
-            capture_output=True,
-            text=True,
-            timeout=30
-        )
-
-        print(f"返回状态码: {result.returncode}")
-        print("=" * 50)
-        print("标准输出:")
-        print(result.stdout)
-
-        if result.stderr:
-            print("=" * 50)
-            print("标准错误:")
-            print(result.stderr)
-
-        # 尝试解析JSON响应
-        if result.stdout:
-            try:
-                response_data = json.loads(result.stdout)
-                print("=" * 50)
-                print("解析后的JSON响应:")
-                print(json.dumps(response_data, ensure_ascii=False, indent=2))
-
-                # 检查响应格式
-                if "results" in response_data:
-                    print("\n✓ API响应格式正确,包含results字段")
-                    results = response_data["results"]
-                    print(f"✓ 返回了 {len(results)} 个重排序结果")
-
-                    for i, item in enumerate(results, 1):
-                        text = item.get("text", "")
-                        score = item.get("score", "")
-                        print(f"  {i}. 分数: {score} | 内容: {text}")
-                else:
-                    print("\n✗ API响应格式异常,缺少results字段")
-
-            except json.JSONDecodeError as e:
-                print(f"\n✗ JSON解析失败: {str(e)}")
-        else:
-            print("\n✗ 没有收到任何响应")
-
-        return result.returncode == 0
-
-    except subprocess.TimeoutExpired:
-        print("✗ 请求超时")
-        return False
-    except Exception as e:
-        print(f"✗ 执行curl命令时发生异常: {str(e)}")
-        return False
-
-def test_different_queries():
-    """
-    测试不同的查询请求
-    """
-    print("\n=== 测试不同的查询请求 ===")
-
-    test_cases = [
-        {
-            "query": "什么是人工智能?",
-            "candidates": [
-                "人工智能是计算机科学的一个分支。",
-                "机器学习是人工智能的核心技术。",
-                "深度学习使用神经网络进行学习。"
-            ]
-        },
-        {
-            "query": "大模型有什么特点?",
-            "candidates": [
-                "大模型具有数百万到数十亿的参数。",
-                "苹果公司生产iPhone手机。",
-                "Transformer是大模型的基础架构。"
-            ]
-        },
-        {
-            "query": "机器学习和深度学习的区别",
-            "candidates": [
-                "深度学习是机器学习的一个子集。",
-                "机器学习需要人工特征工程。",
-                "深度学习可以自动学习特征。"
-            ]
-        }
-    ]
-
-    for i, test_case in enumerate(test_cases, 1):
-        print(f"\n--- 测试用例 {i}: {test_case['query']} ---")
-
-        curl_command = [
-            "curl",
-            "--location",
-            "http://192.168.91.253:9005/v1/rerank",
-            "--header", "Content-Type: application/json",
-            "--data", json.dumps({
-                "model": "bge-reranker-v2-m3",
-                "query": test_case["query"],
-                "candidates": test_case["candidates"]
-            })
-        ]
-
-        try:
-            result = subprocess.run(
-                curl_command,
-                capture_output=True,
-                text=True,
-                timeout=30
-            )
-
-            if result.returncode == 0 and result.stdout:
-                try:
-                    response_data = json.loads(result.stdout)
-                    if "results" in response_data:
-                        results = response_data["results"]
-                        print(f"✓ 成功返回 {len(results)} 个结果")
-
-                        # 显示前3个结果
-                        for j, item in enumerate(results[:3], 1):
-                            text = item.get("text", "")
-                            score = item.get("score", "")
-                            print(f"  {j}. [{score}] {text[:50]}...")
-                    else:
-                        print("✗ 响应格式异常")
-                except json.JSONDecodeError:
-                    print("✗ JSON解析失败")
-            else:
-                print(f"✗ 请求失败: 状态码 {result.returncode}")
-
-        except Exception as e:
-            print(f"✗ 测试失败: {str(e)}")
-
-def main():
-    """
-    主函数
-    """
-    print("开始使用curl测试重排序API")
-    print("=" * 60)
-
-    # 基本测试
-    success = test_rerank_api_with_curl()
-
-    if success:
-        print("\n🎉 基本API测试成功!")
-
-        # 测试不同查询
-        test_different_queries()
-
-        print("\n" + "=" * 60)
-        print("所有测试完成!")
-
-    else:
-        print("\n❌ 基本API测试失败!")
-        print("请检查:")
-        print("1. 重排序服务是否在 192.168.91.253:9005 运行")
-        print("2. 服务是否支持 /v1/rerank 端点")
-        print("3. 网络连接是否正常")
-
-        return 1
-
-    return 0
-
-if __name__ == "__main__":
-    sys.exit(main())

+ 17 - 3
views/construction_review/launch_review.py

@@ -84,8 +84,8 @@ class LaunchReviewRequest(BaseModel):
         description="审查配置列表,包含的项为启用状态"
     )
     project_plan_type: str = Field(
-        "bridge_up_part",
-        description="工程方案类型,当前仅支持 bridge_up_part"
+        ...,
+        description="工程方案类型: 01_pf_Found_Rotary_Drill(旋挖钻机、冲击钻机成孔桩), 02_pf_Dig_Manual_Pile(人工挖孔桩), 03_bd_Sub_Cyl_Pier(圆柱墩、系梁、盖梁), 04_bd_Sub_Rect_Turn(矩形墩采用翻模工艺、系梁、盖梁), 05_bd_High_Rect_Slide(矩形墩采用爬模工艺、系梁、盖梁), 06_bu_Pre_SS_Beam(简支梁预制、运输及架桥机安装), 07_bu_Erect_Truck_TBeam(汽车式起重机安装T梁), 08_bu_Cast_Col_Support(梁柱式支架), 09_bu_Cast_Full_Support(满堂式支架), 10_bu_Cast_Cant_Trolley(挂篮), 11_se_Elev_Lift_Proj(起重吊装工程), 12_se_Tower_Crane_Proj(起重吊装设备安装), 13_o_Height_Work_Op(高空作业)"
     )
 
     class Config:
@@ -125,7 +125,21 @@ def validate_review_config(review_config: List[str]) -> None:
 def validate_project_plan_type(project_plan_type: str) -> None:
     """验证工程方案类型"""
     # 当前支持的工程方案类型
-    supported_types = {'bridge_up_part'}  # 桥梁上部结构
+    supported_types = {
+        '01_pf_Found_Rotary_Drill',  # 旋挖钻机、冲击钻机成孔桩
+        '02_pf_Dig_Manual_Pile',     # 人工挖孔桩
+        '03_bd_Sub_Cyl_Pier',        # 圆柱墩、系梁、盖梁
+        '04_bd_Sub_Rect_Turn',       # 矩形墩采用翻模工艺、系梁、盖梁
+        '05_bd_High_Rect_Slide',     # 矩形墩采用爬模工艺、系梁、盖梁
+        '06_bu_Pre_SS_Beam',         # 简支梁预制、运输及架桥机安装
+        '07_bu_Erect_Truck_TBeam',   # 汽车式起重机安装T梁
+        '08_bu_Cast_Col_Support',    # 梁柱式支架
+        '09_bu_Cast_Full_Support',   # 满堂式支架
+        '10_bu_Cast_Cant_Trolley',   # 挂篮
+        '11_se_Elev_Lift_Proj',      # 起重吊装工程
+        '12_se_Tower_Crane_Proj',    # 起重吊装设备安装
+        '13_o_Height_Work_Op'        # 高空作业
+    }
 
     if project_plan_type not in supported_types:
         raise LaunchReviewErrors.project_plan_type_invalid()

+ 6 - 7
views/test_views.py

@@ -30,7 +30,7 @@ from foundation.utils.tool_utils import DateTimeEncoder
 from langchain_core.prompts import ChatPromptTemplate
 from foundation.utils.yaml_utils import system_prompt_config
 
-from foundation.ai.models.silicon_flow import SiliconFlowAPI
+from foundation.ai.models.model_handler import model_handler as mh
 from foundation.database.base.vector.pg_vector import PGVectorDB
 from foundation.database.base.vector.milvus_vector import MilvusVectorManager
 
@@ -718,10 +718,9 @@ async def embedding_test_endpoint(
         }
         task_prompt_info = {"task_prompt": ""}
         text = input_query
-         # 初始化客户端(需提前设置环境变量 SILICONFLOW_API_KEY)
-        from foundation.ai.models.silicon_flow import SiliconFlowAPI
-        base_api_platform = SiliconFlowAPI()
-        embedding = base_api_platform.get_embeddings([text])[0]
+
+
+        embedding = mh._get_lq_qwen3_8b_emd()
         embed_dim = len(embedding)
         server_logger.info(trace_id=trace_id, msg=f"【result】: {embed_dim}")
 
@@ -768,9 +767,9 @@ async def bfp_search_endpoint(
         
         output = None
         # 初始化客户端(需提前设置环境变量 SILICONFLOW_API_KEY)
-        client = SiliconFlowAPI()
+
         # 抽象测试
-        pg_vector_db = PGVectorDB(base_api_platform=client)
+        pg_vector_db = PGVectorDB()
         output = pg_vector_db.retriever(param={"table_name": "tv_basis_of_preparation"}, query_text=input_query , top_k=top_k)
 
         # 返回字典格式的响应