|
@@ -22,7 +22,11 @@ from foundation.utils.common import return_json, handler_err
|
|
|
from views import test_router, get_operation_id
|
|
from views import test_router, get_operation_id
|
|
|
from foundation.agent.workflow.test_workflow_graph import test_workflow_graph
|
|
from foundation.agent.workflow.test_workflow_graph import test_workflow_graph
|
|
|
from file_processors.pdf_processor import PDFProcessor
|
|
from file_processors.pdf_processor import PDFProcessor
|
|
|
|
|
+from file_processors.bfp_pdf_processor import BfpPDFProcessor
|
|
|
|
|
|
|
|
|
|
+from foundation.models.silicon_flow import SiliconFlowAPI
|
|
|
|
|
+from foundation.rag.vector.pg_vector_mananger import PGVectorManager
|
|
|
|
|
+from foundation.rag.vector.pg_vector import PGVectorDB
|
|
|
|
|
|
|
|
|
|
|
|
|
@test_router.post("/generate/chat", response_model=TestForm)
|
|
@test_router.post("/generate/chat", response_model=TestForm)
|
|
@@ -373,7 +377,7 @@ async def generate_chat_endpoint(
|
|
|
|
|
|
|
|
|
|
|
|
|
@test_router.post("/data/pdf/governance", response_model=TestForm)
|
|
@test_router.post("/data/pdf/governance", response_model=TestForm)
|
|
|
-async def generate_chat_endpoint(
|
|
|
|
|
|
|
+async def pdf_governance_endpoint(
|
|
|
param: TestForm,
|
|
param: TestForm,
|
|
|
trace_id: str = Depends(get_operation_id)):
|
|
trace_id: str = Depends(get_operation_id)):
|
|
|
"""
|
|
"""
|
|
@@ -409,4 +413,275 @@ async def generate_chat_endpoint(
|
|
|
|
|
|
|
|
except Exception as err:
|
|
except Exception as err:
|
|
|
handler_err(server_logger, trace_id=trace_id, err=err, err_name="generate/stream")
|
|
handler_err(server_logger, trace_id=trace_id, err=err, err_name="generate/stream")
|
|
|
|
|
+ return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/bfp/governance", response_model=TestForm)
async def bfp_governance_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    Split (chunk) the basis-of-preparation (BFP) PDF documents.

    Builds a ``BfpPDFProcessor`` for the fixed ``test/bfp_files`` directory
    and calls ``process_pdfs_group()`` on it. The processor's return value
    is not used; the response always carries ``{"output": None}``.

    Args:
        param: Request body. It is only logged; none of its fields drive
            the processing.
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)``. On any exception
        the payload has ``code=100500`` and the exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # Directory is relative to the process working directory.
        file_directory = "test/bfp_files"
        pdf_processor = BfpPDFProcessor(directory=file_directory)
        pdf_processor.process_pdfs_group()
        server_logger.info(trace_id=trace_id, msg="【result】: ", log_type="bfp/governance")

        output = None
        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # A single handler suffices: the former ValueError branch was
        # byte-for-byte identical to this one.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="bfp/governance")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/embedding/test", response_model=TestForm)
async def embedding_test_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    Embedding model smoke test.

    Sends ``param.input`` as a one-element batch to
    ``SiliconFlowAPI.get_embeddings`` and reports the length of the first
    returned embedding together with the embedding itself.

    Args:
        param: Request body; only ``param.input`` is used (the text to embed).
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)`` whose ``output`` is
        the string ``"embed_dim=<n>,embedding:<vector>"``. On any exception
        the payload has ``code=100500`` and the exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # Per the original author: the SILICONFLOW_API_KEY environment
        # variable must be set before the client is created.
        # SiliconFlowAPI is already imported at module level, so the
        # function-local re-import was dropped.
        base_api_platform = SiliconFlowAPI()
        embedding = base_api_platform.get_embeddings([param.input])[0]
        embed_dim = len(embedding)
        server_logger.info(trace_id=trace_id, msg=f"【result】: {embed_dim}")

        output = f"embed_dim={embed_dim},embedding:{embedding}"
        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # A single handler suffices: the former ValueError branch was
        # byte-for-byte identical to this one.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="embedding")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/pgvector/test", response_model=TestForm)
async def pgvector_test_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    pg_vector vector-retrieval smoke test.

    Runs ``PGVectorDB.similarity_cosine_search`` against the
    ``test_documents`` table with ``param.input`` as the query text and
    returns whatever that call yields.

    Args:
        param: Request body; only ``param.input`` is used (the query text).
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)`` whose ``output`` is
        the search result. On any exception the payload has ``code=100500``
        and the exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # Per the original author: the SILICONFLOW_API_KEY environment
        # variable must be set before the client is created.
        client = SiliconFlowAPI()
        pg_vector_db = PGVectorDB(base_api_platform=client)
        output = pg_vector_db.similarity_cosine_search(
            param={"table_name": "test_documents"}, query_text=param.input)

        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # err_name previously read "bfp/governance" (copied from another
        # endpoint), which mislabelled failures of this route in the logs.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="pgvector/test")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/bfp/indb", response_model=TestForm)
async def bfp_indb_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    Split the basis-of-preparation (BFP) PDF documents and store them.

    Obtains per-file data from ``BfpPDFProcessor.get_pdfs_group_data()``
    for the fixed ``test/bfp_files`` directory, then, for each file's data,
    standardises it with ``PGVectorDB.document_standard`` and writes it to
    the ``tv_basis_of_preparation`` table via ``add_tqdm_batch_documents``.

    Args:
        param: Request body. It is only logged; none of its fields drive
            the processing.
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)``; ``output`` is
        always ``None`` on success. On any exception the payload has
        ``code=100500`` and the exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # Directory is relative to the process working directory.
        file_directory = "test/bfp_files"
        pdf_processor = BfpPDFProcessor(directory=file_directory)
        # The second element (total chunk count) is not needed here.
        file_data_list, _total_chunks = pdf_processor.get_pdfs_group_data()
        # log_type previously read "bfp/governance" (copied from another
        # endpoint); it now names this route.
        server_logger.info(trace_id=trace_id, msg="【result】: ", log_type="bfp/indb")

        # Per the original author: the SILICONFLOW_API_KEY environment
        # variable must be set before the client is created.
        client = SiliconFlowAPI()
        pg_vector_db = PGVectorDB(base_api_platform=client)

        for file_data in file_data_list:
            # Normalise one file's data, then insert it as a batch.
            documents = pg_vector_db.document_standard(file_data)
            pg_vector_db.add_tqdm_batch_documents(
                param={"table_name": "tv_basis_of_preparation"}, documents=documents)

        output = None
        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # err_name previously read "bfp/governance", which mislabelled
        # failures of this route in the logs.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="bfp/indb")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/bfp/batch/indb", response_model=TestForm)
async def bfp_batch_indb_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    Batch split-and-store of the basis-of-preparation (BFP) PDF documents.

    Hands a ``PGVectorDB`` instance to ``BfpPDFProcessor`` (as
    ``base_vector``) for the fixed ``test/bfp_files`` directory and calls
    ``process_tqdm_pdfs_group()`` on the processor.

    Args:
        param: Request body. It is only logged; none of its fields drive
            the processing.
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)`` with
        ``output="success"``. On any exception the payload has
        ``code=100500`` and the exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # Per the original author: the SILICONFLOW_API_KEY environment
        # variable must be set before the client is created.
        client = SiliconFlowAPI()
        pg_vector_db = PGVectorDB(base_api_platform=client)

        # Directory is relative to the process working directory.
        file_directory = "test/bfp_files"
        pdf_processor = BfpPDFProcessor(directory=file_directory, base_vector=pg_vector_db)
        pdf_processor.process_tqdm_pdfs_group()
        server_logger.info(trace_id=trace_id, msg="【result】: ", log_type="bfp/batch/indb")

        output = "success"
        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # A single handler suffices: the former ValueError branch was
        # byte-for-byte identical to this one.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="bfp/batch/indb")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@test_router.post("/data/bfp/search", response_model=TestForm)
async def bfp_search_endpoint(
        param: TestForm,
        trace_id: str = Depends(get_operation_id)):
    """
    Retrieve basis-of-preparation (BFP) chunks for a query.

    Calls ``PGVectorDB.retriever`` on the ``tv_basis_of_preparation`` table
    with ``param.input`` as the query text.

    Args:
        param: Request body. ``param.input`` is the query text.
            ``param.config.session_id`` is reused as the number of results
            to request: it is converted with ``int()`` and passed as
            ``top_k``.
        trace_id: Operation id injected by ``get_operation_id``; attached
            to every log record and to the response.

    Returns:
        A ``JSONResponse`` wrapping ``return_json(...)`` whose ``output`` is
        the retriever result. On any exception, including a
        non-numeric ``session_id``, the payload has ``code=100500`` and the
        exception text as ``msg``.
        NOTE(review): the response object is returned directly, so the
        declared ``response_model`` is presumably not applied to it.
    """
    try:
        server_logger.info(trace_id=trace_id, msg=f"{param}")

        # session_id doubles as top_k for this test route; int() raises
        # ValueError for non-numeric values, which is reported below.
        top_k = int(param.config.session_id)

        # Per the original author: the SILICONFLOW_API_KEY environment
        # variable must be set before the client is created.
        client = SiliconFlowAPI()
        pg_vector_db = PGVectorDB(base_api_platform=client)
        output = pg_vector_db.retriever(
            param={"table_name": "tv_basis_of_preparation"},
            query_text=param.input,
            top_k=top_k)

        return JSONResponse(
            return_json(data={"output": output}, data_type="text", trace_id=trace_id))

    except Exception as err:
        # A single handler suffices: the former ValueError branch was
        # byte-for-byte identical to this one.
        handler_err(server_logger, trace_id=trace_id, err=err, err_name="bfp/search")
        return JSONResponse(return_json(code=100500, msg=f"{err}", trace_id=trace_id))
|