Parcourir la source

fix:(启用ocr)

WangXuMing il y a 1 semaine
Parent
commit
44f94b26cf

+ 6 - 6
config/config.ini

@@ -70,7 +70,7 @@ ENGINE=glm-ocr
 # GLM-OCR 配置
 # GLM-OCR 配置
 GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
 GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
 GLM_OCR_TIMEOUT=600
 GLM_OCR_TIMEOUT=600
-GLM_OCR_API_KEY=2026_Unified_Secure_Key
+GLM_OCR_API_KEY=sk_prod_sXgHYxfVvZdw7O-cki6i7Cp2TbguOvbA_f4beb12a
 
 
 # MinerU 配置  
 # MinerU 配置  
 MINERU_API_URL=http://183.220.37.46:25428/file_parse
 MINERU_API_URL=http://183.220.37.46:25428/file_parse
@@ -167,7 +167,7 @@ PGVECTOR_PASSWORD=pg16@123
 # Qwen3.5-122B-A10B 模型(端口25423)
 # Qwen3.5-122B-A10B 模型(端口25423)
 SHUTIAN_122B_SERVER_URL=http://183.220.37.46:25423/v1
 SHUTIAN_122B_SERVER_URL=http://183.220.37.46:25423/v1
 SHUTIAN_122B_MODEL_ID=/model/Qwen3.5-122B-A10B
 SHUTIAN_122B_MODEL_ID=/model/Qwen3.5-122B-A10B
-SHUTIAN_122B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_122B_API_KEY=sk-prod_ojkjwcO4TTd9TL3vK6uo8a2Dvcdoz64u_9a89845f
 
 
 # Qwen3-8B 模型(端口25424)
 # Qwen3-8B 模型(端口25424)
 SHUTIAN_8B_SERVER_URL=http://183.220.37.46:25424/v1
 SHUTIAN_8B_SERVER_URL=http://183.220.37.46:25424/v1
@@ -177,22 +177,22 @@ SHUTIAN_8B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
 # Qwen3.6-27B 模型(端口25424)
 # Qwen3.6-27B 模型(端口25424)
 SHUTIAN_27B_SERVER_URL=http://183.220.37.46:25424/v1
 SHUTIAN_27B_SERVER_URL=http://183.220.37.46:25424/v1
 SHUTIAN_27B_MODEL_ID=/model/Qwen3.6-27B
 SHUTIAN_27B_MODEL_ID=/model/Qwen3.6-27B
-SHUTIAN_27B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_27B_API_KEY=sk_prod_HH21x5WB9Pm7IM9Bf808BoJPEn_4bPX5_f2c5f3f6
 
 
 # Qwen3.5-35B 模型(端口25427)
 # Qwen3.5-35B 模型(端口25427)
 SHUTIAN_35B_SERVER_URL=http://183.220.37.46:25427/v1
 SHUTIAN_35B_SERVER_URL=http://183.220.37.46:25427/v1
 SHUTIAN_35B_MODEL_ID=/model/Qwen3.5-35B
 SHUTIAN_35B_MODEL_ID=/model/Qwen3.5-35B
-SHUTIAN_35B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_35B_API_KEY=sk_prod_0NuLZt1a2UrD80F9iB-GTxOIuAkJSZxH_5522d7ae
 
 
 # Qwen3-Embedding-8B 嵌入模型(端口25425)
 # Qwen3-Embedding-8B 嵌入模型(端口25425)
 SHUTIAN_EMBED_SERVER_URL=http://183.220.37.46:25425/v1
 SHUTIAN_EMBED_SERVER_URL=http://183.220.37.46:25425/v1
 SHUTIAN_EMBED_MODEL_ID=/model/Qwen3-Embedding-8B
 SHUTIAN_EMBED_MODEL_ID=/model/Qwen3-Embedding-8B
-SHUTIAN_EMBED_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_EMBED_API_KEY=sk_prod_3HDoVka8mU8Jqj9Xnmfkn8bxk5kmzKrz_700c186f
 
 
 # Qwen3-Reranker-8B 重排序模型(端口25426)
 # Qwen3-Reranker-8B 重排序模型(端口25426)
 SHUTIAN_RERANK_SERVER_URL=http://183.220.37.46:25426/v1/rerank
 SHUTIAN_RERANK_SERVER_URL=http://183.220.37.46:25426/v1/rerank
 SHUTIAN_RERANK_MODEL_ID=/model/Qwen3-Reranker-8B
 SHUTIAN_RERANK_MODEL_ID=/model/Qwen3-Reranker-8B
-SHUTIAN_RERANK_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_RERANK_API_KEY=sk_prod_dvgYHKWFoQlYAKmkIvBSyuguNSQGeNh0_23c65608
 
 
 
 
 [milvus]
 [milvus]

+ 8 - 8
core/construction_review/component/minimal_pipeline/pdf_extractor1.py

@@ -154,14 +154,14 @@ class PdfStructureExtractor:
         }
         }
 
 
         ocr_catalog: Optional[Dict[str, Any]] = None
         ocr_catalog: Optional[Dict[str, Any]] = None
-        # if self.detect_toc:
-        #     try:
-        #         ocr_catalog = self._extract_catalog(file_content, progress_callback)
-        #         if ocr_catalog:
-        #             ocr_catalog = self._normalize_catalog(ocr_catalog)
-        #             logger.info(f"[PDF提取] 目录提取完成: {ocr_catalog.get('total_chapters', 0)} 章")
-        #     except Exception as exc:
-        #         logger.warning(f"[PDF提取] OCR目录提取失败: {exc}")
+        if self.detect_toc:
+            try:
+                ocr_catalog = self._extract_catalog(file_content, progress_callback)
+                if ocr_catalog:
+                    ocr_catalog = self._normalize_catalog(ocr_catalog)
+                    logger.info(f"[PDF提取] 目录提取完成: {ocr_catalog.get('total_chapters', 0)} 章")
+            except Exception as exc:
+                logger.warning(f"[PDF提取] OCR目录提取失败: {exc}")
 
 
         doc = fitz.open(stream=file_content, filetype="pdf")
         doc = fitz.open(stream=file_content, filetype="pdf")
         try:
         try:

+ 19 - 2
utils_test/Model_Test/test_model_stress.py

@@ -16,6 +16,9 @@
      
      
     # python utils_test/Model_Test/test_model_stress.py --concurrency 150 --count 150 --model shutian_qwen3_6_27b --context-size 8k
     # python utils_test/Model_Test/test_model_stress.py --concurrency 150 --count 150 --model shutian_qwen3_6_27b --context-size 8k
 
 
+    # 避免服务端 KV 缓存命中(注入随机值)
+    python utils_test/Model_Test/test_model_stress.py --concurrency 10 --count 50 --bust-cache
+
     # 自定义参数
     # 自定义参数
     python utils_test/Model_Test/test_model_stress.py --concurrency 20 --count 100 --model shutian_qwen3_5_122b
     python utils_test/Model_Test/test_model_stress.py --concurrency 20 --count 100 --model shutian_qwen3_5_122b
 
 
@@ -41,6 +44,7 @@ import asyncio
 import argparse
 import argparse
 import time
 import time
 import statistics
 import statistics
+import uuid
 from pathlib import Path
 from pathlib import Path
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
 from typing import List, Optional, Tuple
@@ -191,11 +195,13 @@ def _extract_token_usage(response) -> Tuple[int, int]:
 
 
 async def _run_llm_request(trace_id: str, model_name: Optional[str] = None,
 async def _run_llm_request(trace_id: str, model_name: Optional[str] = None,
                            function_name: Optional[str] = None,
                            function_name: Optional[str] = None,
-                           context_size: int = 0) -> RequestResult:
+                           context_size: int = 0,
+                           bust_cache: bool = False) -> RequestResult:
     """执行单次 LLM 调用并记录延迟和 token 用量
     """执行单次 LLM 调用并记录延迟和 token 用量
 
 
     Args:
     Args:
         context_size: 上下文 token 数,>0 时在 user_prompt 前拼接填充文本
         context_size: 上下文 token 数,>0 时在 user_prompt 前拼接填充文本
+        bust_cache: 在 prompt 开头插入随机值,使前缀不同以避免 KV 缓存命中
     """
     """
     from foundation.ai.models.model_handler import model_handler
     from foundation.ai.models.model_handler import model_handler
     from foundation.ai.models.model_config_loader import get_model_for_function, get_thinking_mode_for_function
     from foundation.ai.models.model_config_loader import get_model_for_function, get_thinking_mode_for_function
@@ -222,6 +228,10 @@ async def _run_llm_request(trace_id: str, model_name: Optional[str] = None,
         padding = _generate_context_text(context_size)
         padding = _generate_context_text(context_size)
         user_prompt = f"{padding}\n\n---\n\n{TEST_USER_PROMPT}"
         user_prompt = f"{padding}\n\n---\n\n{TEST_USER_PROMPT}"
 
 
+    if bust_cache:
+        rand = uuid.uuid4().hex[:12]
+        user_prompt = f"[noise:{rand}]\n{user_prompt}"
+
     messages = [SystemMessage(content=TEST_SYSTEM_PROMPT), HumanMessage(content=user_prompt)]
     messages = [SystemMessage(content=TEST_SYSTEM_PROMPT), HumanMessage(content=user_prompt)]
 
 
     start = time.perf_counter()
     start = time.perf_counter()
@@ -274,6 +284,7 @@ async def run_stress_test(
     total_count: int,
     total_count: int,
     function_name: Optional[str] = None,
     function_name: Optional[str] = None,
     context_size: int = 0,
     context_size: int = 0,
+    bust_cache: bool = False,
 ) -> StressTestResult:
 ) -> StressTestResult:
     """执行压力测试
     """执行压力测试
 
 
@@ -304,7 +315,7 @@ async def run_stress_test(
             if model_type == "embedding":
             if model_type == "embedding":
                 return await _run_embedding_request(trace_id, model_name)
                 return await _run_embedding_request(trace_id, model_name)
             else:
             else:
-                return await _run_llm_request(trace_id, model_name, function_name, context_size)
+                return await _run_llm_request(trace_id, model_name, function_name, context_size, bust_cache)
 
 
     ctx_label = f" | 上下文: {context_size//1024}k tokens" if context_size > 0 else ""
     ctx_label = f" | 上下文: {context_size//1024}k tokens" if context_size > 0 else ""
     print(f"\n{'='*60}")
     print(f"\n{'='*60}")
@@ -502,6 +513,10 @@ def parse_args():
         "--all-embeddings", action="store_true",
         "--all-embeddings", action="store_true",
         help="逐个测试所有 Embedding 模型",
         help="逐个测试所有 Embedding 模型",
     )
     )
+    parser.add_argument(
+        "--bust-cache", action="store_true",
+        help="在每次请求的 prompt 开头注入随机值,避免服务端 KV 缓存命中",
+    )
     return parser.parse_args()
     return parser.parse_args()
 
 
 
 
@@ -539,6 +554,7 @@ async def _run_single_model_test(args, model_name: str, function_name: Optional[
                 total_count=args.count,
                 total_count=args.count,
                 function_name=function_name,
                 function_name=function_name,
                 context_size=ctx_size,
                 context_size=ctx_size,
+                bust_cache=args.bust_cache,
             )
             )
             summary = print_report(result)
             summary = print_report(result)
             summary["context_display"] = ctx_display
             summary["context_display"] = ctx_display
@@ -571,6 +587,7 @@ async def main():
                     model_type=model_type,
                     model_type=model_type,
                     concurrency=args.concurrency,
                     concurrency=args.concurrency,
                     total_count=args.count,
                     total_count=args.count,
+                    bust_cache=args.bust_cache,
                 )
                 )
                 summary = print_report(result)
                 summary = print_report(result)
                 results_summary.append(summary)
                 results_summary.append(summary)