Bladeren bron

fix:(启用ocr)

WangXuMing 1 week geleden
bovenliggende
commit
44f94b26cf

+ 6 - 6
config/config.ini

@@ -70,7 +70,7 @@ ENGINE=glm-ocr
 # GLM-OCR 配置
 GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
 GLM_OCR_TIMEOUT=600
-GLM_OCR_API_KEY=2026_Unified_Secure_Key
+GLM_OCR_API_KEY=sk_prod_sXgHYxfVvZdw7O-cki6i7Cp2TbguOvbA_f4beb12a
 
 # MinerU 配置  
 MINERU_API_URL=http://183.220.37.46:25428/file_parse
@@ -167,7 +167,7 @@ PGVECTOR_PASSWORD=pg16@123
 # Qwen3.5-122B-A10B 模型(端口25423)
 SHUTIAN_122B_SERVER_URL=http://183.220.37.46:25423/v1
 SHUTIAN_122B_MODEL_ID=/model/Qwen3.5-122B-A10B
-SHUTIAN_122B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_122B_API_KEY=sk-prod_ojkjwcO4TTd9TL3vK6uo8a2Dvcdoz64u_9a89845f
 
 # Qwen3-8B 模型(端口25424)
 SHUTIAN_8B_SERVER_URL=http://183.220.37.46:25424/v1
@@ -177,22 +177,22 @@ SHUTIAN_8B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
 # Qwen3.6-27B 模型(端口25424)
 SHUTIAN_27B_SERVER_URL=http://183.220.37.46:25424/v1
 SHUTIAN_27B_MODEL_ID=/model/Qwen3.6-27B
-SHUTIAN_27B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_27B_API_KEY=sk_prod_HH21x5WB9Pm7IM9Bf808BoJPEn_4bPX5_f2c5f3f6
 
 # Qwen3.5-35B 模型(端口25427)
 SHUTIAN_35B_SERVER_URL=http://183.220.37.46:25427/v1
 SHUTIAN_35B_MODEL_ID=/model/Qwen3.5-35B
-SHUTIAN_35B_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_35B_API_KEY=sk_prod_0NuLZt1a2UrD80F9iB-GTxOIuAkJSZxH_5522d7ae
 
 # Qwen3-Embedding-8B 嵌入模型(端口25425)
 SHUTIAN_EMBED_SERVER_URL=http://183.220.37.46:25425/v1
 SHUTIAN_EMBED_MODEL_ID=/model/Qwen3-Embedding-8B
-SHUTIAN_EMBED_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_EMBED_API_KEY=sk_prod_3HDoVka8mU8Jqj9Xnmfkn8bxk5kmzKrz_700c186f
 
 # Qwen3-Reranker-8B 重排序模型(端口25426)
 SHUTIAN_RERANK_SERVER_URL=http://183.220.37.46:25426/v1/rerank
 SHUTIAN_RERANK_MODEL_ID=/model/Qwen3-Reranker-8B
-SHUTIAN_RERANK_API_KEY=sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615
+SHUTIAN_RERANK_API_KEY=sk_prod_dvgYHKWFoQlYAKmkIvBSyuguNSQGeNh0_23c65608
 
 
 [milvus]

+ 8 - 8
core/construction_review/component/minimal_pipeline/pdf_extractor1.py

@@ -154,14 +154,14 @@ class PdfStructureExtractor:
         }
 
         ocr_catalog: Optional[Dict[str, Any]] = None
-        # if self.detect_toc:
-        #     try:
-        #         ocr_catalog = self._extract_catalog(file_content, progress_callback)
-        #         if ocr_catalog:
-        #             ocr_catalog = self._normalize_catalog(ocr_catalog)
-        #             logger.info(f"[PDF提取] 目录提取完成: {ocr_catalog.get('total_chapters', 0)} 章")
-        #     except Exception as exc:
-        #         logger.warning(f"[PDF提取] OCR目录提取失败: {exc}")
+        if self.detect_toc:
+            try:
+                ocr_catalog = self._extract_catalog(file_content, progress_callback)
+                if ocr_catalog:
+                    ocr_catalog = self._normalize_catalog(ocr_catalog)
+                    logger.info(f"[PDF提取] 目录提取完成: {ocr_catalog.get('total_chapters', 0)} 章")
+            except Exception as exc:
+                logger.warning(f"[PDF提取] OCR目录提取失败: {exc}")
 
         doc = fitz.open(stream=file_content, filetype="pdf")
         try:

+ 19 - 2
utils_test/Model_Test/test_model_stress.py

@@ -16,6 +16,9 @@
      
     # python utils_test/Model_Test/test_model_stress.py --concurrency 150 --count 150 --model shutian_qwen3_6_27b --context-size 8k
 
+    # 避免服务端 KV 缓存命中(注入随机值)
+    python utils_test/Model_Test/test_model_stress.py --concurrency 10 --count 50 --bust-cache
+
     # 自定义参数
     python utils_test/Model_Test/test_model_stress.py --concurrency 20 --count 100 --model shutian_qwen3_5_122b
 
@@ -41,6 +44,7 @@ import asyncio
 import argparse
 import time
 import statistics
+import uuid
 from pathlib import Path
 from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
@@ -191,11 +195,13 @@ def _extract_token_usage(response) -> Tuple[int, int]:
 
 async def _run_llm_request(trace_id: str, model_name: Optional[str] = None,
                            function_name: Optional[str] = None,
-                           context_size: int = 0) -> RequestResult:
+                           context_size: int = 0,
+                           bust_cache: bool = False) -> RequestResult:
     """执行单次 LLM 调用并记录延迟和 token 用量
 
     Args:
         context_size: 上下文 token 数,>0 时在 user_prompt 前拼接填充文本
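+        bust_cache: when True, prepend a random "[noise:<hex>]" line to user_prompt (before any context padding) so the request does not hit the server-side KV cache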
     """
     from foundation.ai.models.model_handler import model_handler
     from foundation.ai.models.model_config_loader import get_model_for_function, get_thinking_mode_for_function
@@ -222,6 +228,10 @@ async def _run_llm_request(trace_id: str, model_name: Optional[str] = None,
         padding = _generate_context_text(context_size)
         user_prompt = f"{padding}\n\n---\n\n{TEST_USER_PROMPT}"
 
+    if bust_cache:
+        rand = uuid.uuid4().hex[:12]
+        user_prompt = f"[noise:{rand}]\n{user_prompt}"
+
     messages = [SystemMessage(content=TEST_SYSTEM_PROMPT), HumanMessage(content=user_prompt)]
 
     start = time.perf_counter()
@@ -274,6 +284,7 @@ async def run_stress_test(
     total_count: int,
     function_name: Optional[str] = None,
     context_size: int = 0,
+    bust_cache: bool = False,
 ) -> StressTestResult:
     """执行压力测试
 
@@ -304,7 +315,7 @@ async def run_stress_test(
             if model_type == "embedding":
                 return await _run_embedding_request(trace_id, model_name)
             else:
-                return await _run_llm_request(trace_id, model_name, function_name, context_size)
+                return await _run_llm_request(trace_id, model_name, function_name, context_size, bust_cache)
 
     ctx_label = f" | 上下文: {context_size//1024}k tokens" if context_size > 0 else ""
     print(f"\n{'='*60}")
@@ -502,6 +513,10 @@ def parse_args():
         "--all-embeddings", action="store_true",
         help="逐个测试所有 Embedding 模型",
     )
+    parser.add_argument(
+        "--bust-cache", action="store_true",
+        help="在每次请求的 prompt 末尾注入随机值,避免服务端 KV 缓存命中",
+    )
     return parser.parse_args()
 
 
@@ -539,6 +554,7 @@ async def _run_single_model_test(args, model_name: str, function_name: Optional[
                 total_count=args.count,
                 function_name=function_name,
                 context_size=ctx_size,
+                bust_cache=args.bust_cache,
             )
             summary = print_report(result)
             summary["context_display"] = ctx_display
@@ -571,6 +587,7 @@ async def main():
                     model_type=model_type,
                     concurrency=args.concurrency,
                     total_count=args.count,
+                    bust_cache=args.bust_cache,
                 )
                 summary = print_report(result)
                 results_summary.append(summary)