Procházet zdrojové kódy

feat(sgsc-文档切分模块-xth):glm-ocr添加鉴权头

xgo před 1 týdnem
rodič
revize
6df11d5517

+ 216 - 0
config/config .ini.template

@@ -0,0 +1,216 @@
+
+
+# Model selection for each role.
+# NOTE(review): each value appears to name another section of this file
+# (e.g. [qwen3_5_35b_a3b], [lq_rerank_model]) - confirm against the loader.
+[model]
+MODEL_TYPE=qwen3_5_35b_a3b
+
+# Embedding model type; options: lq_qwen3_8b_emd, siliconflow_embed
+EMBEDDING_MODEL_TYPE=lq_qwen3_8b_emd
+
+# Rerank model type; options: bge_rerank_model, lq_rerank_model, silicoflow_rerank_model
+RERANK_MODEL_TYPE=lq_rerank_model
+
+# Completeness-review model type (used by llm_content_classifier_v2)
+COMPLETENESS_REVIEW_MODEL_TYPE=qwen3_5_122b_a10b
+
+
+# DeepSeek hosted API endpoint, model id and credential.
+# Placeholder only: never commit a real API key to this template.
+[deepseek]
+DEEPSEEK_SERVER_URL=https://api.deepseek.com
+DEEPSEEK_MODEL_ID=deepseek-chat
+DEEPSEEK_API_KEY=YOUR_DEEPSEEK_API_KEY
+
+# Doubao (Volcengine Ark) endpoint, model id and credential.
+# Placeholder only: never commit a real API key to this template.
+[doubao]
+DOUBAO_SERVER_URL=https://ark.cn-beijing.volces.com/api/v3/
+DOUBAO_MODEL_ID=doubao-seed-1-6-flash-250715
+DOUBAO_API_KEY=YOUR_DOUBAO_API_KEY
+
+
+# Qwen endpoint on a private-network address.
+# NOTE(review): the API key looks like a dummy value - confirm the server ignores it.
+[qwen]
+QWEN_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN_MODEL_ID=qwen3-30b
+QWEN_API_KEY=sk-123456
+
+# Standalone Qwen3-30B settings (same values as [qwen]; kept separate so they
+# can be managed independently later).
+[qwen3_30b]
+QWEN3_30B_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN3_30B_MODEL_ID=qwen3-30b
+QWEN3_30B_API_KEY=sk-123456
+
+
+[ai_review]
+# Debug-mode settings
+MAX_REVIEW_UNITS=5
+# Allowed REVIEW_MODE values: all, random, first
+REVIEW_MODE=all
+
+
+# Application identity.
+# Placeholder only: never commit the real application secret to this template.
+[app]
+APP_CODE=lq-agent
+APP_SECRET=YOUR_APP_SECRET
+
+
+# Launch host and port.
+# NOTE(review): presumably the address/port the service listens on - confirm.
+[launch]
+HOST=0.0.0.0
+LAUNCH_PORT=8002
+
+# Redis connection settings.
+# REDIS_URL repeats the host, port and password given in the fields below;
+# keep both forms in sync when editing.
+# NOTE(review): the password is a weak sample value - replace it in real deployments.
+[redis]
+REDIS_URL=redis://:123456@127.0.0.1:6379
+REDIS_HOST=127.0.0.1
+REDIS_PORT=6379
+REDIS_DB=0
+REDIS_PASSWORD=123456
+REDIS_MAX_CONNECTIONS=50
+
+[ocr]
+# OCR engine selection (all of the following spellings are accepted):
+# GLM-OCR: glm_ocr | glm-ocr | glmocr
+# MinerU:  mineru | mineru-ocr | mineru_ocr
+# Default: glm_ocr
+ENGINE=glm-ocr
+
+# GLM-OCR settings
+GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
+GLM_OCR_TIMEOUT=600
+# Optional credential. When non-empty, the hybrid extractor sends it as
+# "Authorization: Bearer ..."; when empty, no Authorization header is added.
+# Never commit a real key to this template.
+GLM_OCR_API_KEY=
+
+# MinerU settings
+MINERU_API_URL=http://183.220.37.46:25428/file_parse
+MINERU_TIMEOUT=300
+
+# Logging settings: log path, size limit in MB, number of backups, console toggle.
+# NOTE(review): whether LOG_FILE_PATH is a directory or a file is not visible here - confirm.
+[log]
+LOG_FILE_PATH=logs
+LOG_FILE_MAX_MB=10
+LOG_BACKUP_COUNT=5
+CONSOLE_OUTPUT=True
+
+# NOTE(review): the value is written as a Python-style list literal; INI has no
+# native list type, so confirm how the loader parses it.
+[user_lists]
+USERS=['user-001']
+
+
+# SiliconFlow hosted API: endpoint, credential and model ids.
+# Placeholder only: never commit a real API key to this template.
+# NOTE(review): SLCF_REANKER_MODEL_ID looks like a misspelling of "RERANKER";
+# it is left unchanged because the loader may depend on this exact key name.
+[siliconflow]
+SLCF_MODEL_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_API_KEY=YOUR_SILICONFLOW_API_KEY
+SLCF_CHAT_MODEL_ID=test-model
+SLCF_EMBED_MODEL_ID=netease-youdao/bce-embedding-base_v1
+SLCF_REANKER_MODEL_ID=BAAI/bge-reranker-v2-m3
+SLCF_VL_CHAT_MODEL_ID=THUDM/GLM-4.1V-9B-Thinking
+
+[siliconflow_embed]
+# SiliconFlow embedding model settings.
+# Placeholder only: never commit a real API key to this template.
+SLCF_EMBED_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_EMBED_API_KEY=YOUR_SILICONFLOW_API_KEY
+SLCF_EMBED_MODEL_ID=Qwen/Qwen3-Embedding-8B
+SLCF_EMBED_DIMENSIONS=4096
+
+# Qwen3-8B endpoint on a private-network address.
+# NOTE(review): the keys carry a QWEN_LOCAL_1_5B_ prefix although the model id
+# is Qwen3-8B - confirm the loader expects these names before renaming.
+[lq_qwen3_8b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9002/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-8B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# Locally deployed Qwen3-Embedding-8B
+[lq_qwen3_8b_emd]
+LQ_EMBEDDING_SERVER_URL=http://192.168.91.253:9003/v1
+LQ_EMBEDDING_MODEL_ID=Qwen3-Embedding-8B
+LQ_EMBEDDING_API_KEY=dummy
+
+# Qwen3-4B endpoint on a private-network address.
+# NOTE(review): the keys carry a QWEN_LOCAL_1_5B_ prefix although the model id
+# is Qwen3-4B - confirm the loader expects these names before renaming.
+[lq_qwen3_4b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9001/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-4B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# Locally deployed Qwen3-Reranker-8B
+[lq_rerank_model]
+LQ_RERANKER_SERVER_URL=http://192.168.91.253:9004/v1/rerank
+LQ_RERANKER_MODEL=Qwen3-Reranker-8B
+LQ_RERANKER_API_KEY=dummy
+LQ_RERANKER_TOP_N=10
+
+# Qwen3-Reranker-8B served through the SiliconFlow API.
+# Placeholder only: never commit a real API key to this template.
+[silicoflow_rerank_model]
+SILICOFLOW_RERANKER_API_URL=https://api.siliconflow.cn/v1/rerank
+SILICOFLOW_RERANKER_API_KEY=YOUR_SILICONFLOW_API_KEY
+SILICOFLOW_RERANKER_MODEL=Qwen/Qwen3-Reranker-8B
+
+# BGE reranker settings
+# NOTE(review): uses the same host and port as [lq_rerank_model] with a
+# different path (/rerank vs /v1/rerank) - confirm this is intended.
+[bge_rerank_model]
+BGE_RERANKER_SERVER_URL=http://192.168.91.253:9004/rerank
+BGE_RERANKER_MODEL=BAAI/bge-reranker-v2-m3
+BGE_RERANKER_API_KEY=dummy
+BGE_RERANKER_TOP_N=10
+
+# Qwen3-8B LoRA variant endpoint on a private-network address.
+# The section name contains an uppercase "B"; section names are case-sensitive
+# in some parsers (e.g. Python configparser), so reference it exactly as written.
+[lq_qwen3_8B_lora]
+LQ_QWEN3_8B_LQ_LORA_SERVER_URL=http://192.168.91.253:9006/v1
+LQ_QWEN3_8B_LQ_LORA_MODEL_ID=Qwen3-8B-lq-lora
+LQ_QWEN3_8B_LQ_LORA_API_KEY=dummy
+
+
+
+# MySQL connection and pool settings.
+# Placeholder only: never commit a real database password to this template.
+# NOTE(review): MYSQL_MIN_SIZE / MYSQL_MAX_SIZE presumably bound the connection
+# pool - confirm against the loader.
+[mysql]
+MYSQL_HOST=192.168.92.61
+MYSQL_PORT=13306
+MYSQL_USER=root
+MYSQL_PASSWORD=YOUR_MYSQL_PASSWORD
+MYSQL_DB=lq_db
+MYSQL_MIN_SIZE=1
+MYSQL_MAX_SIZE=5
+MYSQL_AUTO_COMMIT=True
+
+
+# PostgreSQL / pgvector connection settings.
+# Placeholder only: never commit a real database password to this template.
+[pgvector]
+PGVECTOR_HOST=124.223.140.149
+PGVECTOR_PORT=7432
+PGVECTOR_DB=vector_db
+PGVECTOR_USER=vector_user
+PGVECTOR_PASSWORD=YOUR_PGVECTOR_PASSWORD
+
+
+# Milvus connection settings.
+# MILVUS_USER and MILVUS_PASSWORD are present but empty in this template.
+[milvus]
+MILVUS_HOST=192.168.92.96
+MILVUS_PORT=30129
+MILVUS_DB=lq_db
+MILVUS_COLLECTION=first_bfp_collection_test
+MILVUS_USER=
+MILVUS_PASSWORD=
+
+
+[hybrid_search]
+# Hybrid-retrieval weights for the dense and sparse scores.
+# The two template values sum to 1.0.
+DENSE_WEIGHT=0.3
+SPARSE_WEIGHT=0.7
+
+
+# ============================================================
+# DashScope Qwen3.5 model family
+# Placeholders only: never commit a real DashScope API key to this template.
+# ============================================================
+
+# DashScope Qwen3.5-35B-A3B model
+[qwen3_5_35b_a3b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-35b-a3b
+DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY
+
+# DashScope Qwen3.5-27B model
+[qwen3_5_27b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-27b
+DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY
+
+# DashScope Qwen3.5-122B-A10B model
+[qwen3_5_122b_a10b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-122b-a10b
+DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY
+
+# ============================================================
+# General LLM settings
+# ============================================================
+
+# NOTE(review): TIMEOUT is presumably in seconds - confirm against the caller.
+# STREAM uses lowercase "false" while other booleans in this file use "True";
+# confirm the loader parses both spellings.
+[llm_keywords]
+TIMEOUT=60
+MAX_RETRIES=2
+CONCURRENT_WORKERS=20
+STREAM=false
+TEMPERATURE=0.3
+MAX_TOKENS=1024
+
+
+

+ 8 - 0
core/construction_review/component/doc_worker/pdf_worker/hybrid_extractor.py

@@ -112,7 +112,15 @@ class HybridFullTextExtractor(FullTextExtractor):
             "http://183.220.37.46:25429/v1/chat/completions"
         )
         self.glm_timeout = int(_read_ini_config("ocr", "glm_ocr_timeout", "600"))
+        
+        # 【新增】读取 GLM-OCR API Key(用于鉴权)
+        self.glm_api_key = _read_ini_config("ocr", "glm_ocr_api_key", "")
+        
+        # 构建请求头,如果配置了 API Key 则添加 Authorization
         self.glm_headers = {"Content-Type": "application/json"}
+        if self.glm_api_key:
+            self.glm_headers["Authorization"] = f"Bearer {self.glm_api_key}"
+            logger.debug(f"[HybridExtractor] GLM-OCR 已配置 API Key 鉴权")
         
         # 【新增】MinerU 配置
         self.mineru_api_url = _read_ini_config(