Просмотр исходного кода

Merge branch 'dev' of http://47.109.151.80:15030/CRBC-MaaS-Platform-Project/LQAgentPlatform into dev_sgsc_wxm_fix_chunk_split

WangXuMing 1 неделя назад
Родитель
Commit
b307bfa97c

+ 216 - 0
config/config.ini.template

@@ -0,0 +1,216 @@
+
+
+[model]
+MODEL_TYPE=qwen3_5_35b_a3b
+
+# Embedding模型类型选择: lq_qwen3_8b_emd, siliconflow_embed
+EMBEDDING_MODEL_TYPE=lq_qwen3_8b_emd
+
+# Rerank模型类型选择: bge_rerank_model, lq_rerank_model, silicoflow_rerank_model
+RERANK_MODEL_TYPE=lq_rerank_model
+
+# 完整性审查模型类型 (用于 llm_content_classifier_v2)
+COMPLETENESS_REVIEW_MODEL_TYPE=qwen3_5_122b_a10b
+
+
+[deepseek]
+DEEPSEEK_SERVER_URL=https://api.deepseek.com
+DEEPSEEK_MODEL_ID=deepseek-chat
+DEEPSEEK_API_KEY=sk-9fe722389bac47e9ab30cf45b32eb736
+
+[doubao]
+DOUBAO_SERVER_URL=https://ark.cn-beijing.volces.com/api/v3/
+DOUBAO_MODEL_ID=doubao-seed-1-6-flash-250715
+DOUBAO_API_KEY=c98686df-506f-432c-98de-32e571a8e916
+
+
+[qwen]
+QWEN_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN_MODEL_ID=qwen3-30b
+QWEN_API_KEY=sk-123456
+
+# Qwen3-30B 独立配置(与qwen配置相同,方便后续独立管理)
+[qwen3_30b]
+QWEN3_30B_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN3_30B_MODEL_ID=qwen3-30b
+QWEN3_30B_API_KEY=sk-123456
+
+
+[ai_review]
+# 调试模式配置
+MAX_REVIEW_UNITS=5
+REVIEW_MODE=all
+# REVIEW_MODE=all/random/first
+
+
+[app]
+APP_CODE=lq-agent
+APP_SECRET=sx-73d32556-605e-11f0-9dd8-acde48001122
+
+
+[launch]
+HOST = 0.0.0.0
+LAUNCH_PORT = 8002
+
+[redis]
+REDIS_URL=redis://:123456@127.0.0.1:6379
+REDIS_HOST=127.0.0.1
+REDIS_PORT=6379
+REDIS_DB=0
+REDIS_PASSWORD=123456
+REDIS_MAX_CONNECTIONS=50
+
+[ocr]
+# OCR 引擎选择(以下写法都支持):
+# GLM-OCR: glm_ocr | glm-ocr | glmocr
+# MinerU:  mineru | mineru-ocr | mineru_ocr
+# 默认: glm_ocr
+ENGINE=glm-ocr
+
+# GLM-OCR 配置
+GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
+GLM_OCR_TIMEOUT=600
+GLM_OCR_API_KEY=2026_Unified_Secure_Key
+
+# MinerU 配置  
+MINERU_API_URL=http://183.220.37.46:25428/file_parse
+MINERU_TIMEOUT=300
+
+[log]
+LOG_FILE_PATH=logs
+LOG_FILE_MAX_MB=10
+LOG_BACKUP_COUNT=5
+CONSOLE_OUTPUT=True
+
+[user_lists]
+USERS=['user-001']
+
+
+[siliconflow]
+SLCF_MODEL_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SLCF_CHAT_MODEL_ID=test-model
+SLCF_EMBED_MODEL_ID=netease-youdao/bce-embedding-base_v1
+SLCF_REANKER_MODEL_ID=BAAI/bge-reranker-v2-m3
+SLCF_VL_CHAT_MODEL_ID=THUDM/GLM-4.1V-9B-Thinking
+
+[siliconflow_embed]
+# 硅基流动 Embedding 模型配置
+SLCF_EMBED_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_EMBED_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SLCF_EMBED_MODEL_ID=Qwen/Qwen3-Embedding-8B
+SLCF_EMBED_DIMENSIONS=4096
+
+[lq_qwen3_8b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9002/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-8B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# 本地部署的Qwen3-Embedding-8B配置
+[lq_qwen3_8b_emd]
+LQ_EMBEDDING_SERVER_URL=http://192.168.91.253:9003/v1
+LQ_EMBEDDING_MODEL_ID=Qwen3-Embedding-8B
+LQ_EMBEDDING_API_KEY=dummy
+
+[lq_qwen3_4b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9001/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-4B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# 本地部署的Qwen3-Reranker-8B配置
+[lq_rerank_model]
+LQ_RERANKER_SERVER_URL=http://192.168.91.253:9004/v1/rerank
+LQ_RERANKER_MODEL=Qwen3-Reranker-8B
+LQ_RERANKER_API_KEY=dummy
+LQ_RERANKER_TOP_N=10
+
+# 硅基流动API的Qwen3-Reranker-8B配置
+[silicoflow_rerank_model]
+SILICOFLOW_RERANKER_API_URL=https://api.siliconflow.cn/v1/rerank
+SILICOFLOW_RERANKER_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SILICOFLOW_RERANKER_MODEL=Qwen/Qwen3-Reranker-8B
+
+# BGE Reranker配置
+[bge_rerank_model]
+BGE_RERANKER_SERVER_URL=http://192.168.91.253:9004/rerank
+BGE_RERANKER_MODEL=BAAI/bge-reranker-v2-m3
+BGE_RERANKER_API_KEY=dummy
+BGE_RERANKER_TOP_N=10
+
+[lq_qwen3_8B_lora]
+LQ_QWEN3_8B_LQ_LORA_SERVER_URL=http://192.168.91.253:9006/v1
+LQ_QWEN3_8B_LQ_LORA_MODEL_ID=Qwen3-8B-lq-lora
+LQ_QWEN3_8B_LQ_LORA_API_KEY=dummy
+
+
+
+[mysql]
+MYSQL_HOST=192.168.92.61
+MYSQL_PORT=13306
+MYSQL_USER=root
+MYSQL_PASSWORD=lq@123
+MYSQL_DB=lq_db
+MYSQL_MIN_SIZE=1
+MYSQL_MAX_SIZE=5
+MYSQL_AUTO_COMMIT=True
+
+
+[pgvector]
+PGVECTOR_HOST=124.223.140.149
+PGVECTOR_PORT=7432
+PGVECTOR_DB=vector_db
+PGVECTOR_USER=vector_user
+PGVECTOR_PASSWORD=pg16@123
+
+
+[milvus]
+MILVUS_HOST=192.168.92.96
+MILVUS_PORT=30129
+MILVUS_DB=lq_db
+MILVUS_COLLECTION=first_bfp_collection_test
+MILVUS_USER=
+MILVUS_PASSWORD=
+
+
+[hybrid_search]
+# 混合检索权重配置
+DENSE_WEIGHT=0.3
+SPARSE_WEIGHT=0.7
+
+
+# ============================================================
+# DashScope Qwen3.5 系列模型配置
+# ============================================================
+
+# DashScope Qwen3.5-35B-A3B 模型
+[qwen3_5_35b_a3b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-35b-a3b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# DashScope Qwen3.5-27B 模型
+[qwen3_5_27b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-27b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# DashScope Qwen3.5-122B-A10B 模型
+[qwen3_5_122b_a10b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-122b-a10b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# ============================================================
+# LLM 通用配置
+# ============================================================
+
+[llm_keywords]
+TIMEOUT=60
+MAX_RETRIES=2
+CONCURRENT_WORKERS=20
+STREAM=false
+TEMPERATURE=0.3
+MAX_TOKENS=1024
+
+
+

+ 215 - 0
config/config.ini

@@ -0,0 +1,215 @@
+
+
+[model]
+MODEL_TYPE=qwen3_5_35b_a3b
+
+# Embedding模型类型选择: lq_qwen3_8b_emd, siliconflow_embed
+EMBEDDING_MODEL_TYPE=lq_qwen3_8b_emd
+
+# Rerank模型类型选择: bge_rerank_model, lq_rerank_model, silicoflow_rerank_model
+RERANK_MODEL_TYPE=lq_rerank_model
+
+# 完整性审查模型类型 (用于 llm_content_classifier_v2)
+COMPLETENESS_REVIEW_MODEL_TYPE=qwen3_5_122b_a10b
+
+
+[deepseek]
+DEEPSEEK_SERVER_URL=https://api.deepseek.com
+DEEPSEEK_MODEL_ID=deepseek-chat
+DEEPSEEK_API_KEY=sk-9fe722389bac47e9ab30cf45b32eb736
+
+[doubao]
+DOUBAO_SERVER_URL=https://ark.cn-beijing.volces.com/api/v3/
+DOUBAO_MODEL_ID=doubao-seed-1-6-flash-250715
+DOUBAO_API_KEY=c98686df-506f-432c-98de-32e571a8e916
+
+
+[qwen]
+QWEN_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN_MODEL_ID=qwen3-30b
+QWEN_API_KEY=sk-123456
+
+# Qwen3-30B 独立配置(与qwen配置相同,方便后续独立管理)
+[qwen3_30b]
+QWEN3_30B_SERVER_URL=http://192.168.91.253:8003/v1/
+QWEN3_30B_MODEL_ID=qwen3-30b
+QWEN3_30B_API_KEY=sk-123456
+
+
+[ai_review]
+# 调试模式配置
+MAX_REVIEW_UNITS=5
+REVIEW_MODE=all
+# REVIEW_MODE=all/random/first
+
+
+[app]
+APP_CODE=lq-agent
+APP_SECRET=sx-73d32556-605e-11f0-9dd8-acde48001122
+
+
+[launch]
+HOST = 0.0.0.0
+LAUNCH_PORT = 8002
+
+[redis]
+REDIS_URL=redis://:123456@127.0.0.1:6379
+REDIS_HOST=127.0.0.1
+REDIS_PORT=6379
+REDIS_DB=0
+REDIS_PASSWORD=123456
+REDIS_MAX_CONNECTIONS=50
+
+[ocr]
+# OCR 引擎选择(以下写法都支持):
+# GLM-OCR: glm_ocr | glm-ocr | glmocr
+# MinerU:  mineru | mineru-ocr | mineru_ocr
+# 默认: glm_ocr
+ENGINE=glm-ocr
+
+# GLM-OCR 配置
+GLM_OCR_API_URL=http://183.220.37.46:25429/v1/chat/completions
+GLM_OCR_TIMEOUT=600
+
+# MinerU 配置  
+MINERU_API_URL=http://183.220.37.46:25428/file_parse
+MINERU_TIMEOUT=300
+
+[log]
+LOG_FILE_PATH=logs
+LOG_FILE_MAX_MB=10
+LOG_BACKUP_COUNT=5
+CONSOLE_OUTPUT=True
+
+[user_lists]
+USERS=['user-001']
+
+
+[siliconflow]
+SLCF_MODEL_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SLCF_CHAT_MODEL_ID=test-model
+SLCF_EMBED_MODEL_ID=netease-youdao/bce-embedding-base_v1
+SLCF_REANKER_MODEL_ID=BAAI/bge-reranker-v2-m3
+SLCF_VL_CHAT_MODEL_ID=THUDM/GLM-4.1V-9B-Thinking
+
+[siliconflow_embed]
+# 硅基流动 Embedding 模型配置
+SLCF_EMBED_SERVER_URL=https://api.siliconflow.cn/v1
+SLCF_EMBED_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SLCF_EMBED_MODEL_ID=Qwen/Qwen3-Embedding-8B
+SLCF_EMBED_DIMENSIONS=4096
+
+[lq_qwen3_8b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9002/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-8B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# 本地部署的Qwen3-Embedding-8B配置
+[lq_qwen3_8b_emd]
+LQ_EMBEDDING_SERVER_URL=http://192.168.91.253:9003/v1
+LQ_EMBEDDING_MODEL_ID=Qwen3-Embedding-8B
+LQ_EMBEDDING_API_KEY=dummy
+
+[lq_qwen3_4b]
+QWEN_LOCAL_1_5B_SERVER_URL=http://192.168.91.253:9001/v1
+QWEN_LOCAL_1_5B_MODEL_ID=Qwen3-4B
+QWEN_LOCAL_1_5B_API_KEY=dummy
+
+# 本地部署的Qwen3-Reranker-8B配置
+[lq_rerank_model]
+LQ_RERANKER_SERVER_URL=http://192.168.91.253:9004/v1/rerank
+LQ_RERANKER_MODEL=Qwen3-Reranker-8B
+LQ_RERANKER_API_KEY=dummy
+LQ_RERANKER_TOP_N=10
+
+# 硅基流动API的Qwen3-Reranker-8B配置
+[silicoflow_rerank_model]
+SILICOFLOW_RERANKER_API_URL=https://api.siliconflow.cn/v1/rerank
+SILICOFLOW_RERANKER_API_KEY=sk-rdabeukkgfwyelstbqlcupsrwfkmduqvadztvxeyumvllstt
+SILICOFLOW_RERANKER_MODEL=Qwen/Qwen3-Reranker-8B
+
+# BGE Reranker配置
+[bge_rerank_model]
+BGE_RERANKER_SERVER_URL=http://192.168.91.253:9004/rerank
+BGE_RERANKER_MODEL=BAAI/bge-reranker-v2-m3
+BGE_RERANKER_API_KEY=dummy
+BGE_RERANKER_TOP_N=10
+
+[lq_qwen3_8B_lora]
+LQ_QWEN3_8B_LQ_LORA_SERVER_URL=http://192.168.91.253:9006/v1
+LQ_QWEN3_8B_LQ_LORA_MODEL_ID=Qwen3-8B-lq-lora
+LQ_QWEN3_8B_LQ_LORA_API_KEY=dummy
+
+
+
+[mysql]
+MYSQL_HOST=192.168.92.61
+MYSQL_PORT=13306
+MYSQL_USER=root
+MYSQL_PASSWORD=lq@123
+MYSQL_DB=lq_db
+MYSQL_MIN_SIZE=1
+MYSQL_MAX_SIZE=5
+MYSQL_AUTO_COMMIT=True
+
+
+[pgvector]
+PGVECTOR_HOST=124.223.140.149
+PGVECTOR_PORT=7432
+PGVECTOR_DB=vector_db
+PGVECTOR_USER=vector_user
+PGVECTOR_PASSWORD=pg16@123
+
+
+[milvus]
+MILVUS_HOST=192.168.92.96
+MILVUS_PORT=30129
+MILVUS_DB=lq_db
+MILVUS_COLLECTION=first_bfp_collection_test
+MILVUS_USER=
+MILVUS_PASSWORD=
+
+
+[hybrid_search]
+# 混合检索权重配置
+DENSE_WEIGHT=0.3
+SPARSE_WEIGHT=0.7
+
+
+# ============================================================
+# DashScope Qwen3.5 系列模型配置
+# ============================================================
+
+# DashScope Qwen3.5-35B-A3B 模型
+[qwen3_5_35b_a3b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-35b-a3b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# DashScope Qwen3.5-27B 模型
+[qwen3_5_27b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-27b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# DashScope Qwen3.5-122B-A10B 模型
+[qwen3_5_122b_a10b]
+DASHSCOPE_SERVER_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+DASHSCOPE_MODEL_ID=qwen3.5-122b-a10b
+DASHSCOPE_API_KEY=sk-98cca096416a41d5a6cec68b824486c5
+
+# ============================================================
+# LLM 通用配置
+# ============================================================
+
+[llm_keywords]
+TIMEOUT=60
+MAX_RETRIES=2
+CONCURRENT_WORKERS=20
+STREAM=false
+TEMPERATURE=0.3
+MAX_TOKENS=1024
+
+
+

+ 6 - 2
core/construction_review/component/ai_review_engine.py

@@ -678,8 +678,12 @@ class AIReviewEngine(BaseReviewer):
                 'StandardCategoryTable.csv'
                 'StandardCategoryTable.csv'
             )
             )
             
             
-            # 创建轻量级审查器
-            checker = LightweightCompletenessChecker(csv_path)
+            # 创建轻量级审查器(传入model_client用于LLM生成建议)
+            # self.model_client 是从 BaseReviewer 继承的
+            checker = LightweightCompletenessChecker(
+                csv_path,
+                model_client=getattr(self, 'model_client', None)
+            )
             
             
             # 从state获取outline和原始chunks(如果有)
             # 从state获取outline和原始chunks(如果有)
             outline = None
             outline = None

+ 5 - 34
core/construction_review/component/doc_worker/pdf_worker/adapter.py

@@ -4,6 +4,8 @@ pdf_worker_adapter
 
 
 将 PDF 处理实现包装为 file_parse 的 PipelineComponents,
 将 PDF 处理实现包装为 file_parse 的 PipelineComponents,
 并提供一个方便复用的构建函数。
 并提供一个方便复用的构建函数。
+
+【修改记录】2025-03-27: OCR 引擎从 MinerU 替换为 GLM-OCR 本地 API
 """
 """
 
 
 from __future__ import annotations
 from __future__ import annotations
@@ -16,7 +18,6 @@ from ..interfaces import DocumentPipeline, FileParseFacade, ResultWriter
 from ..classification.hierarchy_classifier import HierarchyClassifier
 from ..classification.hierarchy_classifier import HierarchyClassifier
 from ..classification.chunk_classifier import ChunkClassifier
 from ..classification.chunk_classifier import ChunkClassifier
 from .fulltext_extractor import PdfFullTextExtractor
 from .fulltext_extractor import PdfFullTextExtractor
-from .mineru_extractor import LocalMinerUFullTextExtractor
 from .hybrid_extractor import HybridFullTextExtractor
 from .hybrid_extractor import HybridFullTextExtractor
 from .json_writer import PdfJsonResultWriter
 from .json_writer import PdfJsonResultWriter
 from .text_splitter import PdfTextSplitter
 from .text_splitter import PdfTextSplitter
@@ -40,49 +41,19 @@ def build_pdf_facade(config: Optional[PdfWorkerConfig] = None) -> FileParseFacad
     构建一个处理 PDF 的 FileParseFacade(智能混合模式)。
     构建一个处理 PDF 的 FileParseFacade(智能混合模式)。
 
 
     【已升级为智能混合模式】
     【已升级为智能混合模式】
-    - 自动检测扫描页(含表格区域)并使用本地 MinerU OCR 提取
+    - 自动检测扫描页(含表格区域)并使用 GLM-OCR 识别
     - 电子页使用 PyMuPDF 本地提取,兼顾速度与准确率
     - 电子页使用 PyMuPDF 本地提取,兼顾速度与准确率
     - 保留准确的分页信息,无需云端 API
     - 保留准确的分页信息,无需云端 API
     """
     """
-    # 默认使用混合模式(原纯本地模式可通过 build_local_pdf_facade 获取)
+    # 默认使用混合模式
     return build_hybrid_facade(config)
     return build_hybrid_facade(config)
 
 
 
 
-def build_local_mineru_facade(config: Optional[PdfWorkerConfig] = None) -> FileParseFacade:
-    """
-    构建一个使用本地部署 MinerU 提取全文的 FileParseFacade。
-    
-    需要在 config.yaml 中配置 mineru_local 相关参数:
-    - server_ip: MinerU 服务器 IP
-    - server_port: MinerU 服务器端口 (默认 23424)
-    - api_key: 鉴权密钥
-    - timeout: 请求超时时间
-    """
-    if config is None:
-        config = PdfWorkerConfig()
-
-    writers: List[ResultWriter] = config.writers or [PdfJsonResultWriter()]
-
-    components = PipelineComponents(
-        config=default_config_provider,
-        toc_extractor=PdfTOCExtractor(),
-        classifier=HierarchyClassifier(),
-        fulltext_extractor=LocalMinerUFullTextExtractor(),
-        splitter=PdfTextSplitter(),
-        writers=writers,
-        chunk_classifier=ChunkClassifier(),
-    )
-
-    pipeline: DocumentPipeline = DefaultDocumentPipeline(components)
-    facade: FileParseFacade = DefaultFileParseFacade(pipeline)
-    return facade
-
-
 def build_hybrid_facade(config: Optional[PdfWorkerConfig] = None) -> FileParseFacade:
 def build_hybrid_facade(config: Optional[PdfWorkerConfig] = None) -> FileParseFacade:
     """
     """
     构建一个使用混合提取策略的 FileParseFacade。
     构建一个使用混合提取策略的 FileParseFacade。
     
     
-    - 智能路由:电子页走本地提取,扫描页走本地 MinerU OCR
+    - 智能路由:电子页走本地提取,扫描页走 GLM-OCR 识别。
     - 兼顾速度与准确率,并保留准确的分页信息。
     - 兼顾速度与准确率,并保留准确的分页信息。
     - 无需云端 API,完全本地化部署。
     - 无需云端 API,完全本地化部署。
     """
     """

+ 8 - 9
core/construction_review/component/doc_worker/pdf_worker/batch_cli.py

@@ -13,8 +13,10 @@ PDF 批量处理命令行入口
   # 批量处理并指定输出目录
   # 批量处理并指定输出目录
   python -m doc_worker.pdf_worker.batch_cli data/ -o output/
   python -m doc_worker.pdf_worker.batch_cli data/ -o output/
 
 
-  # 使用混合模式(扫描件自动使用本地 MinerU
+  # 使用混合模式(扫描件自动使用 GLM-OCR
   python -m doc_worker.pdf_worker.batch_cli data/ --engine hybrid
   python -m doc_worker.pdf_worker.batch_cli data/ --engine hybrid
+
+【修改记录】2025-03-27: 移除 MinerU 引擎选项,仅保留 hybrid 和 pdf
 """
 """
 
 
 from __future__ import annotations
 from __future__ import annotations
@@ -23,7 +25,7 @@ import argparse
 from pathlib import Path
 from pathlib import Path
 from typing import List
 from typing import List
 
 
-from .adapter import build_pdf_facade, build_local_mineru_facade, build_hybrid_facade
+from .adapter import build_pdf_facade, build_hybrid_facade
 
 
 
 
 def find_pdf_files(path: Path) -> List[Path]:
 def find_pdf_files(path: Path) -> List[Path]:
@@ -45,9 +47,9 @@ def main() -> None:
     )
     )
     parser.add_argument(
     parser.add_argument(
         "--engine",
         "--engine",
-        choices=["pdf", "mineru", "hybrid"],
+        choices=["pdf", "hybrid"],
         default="hybrid",
         default="hybrid",
-        help="选择全文提取引擎:hybrid (智能混合模式,默认), pdf (纯本地 PyMuPDF), mineru (纯 MinerU OCR)",
+        help="选择全文提取引擎:hybrid (智能混合模式,默认), pdf (纯本地 PyMuPDF)",
     )
     )
     parser.add_argument(
     parser.add_argument(
         "-l",
         "-l",
@@ -91,11 +93,8 @@ def main() -> None:
     print("=" * 80)
     print("=" * 80)
 
 
     # 根据引擎选择 facade
     # 根据引擎选择 facade
-    if args.engine == "mineru":
-        print("使用本地 MinerU OCR 引擎...")
-        facade = build_local_mineru_facade()
-    elif args.engine == "hybrid":
-        print("使用智能混合引擎(扫描件自动使用本地 MinerU)...")
+    if args.engine == "hybrid":
+        print("使用智能混合引擎(扫描件自动使用 GLM-OCR)...")
         facade = build_hybrid_facade()
         facade = build_hybrid_facade()
     else:  # default to pdf
     else:  # default to pdf
         print("使用本地 PyMuPDF 引擎...")
         print("使用本地 PyMuPDF 引擎...")

+ 7 - 8
core/construction_review/component/doc_worker/pdf_worker/cli.py

@@ -4,6 +4,8 @@ PDF 处理命令行入口(基于 pdf_worker_adapter)
 用法示例:
 用法示例:
 
 
   python -m file_parse.pdf_worker.cli input.pdf
   python -m file_parse.pdf_worker.cli input.pdf
+
+【修改记录】2025-03-27: 移除 MinerU 引擎选项,仅保留 hybrid 和 pdf
 """
 """
 
 
 from __future__ import annotations
 from __future__ import annotations
@@ -11,7 +13,7 @@ from __future__ import annotations
 import argparse
 import argparse
 from pathlib import Path
 from pathlib import Path
 
 
-from .adapter import build_pdf_facade, build_local_mineru_facade, build_hybrid_facade
+from .adapter import build_pdf_facade, build_hybrid_facade
 
 
 
 
 def main() -> None:
 def main() -> None:
@@ -22,9 +24,9 @@ def main() -> None:
 
 
     parser.add_argument(
     parser.add_argument(
         "--engine",
         "--engine",
-        choices=["pdf", "mineru", "hybrid"],
+        choices=["pdf", "hybrid"],
         default="hybrid",
         default="hybrid",
-        help="选择全文提取引擎:hybrid (智能混合模式,默认), pdf (纯本地 PyMuPDF), mineru (纯 MinerU OCR)",
+        help="选择全文提取引擎:hybrid (智能混合模式,默认), pdf (纯本地 PyMuPDF)",
     )
     )
 
 
     parser.add_argument(
     parser.add_argument(
@@ -62,11 +64,8 @@ def main() -> None:
     if file_path.suffix.lower() not in supported_extensions:
     if file_path.suffix.lower() not in supported_extensions:
         raise SystemExit(f"当前 CLI 仅支持以下文件类型: {supported_extensions}")
         raise SystemExit(f"当前 CLI 仅支持以下文件类型: {supported_extensions}")
 
 
-    if args.engine == "mineru":
-        print("正在使用本地 MinerU OCR 引擎...")
-        facade = build_local_mineru_facade()
-    elif args.engine == "hybrid":
-        print("正在使用智能混合引擎(扫描件自动使用本地 MinerU)...")
+    if args.engine == "hybrid":
+        print("正在使用智能混合引擎(扫描件自动使用 GLM-OCR)...")
         facade = build_hybrid_facade()
         facade = build_hybrid_facade()
     else:  # default to pdf
     else:  # default to pdf
         print("正在使用本地 PyMuPDF 引擎...")
         print("正在使用本地 PyMuPDF 引擎...")

+ 3 - 1
core/construction_review/component/doc_worker/pdf_worker/html_to_markdown.py

@@ -1,8 +1,10 @@
 """
 """
 HTML 到 Markdown 转换器
 HTML 到 Markdown 转换器
 
 
-用于将 MinerU 返回的 HTML 格式转换为 Markdown 格式。
+用于将 HTML 格式(如 OCR 返回的 HTML)转换为 Markdown 格式。
 使用 markdownify 库,支持表格、列表、标题等复杂结构转换。
 使用 markdownify 库,支持表格、列表、标题等复杂结构转换。
+
+【修改记录】2025-03-27: 更新文档说明,移除 MinerU 特定引用
 """
 """
 
 
 from __future__ import annotations
 from __future__ import annotations

+ 1 - 10
core/construction_review/component/doc_worker/pdf_worker/hybrid_extractor.py

@@ -357,15 +357,6 @@ class HybridFullTextExtractor(FullTextExtractor):
             f"总字符数: {total_chars}"
             f"总字符数: {total_chars}"
         )
         )
 
 
-        # 保存提取后的原始PDF内容到缓存目录
-        from foundation.observability.cachefiles.cache_manager import cache, CacheBaseDir
-        cache.save(
-            data=pages,
-            subdir="document_temp",
-            filename="原始pdf结果.json",
-            base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW
-        )
-
         return pages
         return pages
 
 
     def _ocr_page_with_glm(self, page: fitz.Page, page_num: int, original_filename: str) -> str:
     def _ocr_page_with_glm(self, page: fitz.Page, page_num: int, original_filename: str) -> str:
@@ -800,4 +791,4 @@ class HybridFullTextExtractor(FullTextExtractor):
             return "\n".join(md_rows)
             return "\n".join(md_rows)
         
         
         return re.sub(r'<table[^>]*>.*?</table>', convert_table_match, content, 
         return re.sub(r'<table[^>]*>.*?</table>', convert_table_match, content, 
-                     flags=re.DOTALL | re.IGNORECASE)
+                     flags=re.DOTALL | re.IGNORECASE)

+ 373 - 64
core/construction_review/component/reviewers/completeness_reviewer.py

@@ -15,6 +15,9 @@ from typing import Dict, List, Optional, Set, Tuple, Any
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
 from collections import defaultdict
 from collections import defaultdict
 from pathlib import Path
 from pathlib import Path
+import json
+
+from foundation.observability.logger.loggering import review_logger as logger
 
 
 
 
 @dataclass
 @dataclass
@@ -180,18 +183,42 @@ class TertiarySpecLoader:
 
 
 class LightweightCompletenessChecker:
 class LightweightCompletenessChecker:
     """轻量级完整性检查器"""
     """轻量级完整性检查器"""
-    
-    def __init__(self, standard_csv_path: str):
+
+    def __init__(self, standard_csv_path: str, model_client=None, prompt_loader=None):
         """
         """
         初始化检查器
         初始化检查器
-        
+
         Args:
         Args:
             standard_csv_path: StandardCategoryTable.csv 文件路径
             standard_csv_path: StandardCategoryTable.csv 文件路径
+            model_client: 模型客户端(可选),用于生成智能建议
+            prompt_loader: 提示词加载器(可选)
         """
         """
         self.spec_loader = TertiarySpecLoader(standard_csv_path)
         self.spec_loader = TertiarySpecLoader(standard_csv_path)
         self.tertiary_specs = self.spec_loader.get_tertiary_items()
         self.tertiary_specs = self.spec_loader.get_tertiary_items()
         self.secondary_specs = self.spec_loader.get_secondary_items()
         self.secondary_specs = self.spec_loader.get_secondary_items()
         self.secondary_names = self.spec_loader.get_secondary_names()
         self.secondary_names = self.spec_loader.get_secondary_names()
+
+        # 大模型客户端和提示词加载器(用于生成智能建议)
+        self.model_client = model_client
+        self.prompt_loader = prompt_loader
+
+        # 如果没有提供model_client,尝试从foundation导入
+        if self.model_client is None:
+            try:
+                from foundation.ai.agent.generate.model_generate import generate_model_client
+                self.model_client = generate_model_client
+            except ImportError:
+                logger.warning("无法导入generate_model_client,建议生成功能将使用简单拼接模式")
+                self.model_client = None
+
+        # 如果没有提供prompt_loader,尝试从当前模块导入
+        if self.prompt_loader is None:
+            try:
+                from .utils.prompt_loader import prompt_loader
+                self.prompt_loader = prompt_loader
+            except ImportError:
+                logger.warning("无法导入prompt_loader,建议生成功能将使用简单拼接模式")
+                self.prompt_loader = None
     
     
     def _normalize_chapter_code(self, code: str) -> str:
     def _normalize_chapter_code(self, code: str) -> str:
         """将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
         """将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
@@ -202,6 +229,198 @@ class LightweightCompletenessChecker:
                 return k
                 return k
         return code
         return code
 
 
+    def _build_llm_prompt_for_recommendation(
+        self,
+        level: str,
+        first_code: str,
+        first_name: str,
+        second_code: str = None,
+        second_name: str = None,
+        tertiary_items: List[TertiaryItem] = None,
+        outline_title: str = None
+    ) -> str:
+        """
+        构建用于LLM生成建议的prompt
+
+        Args:
+            level: 缺失级别(一级 / 二级 / 三级 / 一致性)
+            first_code: 一级分类代码
+            first_name: 一级分类名称
+            second_code: 二级分类代码(可选)
+            second_name: 二级分类名称(可选)
+            tertiary_items: 缺失的三级分类项列表(可选)
+            outline_title: 目录中的标题(用于一致性检查)
+
+        Returns:
+            str: 构建的prompt
+        """
+        # 构建问题上下文
+        if level == "一级":
+            context = f"""
+【问题类型】一级章节缺失
+【缺失章节】{first_name} ({first_code})
+【问题描述】文档中缺少'{first_name}'整个章节,这是专项施工方案中必须包含的一级章节。"""
+            # 获取该一级下的所有二级和三级信息作为参考
+            related_specs = []
+            for (fc, sc), sec_item in self.secondary_specs.items():
+                if fc == first_code:
+                    # 获取该二级下的所有三级
+                    tertiary_list = self.spec_loader.get_tertiary_by_secondary(fc, sc)
+                    tertiary_info = []
+                    for t_item in tertiary_list:
+                        tertiary_info.append(f"      - {t_item.third_cn}: {t_item.third_focus}")
+                    related_specs.append(f"""
+  【二级分类】{sec_item.second_cn}
+    【包含的三级内容要点】
+{chr(10).join(tertiary_info)}""")
+
+            reference = f"""
+【规范参考信息】
+根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{first_name}'章节应包含以下内容:
+{chr(10).join(related_specs)}
+"""
+
+        elif level == "二级":
+            context = f"""
+【问题类型】二级章节缺失
+【所属一级】{first_name} ({first_code})
+【缺失章节】{second_name} ({second_code})
+【问题描述】'{first_name}'下缺少'{second_name}'二级章节。"""
+            # 获取该二级下的所有三级信息
+            tertiary_list = self.spec_loader.get_tertiary_by_secondary(first_code, second_code)
+            tertiary_info = []
+            for t_item in tertiary_list:
+                tertiary_info.append(f"    - {t_item.third_cn}: {t_item.third_focus}")
+
+            reference = f"""
+【规范参考信息】
+根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{second_name}'章节应包含以下三级内容要点:
+{chr(10).join(tertiary_info)}
+"""
+
+        elif level == "三级":
+            context = f"""
+【问题类型】三级内容缺失
+【所属一级】{first_name} ({first_code})
+【所属二级】{second_name} ({second_code})
+【缺失内容】"""
+            missing_contents = []
+            for item in tertiary_items or []:
+                missing_contents.append(f"    - {item.third_cn}: {item.third_focus}")
+            context += "\n" + "\n".join(missing_contents)
+
+            reference = f"""
+【规范参考信息】
+以上缺失的内容要点是'{second_name}'章节下的标准内容要求,具体包括:
+{chr(10).join([f'  - {t.third_cn}: 应包含{t.third_focus}' for t in (tertiary_items or [])])}
+"""
+
+        elif level == "一致性":
+            context = f"""
+【问题类型】目录与正文不一致
+【涉及章节】{outline_title or second_name}
+【问题描述】目录页列有该章节,但正文中未发现对应内容。"""
+            reference = """
+【规范参考信息】
+根据文档一致性要求,目录中列出的章节应在正文中有对应的内容描述。若该章节确实不需要,应从目录中移除;若需要保留,则必须补充正文内容。
+"""
+        else:
+            context = "【问题类型】未知"
+            reference = ""
+
+        prompt = f"""你是一位资深的工程施工方案审查专家。请根据以下问题上下文和规范参考信息,生成专业的审查建议。
+
+{context}
+
+{reference}
+
+请用JSON格式输出审查建议,包含以下字段:
+- issue_point: 问题摘要(简洁明了,50字以内)
+- suggestion: 具体补充建议(详细可行,100-200字,包含具体应该补充的内容要点)
+- reason: 规范依据说明(引用具体规范要求,说明为什么需要补充)
+
+注意:
+1. suggestion应该具体、可操作,引用规范中的具体内容要求
+2. 使用专业的工程术语
+3. 语气应该是指导性的,帮助编制人员理解需要补充什么内容
+
+JSON输出:"""
+        return prompt
+
+    async def _generate_recommendation_with_llm(
+        self,
+        level: str,
+        first_code: str,
+        first_name: str,
+        second_code: str = None,
+        second_name: str = None,
+        tertiary_items: List[TertiaryItem] = None,
+        outline_title: str = None,
+        timeout: int = 30
+    ) -> Dict[str, str]:
+        """
+        使用大模型生成建议
+
+        Returns:
+            Dict[str, str]: 包含 issue_point, suggestion, reason 的字典
+        """
+        if not self.model_client:
+            return None
+
+        try:
+            prompt = self._build_llm_prompt_for_recommendation(
+                level=level,
+                first_code=first_code,
+                first_name=first_name,
+                second_code=second_code,
+                second_name=second_name,
+                tertiary_items=tertiary_items,
+                outline_title=outline_title
+            )
+
+            # 调用大模型
+            task_prompt_info = {
+                "task_prompt": prompt,
+                "task_name": f"completeness_suggestion_{level}"
+            }
+
+            # 生成唯一trace_id
+            import uuid
+            trace_id = f"completeness_llm_{uuid.uuid4().hex[:8]}"
+
+            model_response = await self.model_client.get_model_generate_invoke(
+                trace_id=trace_id,
+                task_prompt_info=task_prompt_info,
+                timeout=timeout,
+                model_name="qwen"  # 使用默认模型,可根据需要调整
+            )
+
+            # 解析模型返回的JSON
+            try:
+                # 尝试从返回文本中提取JSON
+                response_text = model_response.strip()
+                # 查找JSON块
+                if "```json" in response_text:
+                    json_str = response_text.split("```json")[1].split("```")[0].strip()
+                elif "```" in response_text:
+                    json_str = response_text.split("```")[1].split("```")[0].strip()
+                else:
+                    json_str = response_text
+
+                result = json.loads(json_str)
+                return {
+                    "issue_point": result.get("issue_point", ""),
+                    "suggestion": result.get("suggestion", ""),
+                    "reason": result.get("reason", "")
+                }
+            except (json.JSONDecodeError, IndexError) as e:
+                logger.warning(f"LLM建议生成结果解析失败: {e},返回: {model_response[:200]}")
+                return None
+
+        except Exception as e:
+            logger.warning(f"LLM建议生成失败: {e}")
+            return None
+
     async def check(
     async def check(
         self,
         self,
         chunks: List[Dict],
         chunks: List[Dict],
@@ -259,7 +478,7 @@ class LightweightCompletenessChecker:
 
 
         # 7. 生成分级建议
         # 7. 生成分级建议
         actual_first = {cat1 for cat1, _ in actual_secondary}
         actual_first = {cat1 for cat1, _ in actual_secondary}
-        recommendations = self._generate_recommendations(
+        recommendations = await self._generate_recommendations(
             tertiary_result, catalogue_result, outline_result,
             tertiary_result, catalogue_result, outline_result,
             actual_first, actual_secondary, actual_tertiary,
             actual_first, actual_secondary, actual_tertiary,
             chapter_classification
             chapter_classification
@@ -636,7 +855,7 @@ class LightweightCompletenessChecker:
         else:
         else:
             return "incomplete"
             return "incomplete"
     
     
-    def _generate_recommendations(
+    async def _generate_recommendations(
         self,
         self,
         tertiary_result: Dict,
         tertiary_result: Dict,
         catalogue_result: Dict,
         catalogue_result: Dict,
@@ -653,8 +872,8 @@ class LightweightCompletenessChecker:
           level        : 缺失级别(一级 / 二级 / 三级 / 一致性)
           level        : 缺失级别(一级 / 二级 / 三级 / 一致性)
           issue_point  : 问题摘要(含级别标识)
           issue_point  : 问题摘要(含级别标识)
           location     : 问题定位路径
           location     : 问题定位路径
-          suggestion   : 补充建议
-          reason       : 规范依据说明
+          suggestion   : 补充建议(使用LLM生成)
+          reason       : 规范依据说明(使用LLM生成)
         """
         """
         recommendations: List[Dict[str, Any]] = []
         recommendations: List[Dict[str, Any]] = []
 
 
@@ -679,17 +898,36 @@ class LightweightCompletenessChecker:
 
 
             # ── 一级缺失 ──────────────────────────────────────────────
             # ── 一级缺失 ──────────────────────────────────────────────
             if first_code not in actual_first:
             if first_code not in actual_first:
-                recommendations.append({
-                    "level": "一级",
-                    "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
-                    "location": first_name,
-                    "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
-                    "reason": (
-                        f"根据规范要求,文档必须包含'{first_name}'一级章节,"
-                        f"当前正文中未发现该章节任何内容"
-                    ),
-                    "first_seq": first_seq,
-                })
+                # 尝试使用LLM生成建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="一级",
+                    first_code=first_code,
+                    first_name=first_name,
+                    first_seq=first_seq
+                )
+
+                if llm_result:
+                    recommendations.append({
+                        "level": "一级",
+                        "issue_point": llm_result.get("issue_point", f"【一级章节缺失】'{first_name}'整个章节不存在"),
+                        "location": first_name,
+                        "suggestion": llm_result.get("suggestion", f"请添加'{first_name}'章节及其下全部子章节内容"),
+                        "reason": llm_result.get("reason", f"根据规范要求,文档必须包含'{first_name}'一级章节,当前正文中未发现该章节任何内容"),
+                        "first_seq": first_seq,
+                    })
+                else:
+                    # 回退到简单拼接
+                    recommendations.append({
+                        "level": "一级",
+                        "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
+                        "location": first_name,
+                        "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
+                        "reason": (
+                            f"根据规范要求,文档必须包含'{first_name}'一级章节,"
+                            f"当前正文中未发现该章节任何内容"
+                        ),
+                        "first_seq": first_seq,
+                    })
                 continue
                 continue
 
 
             # ── 一级存在,检查二级 ─────────────────────────────────────
             # ── 一级存在,检查二级 ─────────────────────────────────────
@@ -703,20 +941,41 @@ class LightweightCompletenessChecker:
 
 
                 # ── 二级缺失 ──────────────────────────────────────────
                 # ── 二级缺失 ──────────────────────────────────────────
                 if (cat1, cat2) not in actual_secondary:
                 if (cat1, cat2) not in actual_secondary:
-                    recommendations.append({
-                        "level": "二级",
-                        "issue_point": (
-                            f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
-                        ),
-                        "location": f"{first_name} > {second_name}",
-                        "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
-                        "reason": (
-                            f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
-                            f"当前正文中未发现该章节内容"
-                        ),
-                        "first_seq": first_seq,
-                        "second_seq": second_seq,
-                    })
+                    # 尝试使用LLM生成建议
+                    llm_result = await self._generate_recommendation_with_llm(
+                        level="二级",
+                        first_code=cat1,
+                        first_name=first_name,
+                        second_code=cat2,
+                        second_name=second_name
+                    )
+
+                    if llm_result:
+                        recommendations.append({
+                            "level": "二级",
+                            "issue_point": llm_result.get("issue_point", f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": llm_result.get("suggestion", f"请在'{first_name}'下添加'{second_name}'章节内容"),
+                            "reason": llm_result.get("reason", f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                        })
+                    else:
+                        # 回退到简单拼接
+                        recommendations.append({
+                            "level": "二级",
+                            "issue_point": (
+                                f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
+                            ),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
+                            "reason": (
+                                f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
+                                f"当前正文中未发现该章节内容"
+                            ),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                        })
                     continue
                     continue
 
 
                 # ── 二级存在,检查三级缺失 ────────────────────────────
                 # ── 二级存在,检查三级缺失 ────────────────────────────
@@ -734,40 +993,82 @@ class LightweightCompletenessChecker:
                 if not missing_t_items:
                 if not missing_t_items:
                     continue
                     continue
 
 
-                # 为每个缺失的三级项创建单独的 recommendation
-                for t_item in missing_t_items:
-                    recommendations.append({
-                        "level": "三级",
-                        "issue_point": (
-                            f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
-                        ),
-                        "location": f"{first_name} > {second_name}",
-                        "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
-                        "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
-                        "first_seq": first_seq,
-                        "second_seq": second_seq,
-                        "third_seq": t_item.third_seq,
-                    })
+                # 尝试使用LLM批量生成三级缺失建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="三级",
+                    first_code=cat1,
+                    first_name=first_name,
+                    second_code=cat2,
+                    second_name=second_name,
+                    tertiary_items=missing_t_items
+                )
+
+                if llm_result:
+                    # LLM生成了整体建议,为每个缺失项添加相同建议(但位置不同)
+                    for t_item in missing_t_items:
+                        recommendations.append({
+                            "level": "三级",
+                            "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": llm_result.get("suggestion", f"请补充'{second_name}'下的'{t_item.third_cn}'内容"),
+                            "reason": llm_result.get("reason", f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点"),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                            "third_seq": t_item.third_seq,
+                        })
+                else:
+                    # 回退到简单拼接
+                    for t_item in missing_t_items:
+                        recommendations.append({
+                            "level": "三级",
+                            "issue_point": (
+                                f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
+                            ),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
+                            "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                            "third_seq": t_item.third_seq,
+                        })
 
 
         # ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
         # ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
         if outline_result:
         if outline_result:
             for e in outline_result.get("empty_sections", []):
             for e in outline_result.get("empty_sections", []):
                 f_name = e.get("first_name", "")
                 f_name = e.get("first_name", "")
-                # 优先用目录页原始标题,回退到标准名称
                 sec_title = e.get("outline_title") or e.get("secondary_name", "")
                 sec_title = e.get("outline_title") or e.get("secondary_name", "")
                 location = f"{f_name} > {sec_title}" if f_name else sec_title
                 location = f"{f_name} > {sec_title}" if f_name else sec_title
-                recommendations.append({
-                    "level": "一致性",
-                    "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
-                    "location": location,
-                    "suggestion": (
-                        f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
-                    ),
-                    "reason": (
-                        f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
-                        f"存在目录与正文不一致的问题"
-                    ),
-                })
+
+                # 尝试使用LLM生成建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="一致性",
+                    first_code="",
+                    first_name=f_name,
+                    second_name=sec_title,
+                    outline_title=sec_title
+                )
+
+                if llm_result:
+                    recommendations.append({
+                        "level": "一致性",
+                        "issue_point": llm_result.get("issue_point", f"【目录正文不一致】'{location}'目录已列但正文无内容"),
+                        "location": location,
+                        "suggestion": llm_result.get("suggestion", f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"),
+                        "reason": llm_result.get("reason", f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,存在目录与正文不一致的问题"),
+                    })
+                else:
+                    recommendations.append({
+                        "level": "一致性",
+                        "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
+                        "location": location,
+                        "suggestion": (
+                            f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
+                        ),
+                        "reason": (
+                            f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
+                            f"存在目录与正文不一致的问题"
+                        ),
+                    })
 
 
         if not recommendations:
         if not recommendations:
             recommendations.append({
             recommendations.append({
@@ -785,16 +1086,20 @@ class LightweightCompletenessChecker:
 async def check_completeness_lightweight(
 async def check_completeness_lightweight(
     chunks: List[Dict],
     chunks: List[Dict],
     outline: Optional[List[Dict]] = None,
     outline: Optional[List[Dict]] = None,
-    standard_csv_path: Optional[str] = None
+    standard_csv_path: Optional[str] = None,
+    model_client=None,
+    prompt_loader=None
 ) -> LightweightCompletenessResult:
 ) -> LightweightCompletenessResult:
     """
     """
     轻量级完整性审查入口函数
     轻量级完整性审查入口函数
-    
+
     Args:
     Args:
         chunks: 文档分块列表,每个chunk需包含tertiary_category_code
         chunks: 文档分块列表,每个chunk需包含tertiary_category_code
         outline: 目录结构(可选)
         outline: 目录结构(可选)
         standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
         standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
-    
+        model_client: 模型客户端(可选),用于生成智能建议
+        prompt_loader: 提示词加载器(可选)
+
     Returns:
     Returns:
         LightweightCompletenessResult
         LightweightCompletenessResult
     """
     """
@@ -802,8 +1107,12 @@ async def check_completeness_lightweight(
         # 默认路径
         # 默认路径
         default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
         default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
         standard_csv_path = str(default_path)
         standard_csv_path = str(default_path)
-    
-    checker = LightweightCompletenessChecker(standard_csv_path)
+
+    checker = LightweightCompletenessChecker(
+        standard_csv_path,
+        model_client=model_client,
+        prompt_loader=prompt_loader
+    )
     return await checker.check(chunks=chunks, outline=outline)
     return await checker.check(chunks=chunks, outline=outline)
 
 
 
 

+ 1 - 1
core/construction_review/component/reviewers/timeliness_basis_reviewer.py

@@ -192,7 +192,7 @@ class BasisReviewService:
         self,
         self,
         basis_items: List[str],
         basis_items: List[str],
         collection_name: str = "first_bfp_collection_status",
         collection_name: str = "first_bfp_collection_status",
-        top_k_each: int = 3,
+        top_k_each: int = 10,  # 增加召回数量,提高精确匹配机会
     ) -> List[Dict[str, Any]]:
     ) -> List[Dict[str, Any]]:
         """异步批次审查(通常3条)"""
         """异步批次审查(通常3条)"""
         basis_items = [x for x in (basis_items or []) if isinstance(x, str) and x.strip()]
         basis_items = [x for x in (basis_items or []) if isinstance(x, str) and x.strip()]

+ 6 - 6
core/construction_review/component/reviewers/timeliness_content_reviewer.py

@@ -46,14 +46,14 @@ class StandardExtractor:
 
 
     # 规范编号正则模式(匹配类似 GB 50010-2010、JTG B01-2014、GB/T 50502-2020 等格式)
     # 规范编号正则模式(匹配类似 GB 50010-2010、JTG B01-2014、GB/T 50502-2020 等格式)
     STANDARD_NUMBER_PATTERNS = [
     STANDARD_NUMBER_PATTERNS = [
-        # 中国国家标准:GB 50010-2010、GB/T 50502-2020
-        r'GB(?:/T)?\s*\d{4,5}(?:\.\d+)?\s*-\s*\d{4}',
+        # 中国国家标准:GB 50010-2010、GB/T 50502-2020、GB 51-2001
+        r'GB(?:/T)?\s*\d{1,5}(?:\.\d+)?\s*-\s*\d{4}',
         # 中国行业标准:JTG B01-2014、JTG D60-2015、JTG/T 3650-2020
         # 中国行业标准:JTG B01-2014、JTG D60-2015、JTG/T 3650-2020
-        r'[A-Z]{2,3}(?:/T)?\s*[A-Z]?\s*\d{2,4}(?:\.\d+)?\s*-\s*\d{4}',
+        r'[A-Z]{2,3}(?:/T)?\s*[A-Z]?\s*\d{1,5}(?:\.\d+)?\s*-\s*\d{4}',
         # 地方标准:DB11/T 1234-2020
         # 地方标准:DB11/T 1234-2020
-        r'DB\d{2}(?:/T)?\s*\d{4,5}\s*-\s*\d{4}',
+        r'DB\d{2}(?:/T)?\s*\d{1,5}\s*-\s*\d{4}',
         # 团体标准:T/CECS 123-2020
         # 团体标准:T/CECS 123-2020
-        r'T/\w+\s*\d{3,5}\s*-\s*\d{4}',
+        r'T/\w+\s*\d{1,5}\s*-\s*\d{4}',
     ]
     ]
 
 
     # 规范名称与编号组合的正则模式
     # 规范名称与编号组合的正则模式
@@ -398,7 +398,7 @@ class ContentTimelinessReviewer:
         self,
         self,
         standard_number: str,
         standard_number: str,
         collection_name: str,
         collection_name: str,
-        top_k: int = 3
+        top_k: int = 10  # 增加召回数量,提高精确匹配机会
     ) -> List[dict]:
     ) -> List[dict]:
         """异步搜索单个规范"""
         """异步搜索单个规范"""
         try:
         try:

+ 229 - 4
core/construction_review/component/reviewers/utils/reference_matcher.py

@@ -67,6 +67,8 @@ HUMAN = """
 
 
 3. **has_exact_match**(是否有名称编号都相同的文件)
 3. **has_exact_match**(是否有名称编号都相同的文件)
    - 参考文件中的编号和文件名与审查规范完全匹配,返回 true
    - 参考文件中的编号和文件名与审查规范完全匹配,返回 true
+   - **重要**:比较时忽略括号格式差异(半角()和全角()视为相同)
+   - 例如:《规范》(GB 1234-2020)与《规范》(GB 1234-2020)视为完全匹配
    - 否则返回 false
    - 否则返回 false
 
 
 4. **exact_match_info**(名称编号相同的文件及状态)
 4. **exact_match_info**(名称编号相同的文件及状态)
@@ -163,6 +165,98 @@ def _extract_regulation_info(text: str) -> Tuple[str, Optional[str]]:
     return name, number
     return name, number
 
 
 
 
+def _normalize_text(text: str) -> str:
+    """
+    标准化文本,统一括号格式用于比较
+    将全角括号转换为半角括号,去除多余空格
+    """
+    if not text:
+        return text
+    # 全角括号转为半角括号
+    text = text.replace('(', '(').replace(')', ')')
+    # 统一书名号(中文书名号保持不变,但统一全角半角)
+    text = text.replace('『', '《').replace('』', '》')
+    text = text.replace('﹄', '《').replace('﹃', '》')
+    # 去除多余空格
+    text = ' '.join(text.split())
+    return text.strip()
+
+
+def _extract_core_number(number: str) -> str:
+    """
+    提取规范编号的核心部分(去掉年份)
+    例如:JGJ 65-2013 -> JGJ65, GB/T 50010-2010 -> GB/T50010
+    
+    Args:
+        number: 规范编号,如 "JGJ 65-2013"
+        
+    Returns:
+        核心编号,如 "JGJ65"
+    """
+    if not number:
+        return ""
+    
+    # 标准化:转大写、去空格
+    normalized = number.upper().replace(' ', '')
+    
+    # 去掉年份部分(-YYYY 或 —YYYY)
+    # 匹配末尾的年份 -4位数字 或 —4位数字 或 - 4位数字
+    normalized = re.sub(r'[-—]\s*\d{4}$', '', normalized)
+    
+    return normalized
+
+
+def _is_same_regulation_family(original_number: str, generated_number: str, threshold: int = 100) -> bool:
+    """
+    判断两个编号是否属于同一规范家族(核心部分相同或高度相似)
+    
+    Args:
+        original_number: 原始编号
+        generated_number: 生成的编号
+        threshold: 数字差异阈值,默认100
+        
+    Returns:
+        bool: 是否属于同一规范家族
+    """
+    original_core = _extract_core_number(original_number)
+    generated_core = _extract_core_number(generated_number)
+    
+    if not original_core or not generated_core:
+        return False
+    
+    # 如果核心部分完全相同,肯定是同一规范
+    if original_core == generated_core:
+        return True
+    
+    # 提取前缀(如 JGJ、GB/T 等)和数字部分
+    def _split_core(core: str) -> tuple:
+        """将核心编号拆分为前缀和数字部分"""
+        match = re.match(r'^([A-Z]+(?:/[A-Z])?)(\d+(?:\.\d+)?)$', core)
+        if match:
+            return match.group(1), match.group(2)
+        return core, ""
+    
+    orig_prefix, orig_num = _split_core(original_core)
+    gen_prefix, gen_num = _split_core(generated_core)
+    
+    # 如果前缀相同但数字不同,可能是同一系列的不同规范
+    # 例如 JGJ65 和 JGJ300 都是 JGJ 系列,但是完全不同的规范
+    # 我们认为:如果前缀相同且数字相似(差值在一定范围内),才算同一规范家族
+    if orig_prefix == gen_prefix and orig_num and gen_num:
+        try:
+            orig_val = float(orig_num)
+            gen_val = float(gen_num)
+            # 【关键阈值】如果数字差异达到或超过阈值,认为是完全不同的规范
+            if abs(orig_val - gen_val) >= threshold:
+                return False
+            return True
+        except ValueError:
+            # 无法转换为数字,直接比较字符串
+            pass
+    
+    return False
+
+
 # ===== 9) 新流程:验证并生成正确编号 =====
 # ===== 9) 新流程:验证并生成正确编号 =====
 async def validate_and_generate_number(
 async def validate_and_generate_number(
     review_item: str,
     review_item: str,
@@ -189,6 +283,39 @@ async def validate_and_generate_number(
     if existing_number:
     if existing_number:
         logger.info(f"[时效性验证] 验证编号: 《{regulation_name}》 {existing_number}")
         logger.info(f"[时效性验证] 验证编号: 《{regulation_name}》 {existing_number}")
         
         
+        # 先进行本地标准化比较:检查参考候选中是否有名称和编号都完全匹配(忽略括号差异)的
+        normalized_existing_number = _normalize_text(existing_number)
+        normalized_regulation_name = _normalize_text(regulation_name)
+        for candidate in reference_candidates:
+            # 从候选中提取名称和编号
+            candidate_name, candidate_number = _extract_regulation_info(candidate)
+            if (candidate_name and candidate_number and
+                _normalize_text(candidate_name) == normalized_regulation_name and
+                _normalize_text(candidate_number) == normalized_existing_number):
+                logger.info(f"[时效性验证] 本地验证通过(名称和编号都匹配): 《{regulation_name}》 {existing_number}")
+                return ValidationMatchResult(
+                    review_item=review_item,
+                    reference_candidates=reference_candidates,
+                    is_valid=True,
+                    validated_number=existing_number,
+                    status="验证通过"
+                )
+
+        # 【关键】检查是否有编号相同但名称不同的情况(规范名称错误)
+        for candidate in reference_candidates:
+            candidate_name, candidate_number = _extract_regulation_info(candidate)
+            if (candidate_name and candidate_number and
+                _normalize_text(candidate_number) == normalized_existing_number and
+                _normalize_text(candidate_name) != normalized_regulation_name):
+                logger.info(f"[时效性验证] 编号相同但名称不同: 《{regulation_name}》-> 应为《{candidate_name}》")
+                return ValidationMatchResult(
+                    review_item=review_item,
+                    reference_candidates=reference_candidates,
+                    is_valid=False,
+                    validated_number=existing_number,
+                    status="规范名称错误"
+                )
+        
         # 调用3模型验证
         # 调用3模型验证
         validation = await validate_reference_number(
         validation = await validate_reference_number(
             regulation_name=regulation_name,
             regulation_name=regulation_name,
@@ -323,7 +450,73 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
         exact_info = raw_item.get("exact_match_info", "")
         exact_info = raw_item.get("exact_match_info", "")
         same_name_current = raw_item.get("same_name_current", "")
         same_name_current = raw_item.get("same_name_current", "")
         
         
-        # 如果有精确匹配,直接接受
+        # 【校正逻辑】如果LLM判断has_exact_match=false,但本地比较发现名称和编号都相同(忽略括号差异),则校正为true
+        if not has_exact and exact_info:
+            review_name, review_number = _extract_regulation_info(review_item)
+            exact_name, exact_number = _extract_regulation_info(exact_info)
+            if (review_name and exact_name and
+                _normalize_text(review_name) == _normalize_text(exact_name) and
+                review_number and exact_number and
+                _normalize_text(review_number) == _normalize_text(exact_number)):
+                logger.info(f"[规范匹配校正] review_item='{review_item}' 名称和编号都相同,校正has_exact_match为true")
+                has_exact = True
+        
+        # 【第一步】检查向量搜索候选中的匹配情况
+        # ref_candidates 是 List[List[str]],需要获取当前项对应的候选列表
+        current_candidates = ref_candidates[i] if i < len(ref_candidates) else []
+        review_name, review_number = _extract_regulation_info(review_item)
+
+        if review_name and review_number and current_candidates:
+            normalized_review_name = _normalize_text(review_name)
+            normalized_review_number = _normalize_text(review_number)
+
+            # 先检查是否有完全匹配(名称和编号都相同)
+            for candidate in current_candidates:
+                if isinstance(candidate, str):
+                    candidate_name, candidate_number = _extract_regulation_info(candidate)
+                    if (candidate_name and candidate_number and
+                        _normalize_text(candidate_name) == normalized_review_name and
+                        _normalize_text(candidate_number) == normalized_review_number):
+                        # 向量库中找到精确匹配(名称和编号都相同)
+                        logger.info(f"[规范匹配] 向量库中找到精确匹配: '{review_item}' -> '{candidate}'")
+                        final_results.append({
+                            "review_item": review_item,
+                            "has_related_file": True,
+                            "has_exact_match": True,
+                            "exact_match_info": candidate,
+                            "same_name_current": candidate
+                        })
+                        has_exact = True
+                        break
+
+            if has_exact:
+                continue
+
+            # 【关键】检查是否有编号相同但名称不同的情况(规范名称错误)
+            for candidate in current_candidates:
+                if isinstance(candidate, str):
+                    candidate_name, candidate_number = _extract_regulation_info(candidate)
+                    if (candidate_name and candidate_number and
+                        _normalize_text(candidate_number) == normalized_review_number and
+                        _normalize_text(candidate_name) != normalized_review_name):
+                        # 编号相同但名称不同 - 判定为规范名称错误
+                        logger.info(f"[规范匹配] 编号相同但名称不同: '{review_item}' -> '{candidate}'")
+                        final_results.append({
+                            "review_item": review_item,
+                            "has_related_file": True,
+                            "has_exact_match": False,
+                            "exact_match_info": "",
+                            "same_name_current": candidate,
+                            "name_mismatch": True,  # 标记为名称不匹配
+                            "correct_name": candidate_name  # 正确的名称
+                        })
+                        has_exact = True  # 标记为已处理,跳过后续逻辑
+                        break
+
+            if has_exact:
+                continue
+        
+        # 如果有精确匹配(由LLM判断),直接接受
         if has_exact and exact_info:
         if has_exact and exact_info:
             final_results.append({
             final_results.append({
                 "review_item": review_item,
                 "review_item": review_item,
@@ -334,15 +527,47 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
             })
             })
             continue
             continue
         
         
-        # 如果没有精确匹配,但有相关文件,进行验证/生成
-        if has_related or ref_candidates:
+        # 【第二步】如果没有精确匹配,但有相关文件,进行验证/生成
+        # 使用当前项的候选列表(不是整个二维列表)
+        if has_related or current_candidates:
             try:
             try:
                 validation_result = await validate_and_generate_number(
                 validation_result = await validate_and_generate_number(
                     review_item=review_item,
                     review_item=review_item,
-                    reference_candidates=ref_candidates
+                    reference_candidates=current_candidates
                 )
                 )
                 
                 
                 if validation_result.validated_number:
                 if validation_result.validated_number:
+                    # 【关键逻辑】检查生成的编号与原始编号是否属于同一规范家族
+                    is_same_family = _is_same_regulation_family(
+                        review_number or "",
+                        validation_result.validated_number
+                    )
+
+                    # 【特殊处理】检查参考候选中是否有名称完全匹配的文件
+                    # 如果名称相同但编号不同(如 GB 51-2001 vs GB 50021-2001),应接受生成的编号
+                    has_same_name_in_candidates = False
+                    for candidate in current_candidates:
+                        if isinstance(candidate, str):
+                            candidate_name, _ = _extract_regulation_info(candidate)
+                            if (candidate_name and
+                                _normalize_text(candidate_name) == _normalize_text(review_name)):
+                                has_same_name_in_candidates = True
+                                break
+
+                    if not is_same_family and not has_same_name_in_candidates:
+                        # 生成的编号与原始编号完全不同,且参考库中没有名称匹配的文件
+                        # 说明参考库中找到的文件实际上不相关
+                        logger.info(f"[规范匹配] '{review_item}' 生成的编号({validation_result.validated_number})"
+                                  f"与原始编号({review_number})不属于同一规范家族,判定为无相关文件")
+                        final_results.append({
+                            "review_item": review_item,
+                            "has_related_file": False,  # 【关键】标记为无相关文件
+                            "has_exact_match": False,
+                            "exact_match_info": "",
+                            "same_name_current": ""
+                        })
+                        continue
+                    
                     if validation_result.is_valid:
                     if validation_result.is_valid:
                         # 验证通过,原始编号正确
                         # 验证通过,原始编号正确
                         final_results.append({
                         final_results.append({

+ 85 - 8
core/construction_review/component/reviewers/utils/timeliness_determiner.py

@@ -48,30 +48,42 @@ HUMAN = """
 
 
 【判定规则(按优先级从高到低)】
 【判定规则(按优先级从高到低)】
 
 
+**重要提示**:比较规范编号时,忽略括号格式差异(半角()和全角()视为相同)。例如 "GB/T 5224-2014" 和 "GB/T 5224-2014" 是相同的编号。
+
 1. **无参考规范**(无风险)
 1. **无参考规范**(无风险)
    - 条件:has_related_file = false
    - 条件:has_related_file = false
    - 原因:在参考规范库中完全找不到相关文件
    - 原因:在参考规范库中完全找不到相关文件
    - 建议:当前引用未在参考规范库中发现,建议人工核实其有效性
    - 建议:当前引用未在参考规范库中发现,建议人工核实其有效性
 
 
-2. **规范编号错误**(高风险)
-   - 条件:has_related_file = true 且 has_exact_match = false
-   - 原因:与参考文件XXX编号不一致
+2. **规范名称错误**(高风险)
+   - 条件:name_mismatch = true(编号相同但名称不同)
+   - 原因:规范编号正确,但规范名称错误。审查引用的是《错误名称》(编号),参考库中应为《正确名称》(编号)
+   - 建议:建议将规范名称更正为《正确名称》(编号)
+   - **重要**:必须从 correct_name 字段获取正确的规范名称
+
+3. **规范编号错误**(高风险)
+   - 条件:has_related_file = true 且 has_exact_match = false 且 name_mismatch 不存在或不为true
+   - 原因:与参考文件XXX编号不一致(注意:仅当编号实质性不同时才算不一致,忽略括号格式差异)
    - 建议:建议核实并更正为参考库中的正确编号XXX
    - 建议:建议核实并更正为参考库中的正确编号XXX
 
 
-3. **规范编号正确**(无风险)
+4. **规范编号正确**(无风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"现行"
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"现行"
    - 原因:与参考文件XXX名称编号一致,且文件状态为现行
    - 原因:与参考文件XXX名称编号一致,且文件状态为现行
    - 建议:引用规范为现行有效版本,无需调整
    - 建议:引用规范为现行有效版本,无需调整
 
 
-4. **引用已废止的规范**(高风险)
+5. **引用已废止的规范**(高风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 为空
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 为空
    - 原因:参考文件显示XXX已废止,且无明确替代版本
    - 原因:参考文件显示XXX已废止,且无明确替代版本
    - 建议:建议删除该引用或咨询最新替代规范
    - 建议:建议删除该引用或咨询最新替代规范
 
 
-5. **引用已被替代的规范**(高风险)
+6. **引用已被替代的规范**(高风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 不为空
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 不为空
-   - 原因:参考文件显示XXX已废止,但存在XXX现行版本
-   - 建议:建议更新为现行替代标准
+   - 原因:参考文件显示《规范名称》(原编号)已废止,存在现行版本《规范名称》(新编号)
+   - 建议:建议更新为现行版本《规范名称》(新编号),并核实其适用性
+   - **重要**:
+     - 必须从 same_name_current 字段中提取具体的现行版本编号
+     - 例如 same_name_current="《预应力混凝土用钢绞线》(GB/T 5224-2023)状态为现行",则建议应为"建议更新为现行版本《预应力混凝土用钢绞线》(GB/T 5224-2023),并核实其适用性"
+     - 严禁在建议中出现"XXX"字样,必须替换为实际的规范名称和编号
 
 
 【规范匹配结果】
 【规范匹配结果】
 {match_results}
 {match_results}
@@ -114,6 +126,23 @@ def extract_first_json(text: str) -> dict:
     raise ValueError("JSON 花括号未闭合")
     raise ValueError("JSON 花括号未闭合")
 
 
 
 
+# ===== 辅助函数:标准化文本 =====
+def _normalize_text(text: str) -> str:
+    """标准化文本,统一括号格式用于比较"""
+    if not text:
+        return text
+    text = text.replace('(', '(').replace(')', ')')
+    text = ' '.join(text.split())
+    return text.strip()
+
+
+def _extract_number_from_location(location: str) -> str:
+    """从location字段提取规范编号"""
+    import re
+    match = re.search(r'[((]([^))]+)[))]', location)
+    return match.group(1).strip() if match else ""
+
+
 # ===== 7) 核心方法 =====
 # ===== 7) 核心方法 =====
 async def determine_timeliness_issue(match_results: str) -> str:
 async def determine_timeliness_issue(match_results: str) -> str:
     """
     """
@@ -146,6 +175,10 @@ async def determine_timeliness_issue(match_results: str) -> str:
             data = extract_first_json(raw)
             data = extract_first_json(raw)
             findings = TimelinessResults.model_validate(data)
             findings = TimelinessResults.model_validate(data)
             result = [x.model_dump() for x in findings.items]
             result = [x.model_dump() for x in findings.items]
+            
+            # 【强制校正】处理LLM误判:如果判定为"规范编号错误"但编号实质相同,则校正为"规范编号正确"
+            result = _correct_misjudgment(result, match_results)
+            
             return json.dumps(result, ensure_ascii=False, indent=2)
             return json.dumps(result, ensure_ascii=False, indent=2)
         except (Exception, ValidationError, json.JSONDecodeError) as e:
         except (Exception, ValidationError, json.JSONDecodeError) as e:
             last_err = e
             last_err = e
@@ -153,6 +186,50 @@ async def determine_timeliness_issue(match_results: str) -> str:
     raise RuntimeError(f"时效性判定失败:{last_err}") from last_err
     raise RuntimeError(f"时效性判定失败:{last_err}") from last_err
 
 
 
 
+def _correct_misjudgment(results: list, match_results: str) -> list:
+    """
+    校正LLM的误判:检查"规范编号错误"是否实际为编号相同(仅括号格式不同)
+    """
+    import json
+    import re
+    
+    try:
+        match_data = json.loads(match_results)
+        match_items = match_data if isinstance(match_data, list) else match_data.get('items', [])
+        
+        for i, item in enumerate(results):
+            issue_point = item.get('issue_point', '')
+            location = item.get('location', '')
+            reason = item.get('reason', '')
+            
+            # 只处理"规范编号错误"的情况
+            if '规范编号错误' not in issue_point:
+                continue
+                
+            # 从location提取审查项编号
+            review_number = _extract_number_from_location(location)
+            if not review_number:
+                continue
+            
+            # 从reason或match_items中提取参考文件编号
+            ref_number = ''
+            reason_match = re.search(r'(([^)]+))', reason)
+            if reason_match:
+                ref_number = reason_match.group(1).strip()
+            
+            # 如果编号实质相同(忽略括号差异),校正为"规范编号正确"
+            if review_number and ref_number and _normalize_text(review_number) == _normalize_text(ref_number):
+                print(f"[校正] 误判检测: '{location}' 编号实质相同,校正为'规范编号正确'")
+                item['issue_point'] = '规范编号正确'
+                item['suggestion'] = '引用规范为现行有效版本,无需调整'
+                item['reason'] = f'与参考文件{location}名称编号一致,且文件状态为现行'
+                item['risk_level'] = '无风险'
+    except Exception as e:
+        print(f"[校正] 校正过程出错: {e}")
+    
+    return results
+
+
 # ===== 8) 示例 =====
 # ===== 8) 示例 =====
 if __name__ == "__main__":
 if __name__ == "__main__":
     import asyncio
     import asyncio

BIN
requirements.txt