Просмотр исходного кода

fix:修复模型返回包含"Thinking Process"思考过程,导致JSON解析失败

Meric 2 недели назад
Родитель
Коммит
7dc5c881f7
2 измененных файла: 433 добавлено и 418 удалено
  1. 38 23
      src/app/scripts/md2excel_extractor.py
  2. 395 395
      uv.lock

+ 38 - 23
src/app/scripts/md2excel_extractor.py

@@ -365,37 +365,52 @@ def parse_llm_response(response_text: str) -> Dict[str, str]:
     Returns:
         解析后的字典,包含提取的字段
     """
-    try:
-        # 尝试直接解析 JSON
-        return json.loads(response_text)
-    except json.JSONDecodeError:
-        pass
-    
-    # 尝试从文本中提取 JSON 块
-    # 匹配 ```json ... ``` 格式
-    json_pattern = r'```json\s*(.*?)\s*```'
-    match = re.search(json_pattern, response_text, re.DOTALL)
-    if match:
-        try:
-            return json.loads(match.group(1))
-        except json.JSONDecodeError:
-            pass
+    def extract_json_objects(text):
+        brace_depth = 0
+        json_objects = []
+        start_idx = -1
+        
+        for i, char in enumerate(text):
+            if char == '{':
+                if brace_depth == 0:
+                    start_idx = i
+                brace_depth += 1
+            elif char == '}':
+                brace_depth -= 1
+                if brace_depth == 0 and start_idx != -1:
+                    json_objects.append(text[start_idx:i+1])
+                    start_idx = -1
+        
+        return json_objects
+    
+    json_objects = extract_json_objects(response_text)
+    
+    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]
     
-    # 尝试匹配任意 JSON 对象
-    json_pattern2 = r'\{[\s\S]*?"项目名称"[\s\S]*?\}'
-    match2 = re.search(json_pattern2, response_text)
-    if match2:
+    for obj in json_objects:
         try:
-            return json.loads(match2.group())
+            data = json.loads(obj)
+            if isinstance(data, dict) and "项目名称" in data:
+                project_name = data.get("项目名称", "")
+                plan_name = data.get("方案名称", "")
+                expert_opinion = data.get("专项方案专家评审意见回复表", "")
+                
+                is_placeholder = False
+                for keyword in placeholder_keywords:
+                    if keyword in str(project_name) or keyword in str(plan_name):
+                        is_placeholder = True
+                        break
+                
+                if not is_placeholder and len(str(expert_opinion)) > 50:
+                    return data
         except json.JSONDecodeError:
-            pass
+            continue
     
-    # 如果都无法解析,返回原始文本作为专家意见
     print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
     return {
         "项目名称": "解析失败",
         "方案名称": "解析失败",
-        "专项方案专家评审意见回复表": response_text[:500]
+        "专项方案专家评审意见回复表": response_text
     }
 
 

Разница между файлами не показана из-за своего большого размера
+ 395 - 395
uv.lock


Некоторые файлы не были показаны из-за большого количества измененных файлов