5 дней назад · 55eb70305c
--- a/core/construction_review/component/minimal_pipeline/toc_detector.py
+++ b/core/construction_review/component/minimal_pipeline/toc_detector.py
@@ -65,8 +65,8 @@ class TOCCatalogExtractor:
 
				     MAX_CHECK_PAGES = 50
			
 
				     DPI = 150
			
 
				 
			
 
				-    # OCR 配置（目录页使用更低DPI避免请求过大）
			
 
				-    OCR_DPI = 150
			
 
				+    # OCR 配置（高 DPI 渲染后缩放到 800px，确保目录文字清晰）
			
 
				+    OCR_DPI = 600
			
 
				     MAX_SHORT_EDGE = 800
			
 
				     JPEG_QUALITY = 85
			
 
				     MAX_IMAGE_SIZE_MB = 5
			
@@ -371,9 +371,13 @@ class TOCCatalogExtractor:
 
				                 new_size = (int(img.width * ratio), int(img.height * ratio))
			
 
				                 img = img.resize(new_size, Image.Resampling.LANCZOS)
			
 
				 
			
 
				+            # 二值化增强：将浅灰文字变黑，提高 OCR 识别率
			
 
				+            img = img.convert('L')
			
 
				+            img = img.point(lambda x: 0 if x < 220 else 255)
			
 
				+            img = img.convert('RGB')
			
 
				+
			
 
				             buffer = io.BytesIO()
			
 
				-            quality = self.JPEG_QUALITY if not force_smaller else 75
			
 
				-            img.save(buffer, format='JPEG', quality=quality, optimize=True)
			
 
				+            img.save(buffer, format='PNG', optimize=True)
			
 
				             return buffer.getvalue()
			
 
				 
			
 
				         except Exception as e:
			
--- a/utils_test/minimal_pipeline/_toc_detector.py
+++ b/utils_test/minimal_pipeline/_toc_detector.py
@@ -65,8 +65,8 @@ class TOCCatalogExtractor:
 
				     MAX_CHECK_PAGES = 50
			
 
				     DPI = 150
			
 
				 
			
 
				-    # OCR 配置（目录页使用更低DPI避免请求过大）
			
 
				-    OCR_DPI = 150
			
 
				+    # OCR 配置（高 DPI 渲染后缩放到 800px，确保目录文字清晰）
			
 
				+    OCR_DPI = 600
			
 
				     MAX_SHORT_EDGE = 800
			
 
				     JPEG_QUALITY = 85
			
 
				     MAX_IMAGE_SIZE_MB = 5
			
@@ -371,9 +371,13 @@ class TOCCatalogExtractor:
 
				                 new_size = (int(img.width * ratio), int(img.height * ratio))
			
 
				                 img = img.resize(new_size, Image.Resampling.LANCZOS)
			
 
				 
			
 
				+            # 二值化增强：将浅灰文字变黑，提高 OCR 识别率
			
 
				+            img = img.convert('L')
			
 
				+            img = img.point(lambda x: 0 if x < 220 else 255)
			
 
				+            img = img.convert('RGB')
			
 
				+
			
 
				             buffer = io.BytesIO()
			
 
				-            quality = self.JPEG_QUALITY if not force_smaller else 75
			
 
				-            img.save(buffer, format='JPEG', quality=quality, optimize=True)
			
 
				+            img.save(buffer, format='PNG', optimize=True)
			
 
				             return buffer.getvalue()
			
 
				 
			
 
				         except Exception as e: