|
@@ -65,8 +65,8 @@ class TOCCatalogExtractor:
|
|
|
MAX_CHECK_PAGES = 50
|
|
MAX_CHECK_PAGES = 50
|
|
|
DPI = 150
|
|
DPI = 150
|
|
|
|
|
|
|
|
- # OCR 配置(目录页使用更低DPI避免请求过大)
|
|
|
|
|
- OCR_DPI = 150
|
|
|
|
|
|
|
+ # OCR 配置(高 DPI 渲染后缩放到 800px,确保目录文字清晰)
|
|
|
|
|
+ OCR_DPI = 600
|
|
|
MAX_SHORT_EDGE = 800
|
|
MAX_SHORT_EDGE = 800
|
|
|
JPEG_QUALITY = 85
|
|
JPEG_QUALITY = 85
|
|
|
MAX_IMAGE_SIZE_MB = 5
|
|
MAX_IMAGE_SIZE_MB = 5
|
|
@@ -371,9 +371,13 @@ class TOCCatalogExtractor:
|
|
|
new_size = (int(img.width * ratio), int(img.height * ratio))
|
|
new_size = (int(img.width * ratio), int(img.height * ratio))
|
|
|
img = img.resize(new_size, Image.Resampling.LANCZOS)
|
|
img = img.resize(new_size, Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
+ # 二值化增强:将浅灰文字变黑,提高 OCR 识别率
|
|
|
|
|
+ img = img.convert('L')
|
|
|
|
|
+ img = img.point(lambda x: 0 if x < 220 else 255)
|
|
|
|
|
+ img = img.convert('RGB')
|
|
|
|
|
+
|
|
|
buffer = io.BytesIO()
|
|
buffer = io.BytesIO()
|
|
|
- quality = self.JPEG_QUALITY if not force_smaller else 75
|
|
|
|
|
- img.save(buffer, format='JPEG', quality=quality, optimize=True)
|
|
|
|
|
|
|
+ img.save(buffer, format='PNG', optimize=True)
|
|
|
return buffer.getvalue()
|
|
return buffer.getvalue()
|
|
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|