|
@@ -666,7 +666,26 @@ class ContentClassifierClient:
|
|
|
|
|
|
|
|
if total_lines <= MAX_LINES_PER_CHUNK:
|
|
if total_lines <= MAX_LINES_PER_CHUNK:
|
|
|
# 内容不长,直接处理
|
|
# 内容不长,直接处理
|
|
|
- return await self._classify_single_chunk(section, start_time)
|
|
|
|
|
|
|
+ result = await self._classify_single_chunk(section, start_time)
|
|
|
|
|
+ # 补充验证:关键字扫描 + LLM二次确认,补充遗漏的分类
|
|
|
|
|
+ if not result.error and result.classified_contents is not None:
|
|
|
|
|
+ supplement = await self._detect_and_supplement(section, result.classified_contents)
|
|
|
|
|
+ if supplement:
|
|
|
|
|
+ merged = self._merge_classified_contents(result.classified_contents + supplement, section)
|
|
|
|
|
+ total_l, classified_l, coverage_r = self._calculate_coverage_rate(section, merged)
|
|
|
|
|
+ return ClassificationResult(
|
|
|
|
|
+ model=result.model,
|
|
|
|
|
+ section_key=result.section_key,
|
|
|
|
|
+ section_name=result.section_name,
|
|
|
|
|
+ classified_contents=merged,
|
|
|
|
|
+ latency=result.latency,
|
|
|
|
|
+ raw_response=result.raw_response,
|
|
|
|
|
+ error=result.error,
|
|
|
|
|
+ total_lines=total_l,
|
|
|
|
|
+ classified_lines=classified_l,
|
|
|
|
|
+ coverage_rate=coverage_r
|
|
|
|
|
+ )
|
|
|
|
|
+ return result
|
|
|
|
|
|
|
|
# 内容过长,无重叠分块处理
|
|
# 内容过长,无重叠分块处理
|
|
|
# 不使用 overlap:有重叠时边界行被两块各看一次反而容易两头都不认领,
|
|
# 不使用 overlap:有重叠时边界行被两块各看一次反而容易两头都不认领,
|