CRBC-MaaS-Platform-Project
/
LQAgentPlatform


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
							async def _call_llm_for_secondary_classification(
        self,
        first_category: str,
        first_category_code: str,
        level2_titles: List[str]
    ) -> Optional[Dict[str, Any]]:
        """
        调用LLM进行二级分类（并发版）

        使用 function_name 从 model_setting.yaml 加载模型配置
        """
        # 获取该一级分类的二级分类标准和映射
        secondary_standards = self.prompt_loader.get_secondary_standards(first_category)
        secondary_mapping = self.prompt_loader.get_secondary_mapping(first_category)

        # 构建层级路径和内容预览（简化处理）
        hierarchy_path = f"{first_category}"
        content_preview = "\n".join(f"- {title}" for title in level2_titles)

        # 并发控制
        semaphore = asyncio.Semaphore(self._concurrency)

        async def classify_single_title(chunk_title: str) -> Dict[str, Any]:
            """对单个二级标题进行分类（带重试）"""
            prompt = self.prompt_loader.render(
                "chunk_secondary_classification",
                first_category=first_category,
                chunk_title=chunk_title,
                hierarchy_path=hierarchy_path,
                content_preview=content_preview,
                secondary_standards=secondary_standards,
            )

            # 带重试的LLM调用
            max_retries = 3
            async with semaphore:
                for attempt in range(max_retries):
                    try:
                        content = await generate_model_client.get_model_generate_invoke(
                            trace_id="hierarchy_classifier_secondary",
                            system_prompt=prompt["system"],
                            user_prompt=prompt["user"],
                            function_name=self.FUNCTION_NAME_SECONDARY,
                        )
                        result = _extract_json(content)
                        if result and isinstance(result, dict) and "category_index" in result:
                            category_index = result.get("category_index", 0)
                            # 映射编号到代码和名称
                            if category_index > 0 and category_index in secondary_mapping:
                                mapped = secondary_mapping[category_index]
                                return {
                                    "title": chunk_title,
                                    "category_index": category_index,
                                    "category_code": mapped.get("code", ""),
                                    "category_name": mapped.get("name", ""),
                                    "raw_response": content,
                                }
                            else:
                                # 编号为0或未找到映射，标记为非标准项
                                return {
                                    "title": chunk_title,
                                    "category_index": category_index,
                                    "category_code": "non_standard",
                                    "category_name": "非标准项",
                                    "raw_response": content,
                                }
                        else:
                            logger.warning(f"[二级分类] JSON解析失败或缺少category_index: {chunk_title}, 尝试: {attempt + 1}/{max_retries}")
                            if attempt == max_retries - 1:
                                # 最后一次尝试失败，使用默认值
                                return {
                                    "title": chunk_title,
                                    "category_index": 0,
                                    "category_code": "non_standard",
                                    "category_name": "非标准项",
                                    "raw_response": content,
                                    "error": "JSON解析失败",
                                }
                    except Exception as e:
                        logger.error(f"[二级分类] LLM调用失败: {chunk_title}, 错误: {e}, 尝试: {attempt + 1}/{max_retries}")
                        if attempt == max_retries - 1:
                            return {
                                "title": chunk_title,
                                "category_index": 0,
                                "category_code": "non_standard",
                                "category_name": "非标准项",
                                "error": str(e),
                            }

            # 不会到达这里，但保留以防万一
            return {
                "title": chunk_title,
                "category_index": 0,
                "category_code": "non_standard",
                "category_name": "非标准项",
                "error": "未知错误",
            }

        # 并发执行所有二级标题的分类
        tasks = [classify_single_title(title) for title in level2_titles]
        results = await asyncio.gather(*tasks)

        return {
            "first_category": first_category,
            "first_category_code": first_category_code,
            "level2_count": len(level2_titles),
            "classifications": results,
        }