|
|
@@ -383,23 +383,27 @@ class CatalogCheckProcessor:
|
|
|
def remove_common_elements_between_dataframes(
|
|
|
miss_outline_df: pd.DataFrame,
|
|
|
redis_data: pd.DataFrame
|
|
|
-) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
|
+) -> tuple[pd.DataFrame, pd.DataFrame, List]:
|
|
|
"""
|
|
|
去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
|
|
|
+ 同时返回所有公共元素的列表
|
|
|
|
|
|
Args:
|
|
|
miss_outline_df: 包含miss_outline列的DataFrame
|
|
|
redis_data: 包含missing_items列的DataFrame
|
|
|
|
|
|
Returns:
|
|
|
- tuple: (更新后的miss_outline_df, 更新后的redis_data)
|
|
|
+ tuple: (更新后的miss_outline_df, 更新后的redis_data, 所有公共元素列表)
|
|
|
"""
|
|
|
# 合并两个DataFrame,基于chapter_label
|
|
|
merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
|
|
|
|
|
|
- # 创建一个字典来存储公共元素
|
|
|
+ # 创建一个字典来存储公共元素(用于去除操作)
|
|
|
common_elements_dict = {}
|
|
|
|
|
|
+ # 创建一个列表来存储所有公共元素
|
|
|
+ all_common_elements = []
|
|
|
+
|
|
|
# 遍历合并后的DataFrame,计算公共元素
|
|
|
for index, row in merged_df.iterrows():
|
|
|
chapter_label = row['chapter_label']
|
|
|
@@ -419,39 +423,44 @@ def remove_common_elements_between_dataframes(
|
|
|
# 计算公共元素
|
|
|
common_elements = miss_outline_set & missing_items_set
|
|
|
|
|
|
- # 存储公共元素
|
|
|
- common_elements_dict[chapter_label] = common_elements
|
|
|
+ # 存储公共元素到字典(用于去除操作)
|
|
|
+ common_elements_dict[chapter_label] = list(common_elements)
|
|
|
+
|
|
|
+ # 将公共元素添加到总列表中
|
|
|
+ all_common_elements.extend(list(common_elements))
|
|
|
|
|
|
logger.info(f"[目录审查] 章节: {chapter_label}, 公共元素: {common_elements}")
|
|
|
|
|
|
- # 同时更新两个DataFrame,去除公共元素
|
|
|
+ # 更新miss_outline_df,去除公共元素
|
|
|
miss_outline_df['miss_outline'] = miss_outline_df.apply(
|
|
|
- lambda row: list(set(row['miss_outline']) - common_elements_dict.get(row['chapter_label'], set()))
|
|
|
+ lambda row: list(set(row['miss_outline']) - set(common_elements_dict.get(row['chapter_label'], [])))
|
|
|
if isinstance(row['miss_outline'], list) else [],
|
|
|
axis=1
|
|
|
)
|
|
|
|
|
|
+ # 更新redis_data,去除公共元素
|
|
|
redis_data['missing_items'] = redis_data.apply(
|
|
|
- lambda row: list(set(row['missing_items']) - common_elements_dict.get(row['chapter_label'], set()))
|
|
|
+ lambda row: list(set(row['missing_items']) - set(common_elements_dict.get(row['chapter_label'], [])))
|
|
|
if isinstance(row['missing_items'], list) else [],
|
|
|
axis=1
|
|
|
)
|
|
|
|
|
|
- logger.info(f"[目录审查] 已去除公共元素,同时更新了miss_outline_df和redis_data")
|
|
|
+ logger.info(f"[目录审查] 已去除公共元素,同时更新了miss_outline_df和redis_data,所有公共元素: {all_common_elements}")
|
|
|
|
|
|
- return miss_outline_df, redis_data
|
|
|
+ return miss_outline_df, redis_data, all_common_elements
|
|
|
|
|
|
|
|
|
def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
- 处理目录审查列表,从DataFrame中提取missing_items和miss_outline并生成审查项
|
|
|
+ 处理目录审查列表,从DataFrame中提取missing_items、common_elements_dict和miss_outline并生成审查项
|
|
|
|
|
|
Args:
|
|
|
catogues_df: 包含目录审查数据的DataFrame,需要包含以下列:
|
|
|
- title: 标题
|
|
|
- chapter_label: 章节标签
|
|
|
- chapter_classification: 章节分类
|
|
|
- - missing_items: 目录缺失项(列表或字符串)
|
|
|
+ - missing_items: 目录缺失项(列表或字符串)- 目录中缺失但大纲可能存在
|
|
|
+ - common_elements_dict: 目录与大纲都缺失的公共元素(列表或字符串)
|
|
|
- miss_outline: 大纲缺失项(列表或字符串)
|
|
|
- specification_items: 规范项(列表或字符串)
|
|
|
|
|
|
@@ -495,7 +504,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
# 将规范项列表拼接为字符串(用、号连接)
|
|
|
specification_items_text = '、'.join(specification_items_list) if specification_items_list else ''
|
|
|
|
|
|
- # 解析 missing_items 列(目录缺失)
|
|
|
+ # 解析 missing_items 列(目录缺失但大纲存在)
|
|
|
missing_items_str = row.get('missing_items', '')
|
|
|
try:
|
|
|
if pd.isna(missing_items_str) or missing_items_str == '':
|
|
|
@@ -513,6 +522,24 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
logger.warning(f"第 {index} 行无法解析missing_items: {missing_items_str}")
|
|
|
missing_items_list = []
|
|
|
|
|
|
+ # 解析 common_elements_dict 列(目录与大纲都缺失)
|
|
|
+ common_elements_str = row.get('common_elements_dict', '')
|
|
|
+ try:
|
|
|
+ if pd.isna(common_elements_str) or common_elements_str == '':
|
|
|
+ common_elements_list = []
|
|
|
+ elif isinstance(common_elements_str, list):
|
|
|
+ common_elements_list = common_elements_str
|
|
|
+ else:
|
|
|
+ # 尝试使用 ast.literal_eval 解析
|
|
|
+ common_elements_list = ast.literal_eval(common_elements_str)
|
|
|
+ except (ValueError, SyntaxError):
|
|
|
+ try:
|
|
|
+ # 尝试使用 json.loads 解析
|
|
|
+ common_elements_list = json.loads(common_elements_str)
|
|
|
+ except (json.JSONDecodeError, TypeError):
|
|
|
+ logger.warning(f"第 {index} 行无法解析common_elements_dict: {common_elements_str}")
|
|
|
+ common_elements_list = []
|
|
|
+
|
|
|
# 解析 miss_outline 列(大纲缺失)
|
|
|
miss_outline_str = row.get('miss_outline', '')
|
|
|
try:
|
|
|
@@ -531,26 +558,39 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
logger.warning(f"第 {index} 行无法解析miss_outline: {miss_outline_str}")
|
|
|
miss_outline_list = []
|
|
|
|
|
|
- # 处理 missing_items(目录缺失)
|
|
|
+ # 处理 missing_items(目录缺失但大纲存在)
|
|
|
if isinstance(missing_items_list, list) and len(missing_items_list) > 0:
|
|
|
for missing_item in missing_items_list:
|
|
|
catalog_item = {
|
|
|
"issue_point": f"{missing_item}缺失",
|
|
|
"location": title if title else chapter_label,
|
|
|
- "suggestion": f"目录缺失(missing_items):在待审查目录中未找到与'{missing_item}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{missing_item}'相关内容;整改建议:建议在本章或前序章节中增设'{missing_item}'相关内容,确保与审查规范要求一致。",
|
|
|
+ "suggestion": f"目录缺失:要点目录中缺失'{missing_item}'这个小节,但大纲中存在该小节;当前章节仅涉及'{title if title else chapter_label}',目录中未体现'{missing_item}'相关内容;整改建议:建议在目录中补充'{missing_item}'相关内容,确保目录与大纲一致。",
|
|
|
"reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
|
|
|
"risk_level": "高风险",
|
|
|
"reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
|
|
|
}
|
|
|
catogues_reciew_list.append(catalog_item)
|
|
|
|
|
|
+ # 处理 common_elements_dict(目录与大纲都缺失)
|
|
|
+ if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
|
|
|
+ for common_element in common_elements_list:
|
|
|
+ common_item = {
|
|
|
+ "issue_point": f"{common_element}缺失",
|
|
|
+ "location": title if title else chapter_label,
|
|
|
+ "suggestion": f"目录与大纲都缺失:要点目录和大纲中都缺失'{common_element}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录和大纲均未涵盖'{common_element}'相关内容;整改建议:建议在本章或前序章节中增设'{common_element}'相关内容,确保目录和大纲都包含该小节。",
|
|
|
+ "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
|
|
|
+ "risk_level": "高风险",
|
|
|
+ "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
|
|
|
+ }
|
|
|
+ catogues_reciew_list.append(common_item)
|
|
|
+
|
|
|
# 处理 miss_outline(大纲缺失)
|
|
|
if isinstance(miss_outline_list, list) and len(miss_outline_list) > 0:
|
|
|
for miss_outline in miss_outline_list:
|
|
|
outline_item = {
|
|
|
"issue_point": f"{miss_outline}缺失",
|
|
|
"location": title if title else chapter_label,
|
|
|
- "suggestion": f"大纲缺失(miss_outline):在待审查大纲中未找到与'{miss_outline}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{miss_outline}'相关内容;整改建议:建议在本章或前序章节中增设'{miss_outline}'相关内容,确保与审查规范要求一致。",
|
|
|
+ "suggestion": f"大纲缺失:大纲中缺失'{miss_outline}'这个小节;当前章节仅涉及'{title if title else chapter_label}',大纲中未涵盖'{miss_outline}'相关内容;整改建议:建议在大纲中补充'{miss_outline}'相关内容,确保大纲完整性。",
|
|
|
"reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
|
|
|
"risk_level": "高风险",
|
|
|
"reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
|