|
|
@@ -399,7 +399,7 @@ def remove_common_elements_between_dataframes(
|
|
|
merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
|
|
|
|
|
|
# 创建一个字典来存储公共元素(用于去除操作)
|
|
|
- common_elements_dict = {}
|
|
|
+ common_elements_list = {}
|
|
|
|
|
|
# 创建一个列表来存储所有公共元素
|
|
|
all_common_elements = []
|
|
|
@@ -424,7 +424,7 @@ def remove_common_elements_between_dataframes(
|
|
|
common_elements = miss_outline_set & missing_items_set
|
|
|
|
|
|
# 存储公共元素到字典(用于去除操作)
|
|
|
- common_elements_dict[chapter_label] = list(common_elements)
|
|
|
+ common_elements_list[chapter_label] = list(common_elements)
|
|
|
|
|
|
# 将公共元素添加到总列表中
|
|
|
all_common_elements.extend(list(common_elements))
|
|
|
@@ -433,14 +433,14 @@ def remove_common_elements_between_dataframes(
|
|
|
|
|
|
# 更新miss_outline_df,去除公共元素
|
|
|
miss_outline_df['miss_outline'] = miss_outline_df.apply(
|
|
|
- lambda row: list(set(row['miss_outline']) - set(common_elements_dict.get(row['chapter_label'], [])))
|
|
|
+ lambda row: list(set(row['miss_outline']) - set(common_elements_list.get(row['chapter_label'], [])))
|
|
|
if isinstance(row['miss_outline'], list) else [],
|
|
|
axis=1
|
|
|
)
|
|
|
|
|
|
# 更新redis_data,去除公共元素
|
|
|
redis_data['missing_items'] = redis_data.apply(
|
|
|
- lambda row: list(set(row['missing_items']) - set(common_elements_dict.get(row['chapter_label'], [])))
|
|
|
+ lambda row: list(set(row['missing_items']) - set(common_elements_list.get(row['chapter_label'], [])))
|
|
|
if isinstance(row['missing_items'], list) else [],
|
|
|
axis=1
|
|
|
)
|
|
|
@@ -452,7 +452,7 @@ def remove_common_elements_between_dataframes(
|
|
|
|
|
|
def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
- 处理目录审查列表,从DataFrame中提取missing_items、common_elements_dict和miss_outline并生成审查项
|
|
|
+ 处理目录审查列表,从DataFrame中提取missing_items、common_elements_list和miss_outline并生成审查项
|
|
|
|
|
|
Args:
|
|
|
catogues_df: 包含目录审查数据的DataFrame,需要包含以下列:
|
|
|
@@ -460,7 +460,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
- chapter_label: 章节标签
|
|
|
- chapter_classification: 章节分类
|
|
|
- missing_items: 目录缺失项(列表或字符串)- 目录中缺失但大纲可能存在
|
|
|
- - common_elements_dict: 目录与大纲都缺失的公共元素(列表或字符串)
|
|
|
+ - common_elements_list: 目录与大纲都缺失的公共元素(列表或字符串)
|
|
|
- miss_outline: 大纲缺失项(列表或字符串)
|
|
|
- specification_items: 规范项(列表或字符串)
|
|
|
|
|
|
@@ -522,8 +522,8 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
logger.warning(f"第 {index} 行无法解析missing_items: {missing_items_str}")
|
|
|
missing_items_list = []
|
|
|
|
|
|
- # 解析 common_elements_dict 列(目录与大纲都缺失)
|
|
|
- common_elements_str = row.get('common_elements_dict', '')
|
|
|
+ # 解析 common_elements_list 列(目录与大纲都缺失)
|
|
|
+ common_elements_str = row.get('common_elements_list', '')
|
|
|
try:
|
|
|
if pd.isna(common_elements_str) or common_elements_str == '':
|
|
|
common_elements_list = []
|
|
|
@@ -537,7 +537,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
# 尝试使用 json.loads 解析
|
|
|
common_elements_list = json.loads(common_elements_str)
|
|
|
except (json.JSONDecodeError, TypeError):
|
|
|
- logger.warning(f"第 {index} 行无法解析common_elements_dict: {common_elements_str}")
|
|
|
+ logger.warning(f"第 {index} 行无法解析common_elements_list: {common_elements_str}")
|
|
|
common_elements_list = []
|
|
|
|
|
|
# 解析 miss_outline 列(大纲缺失)
|
|
|
@@ -571,7 +571,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
|
|
|
}
|
|
|
catogues_reciew_list.append(catalog_item)
|
|
|
|
|
|
- # 处理 common_elements_dict(目录与大纲都缺失)
|
|
|
+ # 处理 common_elements_list(目录与大纲都缺失)
|
|
|
if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
|
|
|
for common_element in common_elements_list:
|
|
|
common_item = {
|