|
|
@@ -380,7 +380,7 @@ class CatalogCheckProcessor:
|
|
|
def remove_common_elements_between_dataframes(
|
|
|
miss_outline_df: pd.DataFrame,
|
|
|
redis_data: pd.DataFrame
|
|
|
-) -> tuple[pd.DataFrame, Dict[str, set]]:
|
|
|
+) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
|
"""
|
|
|
去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
|
|
|
|
|
|
@@ -389,7 +389,7 @@ def remove_common_elements_between_dataframes(
|
|
|
redis_data: 包含missing_items列的DataFrame
|
|
|
|
|
|
Returns:
|
|
|
- tuple: (更新后的miss_outline_df, 公共元素字典)
|
|
|
+ tuple: (更新后的miss_outline_df, 更新后的redis_data)
|
|
|
"""
|
|
|
# 合并两个DataFrame,基于chapter_label
|
|
|
merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
|
|
|
@@ -421,16 +421,22 @@ def remove_common_elements_between_dataframes(
|
|
|
|
|
|
logger.info(f"[目录审查] 章节: {chapter_label}, 公共元素: {common_elements}")
|
|
|
|
|
|
- # 更新 miss_outline_df,使用apply函数去除公共元素
|
|
|
+ # 同时更新两个DataFrame,去除公共元素
|
|
|
miss_outline_df['miss_outline'] = miss_outline_df.apply(
|
|
|
lambda row: list(set(row['miss_outline']) - common_elements_dict.get(row['chapter_label'], set()))
|
|
|
if isinstance(row['miss_outline'], list) else [],
|
|
|
axis=1
|
|
|
)
|
|
|
|
|
|
- logger.info(f"[目录审查] 已去除公共元素,更新后的miss_outline_df: {miss_outline_df.to_dict('records')}")
|
|
|
+ redis_data['missing_items'] = redis_data.apply(
|
|
|
+ lambda row: list(set(row['missing_items']) - common_elements_dict.get(row['chapter_label'], set()))
|
|
|
+ if isinstance(row['missing_items'], list) else [],
|
|
|
+ axis=1
|
|
|
+ )
|
|
|
+
|
|
|
+ logger.info(f"[目录审查] 已去除公共元素,同时更新了miss_outline_df和redis_data")
|
|
|
|
|
|
- return miss_outline_df, common_elements_dict
|
|
|
+ return miss_outline_df, redis_data
|
|
|
|
|
|
|
|
|
async def catalogues_check(catalog_file = None):
|