Эх сурвалжийг харах

feat: 数据处理代码完善

ZengChao 1 сар өмнө
parent
commit
694660ccf4

+ 1 - 1
.env

@@ -9,7 +9,7 @@ MINIO_BASE_PATH=sampledata
 # Milvus向量数据库配置信息
 MILVUS_HOST=192.168.92.61
 MILVUS_PORT=19530
-MILVUS_DB=lq_db
+MILVUS_DB=lq_db_dev
 MILVUS_USER=
 MILVUS_PASSWORD=
 

+ 73 - 15
src/app/scripts/base_in_collection.py

@@ -16,23 +16,63 @@ from app.config.milvus_client import get_milvusclient
 from app.config.setting import settings
 
 # 根目录配置
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\新建文件夹 (2)"
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终编制依据"
 
 # 失败汇总JSON保存路径
 FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\base_collection_failed_report.json"
 
 # Collection 名称
-PARENT_COLLECTION_NAME = "test_27_parent"
-CHILD_COLLECTION_NAME = "test_27_child"
+PARENT_COLLECTION_NAME = "t_rag_kng_standard_parent"
+CHILD_COLLECTION_NAME = "t_rag_kng_standard"
 
 # 默认创建人/修改人ID
 DEFAULT_USER_ID = "ed6a79d3-0083-4d81-8b48-fc522f686f74"
 
 # MinIO URL 前缀
-PREFIX = "sampledata/base"
+PREFIX = "/standard"
 
+# 字段简写映射
+DOCUMENT_TYPE_MAP = {
+    "国家标准": "GB",
+    "行业标准": "HY",
+    "部门规章": "BM",
+    "地方标准": "DB",
+    "企业标准": "QY",
+    "管理制度": "GL",
+    "技术规范": "GF",
+    "团体标准": "TT",
+    "国际标准": "GJ",
+    "国家法律": "FL",
+    "地方法规": "LR",
+    "其他": "QT",
+}
 
-def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> Dict[str, Any]:
+PROFESSIONAL_FIELD_MAP = {
+    "法律法规": "FL",
+    "通用标准": "TY",
+    "勘察钻探": "KC",
+    "地基基础": "DJ",
+    "路基路面": "LJ",
+    "桥梁工程": "QL",
+    "隧道工程": "SD",
+    "交通工程": "JT",
+    "建筑工程": "JZ",
+    "市政工程": "SZ",
+    "机电安装": "JD",
+    "路桥工程": "LB",
+    "装饰装修": "ZS",
+    "港口航道": "GK",
+    "铁路工程": "TL",
+    "房建工程": "FJ",
+    "水利电力": "SL",
+    "信息化": "XX",
+    "试验检测": "SY",
+    "安全环保": "AQ",
+    "其他": "QT",
+}
+
+
+def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str, file_name: str) -> Dict[str, Any]:
     """
     构造 metadata 字段。
     
@@ -40,17 +80,25 @@ def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> D
         doc_data: JSON 中的 doc 数据
         hierarchy: 文档层级信息
         file_url: 文件 URL
+        file_name: 原始文件名
     
     Returns:
         metadata 字典
     """
+    # 映射为简写
+    document_type_raw = doc_data.get("document_type", "")
+    professional_field_raw = doc_data.get("professional_field", "")
+    
+    document_type = DOCUMENT_TYPE_MAP.get(document_type_raw, document_type_raw)
+    professional_field = PROFESSIONAL_FIELD_MAP.get(professional_field_raw, professional_field_raw)
+    
     return {
-        "file_name": doc_data.get("chinese_name", ""),
+        "file_name": file_name,  # 原始文件名
+        "chinese_name": doc_data.get("chinese_name", ""),  # 中文名称
         "standard_number": doc_data.get("standard_number", ""),
         "issuing_authority": doc_data.get("issuing_authority", ""),
-        "document_type": doc_data.get("document_type", ""),
-        "professional_field": doc_data.get("professional_field", ""),
-        "validity": doc_data.get("validity", ""),
+        "document_type": document_type,  # 简写
+        "professional_field": professional_field,  # 简写
         "hierarchy": hierarchy,
         "file_url": file_url,
         "plan_type_list": {}  # 空 JSON
@@ -58,7 +106,7 @@ def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> D
 
 
 def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows: List[Dict[str, Any]], 
-                       doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
+                       doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
     """
     插入 parent 数据到 Milvus。
     
@@ -97,7 +145,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
             "index": row.get("index", 0),
             "tag_list": "",
             "permission": {},
-            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
+            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
             "is_deleted": False,
             "created_by": DEFAULT_USER_ID,
             "created_time": now_ts,
@@ -115,7 +163,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
 
 
 def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: List[Dict[str, Any]], 
-                      doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
+                      doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
     """
     插入 children 数据到 Milvus。
     
@@ -154,7 +202,7 @@ def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: Li
             "index": row.get("index", 0),
             "tag_list": "",
             "permission": {},
-            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
+            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
             "is_deleted": False,
             "created_by": DEFAULT_USER_ID,
             "created_time": now_ts,
@@ -225,12 +273,22 @@ def process_folder(root_folder: str | Path) -> Dict[str, Any]:
             
             doc_id = doc_data.get("id")
             
+            # 获取原始文件名(不含扩展名)
+            all_files = list(subfolder.glob("*"))
+            original_file = None
+            for f in all_files:
+                if f.is_file() and f.suffix not in [".json", ".md"]:
+                    original_file = f
+                    break
+            
+            file_name = original_file.stem if original_file else folder_name
+            
             # 插入 parent 和 children
             parent_count, parent_error = insert_parent_rows(
-                client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name
+                client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name, file_name
             )
             child_count, child_error = insert_child_rows(
-                client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name
+                client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name, file_name
             )
 
             if parent_error:

+ 57 - 3
src/app/scripts/base_info_in_database.py

@@ -52,6 +52,52 @@ STANDARD_BASE_INFO_DEFAULTS = {
     "note": None,
 }
 
+# 字段简写映射
+DOCUMENT_TYPE_MAP = {
+    "国家标准": "GB",
+    "行业标准": "HY",
+    "部门规章": "BM",
+    "地方标准": "DB",
+    "企业标准": "QY",
+    "管理制度": "GL",
+    "技术规范": "GF",
+    "团体标准": "TT",
+    "国际标准": "GJ",
+    "国家法律": "FL",
+    "地方法规": "LR",
+    "其他": "QT",
+}
+
+PROFESSIONAL_FIELD_MAP = {
+    "法律法规": "FL",
+    "通用标准": "TY",
+    "勘察钻探": "KC",
+    "地基基础": "DJ",
+    "路基路面": "LJ",
+    "桥梁工程": "QL",
+    "隧道工程": "SD",
+    "交通工程": "JT",
+    "建筑工程": "JZ",
+    "市政工程": "SZ",
+    "机电安装": "JD",
+    "路桥工程": "LB",
+    "装饰装修": "ZS",
+    "港口航道": "GK",
+    "铁路工程": "TL",
+    "房建工程": "FJ",
+    "水利电力": "SL",
+    "信息化": "XX",
+    "试验检测": "SY",
+    "安全环保": "AQ",
+    "其他": "QT",
+}
+
+VALIDITY_MAP = {
+    "现行": "XH",
+    "废止": "FZ",
+    "试行": "SX",
+}
+
 
 def parse_date(date_str: Optional[str]) -> Optional[str]:
     """解析日期字符串为数据库格式"""
@@ -152,6 +198,14 @@ async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_na
             drafting_unit = None
             participating_units = None
         
+        document_type_raw = doc_data.get("document_type")
+        professional_field_raw = doc_data.get("professional_field")
+        validity_raw = doc_data.get("validity")
+
+        document_type = DOCUMENT_TYPE_MAP.get(document_type_raw, document_type_raw)
+        professional_field = PROFESSIONAL_FIELD_MAP.get(professional_field_raw, professional_field_raw)
+        validity = VALIDITY_MAP.get(validity_raw, validity_raw)
+
         sql = text(f"""
             INSERT INTO {TABLE_STANDARD_BASE_INFO} (
                 id, chinese_name, english_name, standard_number,
@@ -183,10 +237,10 @@ async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_na
             "drafting_unit": drafting_unit,
             "approving_department": doc_data.get("approving_department"),
             "participating_units": participating_units,
-            "document_type": doc_data.get("document_type"),
-            "professional_field": doc_data.get("professional_field"),
+            "document_type": document_type,
+            "professional_field": professional_field,
             "engineering_phase": doc_data.get("engineering_phase"),
-            "validity": doc_data.get("validity"),
+            "validity": validity,
             "reference_basis": doc_data.get("reference_basis"),
             "source_url": doc_data.get("source_url"),
             "created_by": STANDARD_BASE_INFO_DEFAULTS["created_by"],

+ 122 - 14
src/app/scripts/plan_info_in_collection.py

@@ -16,23 +16,107 @@ from app.config.milvus_client import get_milvusclient
 from app.config.setting import settings
 
 # 根目录配置
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\施工方案文件夹"
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终施工方案"
 
 # 失败汇总JSON保存路径
-FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\plan_collection_failed_report.json"
+FAILED_REPORT_PATH = r"F:\第二阶段编制依据及施工方案数据治理-20260206\plan_collection_failed_report.json"
 
 # Collection 名称
-PARENT_COLLECTION_NAME = "plan_parent"
-CHILD_COLLECTION_NAME = "plan_child"
+PARENT_COLLECTION_NAME = "t_rag_kng_construction_plan_parent"
+CHILD_COLLECTION_NAME = "t_rag_kng_construction_plan"
 
 # 默认创建人/修改人ID
 DEFAULT_USER_ID = "ed6a79d3-0083-4d81-8b48-fc522f686f74"
 
 # MinIO URL 前缀
-PREFIX = "sampledata/plan"
+PREFIX = "/plan"
 
+# 枚举简写映射(匹配不到统一用 "QT")
+PLAN_CATEGORY_MAP = {
+    "超危大方案": "CH",
+    "超危大方案较大Ⅱ级": "CH2",
+    "超危大方案较大II级": "CH2",
+    "超危大方案特大Ⅳ级": "CH4",
+    "超危大方案特大IV级": "CH4",
+    "超危大方案一般Ⅰ级": "CH1",
+    "超危大方案一般I级": "CH1",
+    "超危大方案重大Ⅲ级": "CH3",
+    "超危大方案重大III级": "CH3",
+    "危大方案": "WD",
+    "一般方案": "YB",
+    "其他": "QT",
+}
 
-def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> Dict[str, Any]:
+LEVEL_1_CLASSIFICATION_MAP = {
+    "施工方案": "SC",
+    "其他": "QT",
+}
+
+LEVEL_2_CLASSIFICATION_MAP = {
+    "临建工程": "LZ",
+    "路基工程": "LJ",
+    "桥梁工程": "QL",
+    "隧道工程": "SD",
+    "其他": "QT",
+}
+
+LEVEL_3_CLASSIFICATION_MAP = {
+    "TBM施工": "TM",
+    "拌和站安、拆施工": "BH",
+    "不良地质隧道施工": "BL",
+    "常规桥梁": "CG",
+    "挡土墙工程类": "DT",
+    "辅助坑道施工": "FB",
+    "复杂洞口工程施工": "FD",
+    "钢筋加工场安、拆": "GG",
+    "钢栈桥施工": "GZ",
+    "拱桥": "GH",
+    "涵洞工程类": "HD",
+    "滑坡体处理类": "HP",
+    "路堤": "LT",
+    "路堑": "LQ",
+    "深基坑": "JK",
+    "隧道总体施工": "ZT",
+    "特殊结构隧道": "TS",
+    "斜拉桥": "XL",
+    "悬索桥": "XS",
+    "其他": "QT",
+}
+
+LEVEL_4_CLASSIFICATION_MAP = {
+    "挡土墙": "DT",
+    "顶管": "DG",
+    "断层破碎带及软弱围岩": "DL",
+    "钢筋砼箱涵": "GX",
+    "高填路堤": "GT",
+    "抗滑桩": "KH",
+    "软岩大变形隧道": "RY",
+    "上部结构": "SB",
+    "深基坑开挖与支护": "JK",
+    "深挖路堑": "LC",
+    "隧道TBM": "TM",
+    "隧道进洞": "JD",
+    "隧道竖井": "SJ",
+    "隧道斜井": "XJ",
+    "特种设备": "TZ",
+    "瓦斯隧道": "WS",
+    "下部结构": "XB",
+    "小净距隧道": "NJ",
+    "岩爆隧道": "YB",
+    "岩溶隧道": "YR",
+    "涌水突泥隧道": "YN",
+    "桩基础": "ZJ",
+    "其他": "QT",
+}
+
+
+def map_enum(value: str | None, mapping: Dict[str, str], default: str = "QT") -> str:
+    if not value:
+        return default
+    return mapping.get(value, default)
+
+
+def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str, file_name: str) -> Dict[str, Any]:
     """
     构造 metadata 字段。
     
@@ -40,24 +124,38 @@ def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> D
         doc_data: JSON 中的 doc 数据
         hierarchy: 文档层级信息
         file_url: 文件 URL
+        file_name: 原始文件名
     
     Returns:
         metadata 字典
     """
+    plan_category = map_enum(doc_data.get("plan_category"), PLAN_CATEGORY_MAP)
+    level_1_classification = map_enum(doc_data.get("level_1_classification"), LEVEL_1_CLASSIFICATION_MAP)
+    level_2_classification = map_enum(doc_data.get("level_2_classification"), LEVEL_2_CLASSIFICATION_MAP)
+    level_3_classification = map_enum(doc_data.get("level_3_classification"), LEVEL_3_CLASSIFICATION_MAP)
+    level_4_classification = map_enum(doc_data.get("level_4_classification"), LEVEL_4_CLASSIFICATION_MAP)
+
     return {
-        "file_name": doc_data.get("plan_name", ""),
-        "plan_category": doc_data.get("plan_category", ""),
+        "file_name": file_name,
+        "plan_name": doc_data.get("plan_name", ""),
         "project_name": doc_data.get("project_name", ""),
+        "project_section": doc_data.get("project_section", ""),
         "compiling_unit": doc_data.get("compiling_unit", ""),
         "compiling_date": doc_data.get("compiling_date", ""),
+        "plan_summary": doc_data.get("plan_summary", ""),
         "hierarchy": hierarchy,
         "file_url": file_url,
+        "plan_category": plan_category,
+        "level_1_classification": level_1_classification,
+        "level_2_classification": level_2_classification,
+        "level_3_classification": level_3_classification,
+        "level_4_classification": level_4_classification,
         "plan_type_list": {}  # 空 JSON
     }
 
 
 def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows: List[Dict[str, Any]], 
-                       doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
+                       doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
     """
     插入 parent 数据到 Milvus。
     
@@ -96,7 +194,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
             "index": row.get("index", 0),
             "tag_list": "",
             "permission": {},
-            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
+            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
             "is_deleted": False,
             "created_by": DEFAULT_USER_ID,
             "created_time": now_ts,
@@ -114,7 +212,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
 
 
 def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: List[Dict[str, Any]], 
-                      doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
+                      doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
     """
     插入 children 数据到 Milvus。
     
@@ -153,7 +251,7 @@ def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: Li
             "index": row.get("index", 0),
             "tag_list": "",
             "permission": {},
-            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
+            "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
             "is_deleted": False,
             "created_by": DEFAULT_USER_ID,
             "created_time": now_ts,
@@ -223,13 +321,23 @@ def process_folder(root_folder: str | Path) -> Dict[str, Any]:
                 continue
             
             doc_id = doc_data.get("id")
+
+            # 获取原始文件名(不含扩展名)
+            all_files = list(subfolder.glob("*"))
+            original_file = None
+            for f in all_files:
+                if f.is_file() and f.suffix not in [".json", ".md"]:
+                    original_file = f
+                    break
+
+            file_name = original_file.stem if original_file else folder_name
             
             # 插入 parent 和 children
             parent_count, parent_error = insert_parent_rows(
-                client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name
+                client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name, file_name
             )
             child_count, child_error = insert_child_rows(
-                client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name
+                client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name, file_name
             )
 
             if parent_error:

+ 93 - 16
src/app/scripts/plan_info_in_database.py

@@ -29,10 +29,10 @@ TABLE_CONSTRUCTION_PLAN_BASE_INFO = "t_samp_construction_plan_base_info"  # 施
 DEFAULT_USER_ID = "ed6a79d3-0083-4d81-8b48-fc522f686f74"
 
 # 根目录配置
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\1"
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终施工方案"
 
 # 失败汇总JSON保存路径
-FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\plan_db_failed_report.json"
+FAILED_REPORT_PATH = r"F:\第二阶段编制依据及施工方案数据治理-20260206\plan_db_failed_report.json"
 
 # 默认值配置 - 需要修改时只改这里
 DOCUMENT_MAIN_DEFAULTS = {
@@ -58,6 +58,90 @@ PLAN_BASE_INFO_DEFAULTS = {
     "updated_by": DEFAULT_USER_ID,
 }
 
+# 枚举简写映射(匹配不到统一用 "QT")
+PLAN_CATEGORY_MAP = {
+    "超危大方案": "CH",
+    "超危大方案较大Ⅱ级": "CH2",
+    "超危大方案较大II级": "CH2",
+    "超危大方案特大Ⅳ级": "CH4",
+    "超危大方案特大IV级": "CH4",
+    "超危大方案一般Ⅰ级": "CH1",
+    "超危大方案一般I级": "CH1",
+    "超危大方案重大Ⅲ级": "CH3",
+    "超危大方案重大III级": "CH3",
+    "危大方案": "WD",
+    "一般方案": "YB",
+    "其他": "QT",
+}
+
+LEVEL_1_CLASSIFICATION_MAP = {
+    "施工方案": "SC",
+    "其他": "QT",
+}
+
+LEVEL_2_CLASSIFICATION_MAP = {
+    "临建工程": "LZ",
+    "路基工程": "LJ",
+    "桥梁工程": "QL",
+    "隧道工程": "SD",
+    "其他": "QT",
+}
+
+LEVEL_3_CLASSIFICATION_MAP = {
+    "TBM施工": "TM",
+    "拌和站安、拆施工": "BH",
+    "不良地质隧道施工": "BL",
+    "常规桥梁": "CG",
+    "挡土墙工程类": "DT",
+    "辅助坑道施工": "FB",
+    "复杂洞口工程施工": "FD",
+    "钢筋加工场安、拆": "GG",
+    "钢栈桥施工": "GZ",
+    "拱桥": "GH",
+    "涵洞工程类": "HD",
+    "滑坡体处理类": "HP",
+    "路堤": "LT",
+    "路堑": "LQ",
+    "深基坑": "JK",
+    "隧道总体施工": "ZT",
+    "特殊结构隧道": "TS",
+    "斜拉桥": "XL",
+    "悬索桥": "XS",
+    "其他": "QT",
+}
+
+LEVEL_4_CLASSIFICATION_MAP = {
+    "挡土墙": "DT",
+    "顶管": "DG",
+    "断层破碎带及软弱围岩": "DL",
+    "钢筋砼箱涵": "GX",
+    "高填路堤": "GT",
+    "抗滑桩": "KH",
+    "软岩大变形隧道": "RY",
+    "上部结构": "SB",
+    "深基坑开挖与支护": "JK",
+    "深挖路堑": "LC",
+    "隧道TBM": "TM",
+    "隧道进洞": "JD",
+    "隧道竖井": "SJ",
+    "隧道斜井": "XJ",
+    "特种设备": "TZ",
+    "瓦斯隧道": "WS",
+    "下部结构": "XB",
+    "小净距隧道": "NJ",
+    "岩爆隧道": "YB",
+    "岩溶隧道": "YR",
+    "涌水突泥隧道": "YN",
+    "桩基础": "ZJ",
+    "其他": "QT",
+}
+
+
+def map_enum(value: Optional[str], mapping: Dict[str, str], default: str = "QT") -> str:
+    if not value:
+        return default
+    return mapping.get(value, default)
+
 
 async def insert_document_main(
     session,
@@ -152,21 +236,14 @@ async def insert_plan_base_info(session, doc_data: Dict[str, Any], folder_name:
         if compilation_basis is None:
             compilation_basis = PLAN_BASE_INFO_DEFAULTS["compilation_basis"]
 
-        level_1_classification = doc_data.get("level_1_classification")
-        if not level_1_classification:
-            level_1_classification = PLAN_BASE_INFO_DEFAULTS["level_1_classification"]
-
-        level_2_classification = doc_data.get("level_2_classification")
-        if level_2_classification is None:
-            level_2_classification = PLAN_BASE_INFO_DEFAULTS["level_2_classification"]
+        plan_category = map_enum(doc_data.get("plan_category"), PLAN_CATEGORY_MAP)
 
-        level_3_classification = doc_data.get("level_3_classification")
-        if level_3_classification is None:
-            level_3_classification = PLAN_BASE_INFO_DEFAULTS["level_3_classification"]
+        level_1_raw = doc_data.get("level_1_classification") or PLAN_BASE_INFO_DEFAULTS["level_1_classification"]
+        level_1_classification = map_enum(level_1_raw, LEVEL_1_CLASSIFICATION_MAP)
 
-        level_4_classification = doc_data.get("level_4_classification")
-        if level_4_classification is None:
-            level_4_classification = PLAN_BASE_INFO_DEFAULTS["level_4_classification"]
+        level_2_classification = map_enum(doc_data.get("level_2_classification"), LEVEL_2_CLASSIFICATION_MAP)
+        level_3_classification = map_enum(doc_data.get("level_3_classification"), LEVEL_3_CLASSIFICATION_MAP)
+        level_4_classification = map_enum(doc_data.get("level_4_classification"), LEVEL_4_CLASSIFICATION_MAP)
 
         sql = text(f"""
             INSERT INTO {TABLE_CONSTRUCTION_PLAN_BASE_INFO} (
@@ -196,7 +273,7 @@ async def insert_plan_base_info(session, doc_data: Dict[str, Any], folder_name:
             "plan_summary": doc_data.get("plan_summary"),
             "compilation_basis": compilation_basis,
             "note": note,
-            "plan_category": doc_data.get("plan_category"),
+            "plan_category": plan_category,
             "level_1_classification": level_1_classification,
             "level_2_classification": level_2_classification,
             "level_3_classification": level_3_classification,

+ 1 - 1
src/app/scripts/plan_info_in_minio.py

@@ -20,7 +20,7 @@ from app.config.setting import settings
 PREFIX = "sampledata/plan"
 
 # 根目录配置:每个子文件夹包含 1 个原始文件 + 1 个 .md + 1 个 .json
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\施工方案文件夹"
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终施工方案"
 
 # 失败汇总JSON保存路径
 FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\plan_minio_failed_report.json"

+ 3 - 3
src/app/scripts/plan_info_json_generation.py

@@ -20,11 +20,11 @@ import pandas as pd
 
 
 # ==================== 配置参数 ====================
-EXCEL_FILE = r"C:\Users\ZengChao\Desktop\施工方案.xlsx"      # ✅ Excel文件路径
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\施工方案"  # ✅ 根文件夹路径
+EXCEL_FILE = r"F:\第二阶段编制依据及施工方案数据治理-20260206\施工方案.xlsx"     # ✅ Excel文件路径
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终施工方案"  # ✅ 根文件夹路径
 SHEET_INDEX = 0                                               # 目标sheet索引(0为第一个sheet)
 
-FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\plan_json_failed_report.json"  # ✅ 失败汇总JSON保存路径
+FAILED_REPORT_PATH = r"F:\第二阶段编制依据及施工方案数据治理-20260206\plan_json_failed_report.json"  # ✅ 失败汇总JSON保存路径
 
 PARENT_MAX_CHARS = 6000                                       # ✅ 父段最大长度(超长切片)
 CHILD_INDEX_START = 0                                         # ✅ children.index 起始