|
|
@@ -16,23 +16,107 @@ from app.config.milvus_client import get_milvusclient
|
|
|
from app.config.setting import settings
|
|
|
|
|
|
# 根目录配置
|
|
|
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\施工方案文件夹"
|
|
|
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终施工方案"
|
|
|
|
|
|
# 失败汇总JSON保存路径
|
|
|
-FAILED_REPORT_PATH = r"C:\Users\ZengChao\Desktop\plan_collection_failed_report.json"
|
|
|
+FAILED_REPORT_PATH = r"F:\第二阶段编制依据及施工方案数据治理-20260206\plan_collection_failed_report.json"
|
|
|
|
|
|
# Collection 名称
|
|
|
-PARENT_COLLECTION_NAME = "plan_parent"
|
|
|
-CHILD_COLLECTION_NAME = "plan_child"
|
|
|
+PARENT_COLLECTION_NAME = "t_rag_kng_construction_plan_parent"
|
|
|
+CHILD_COLLECTION_NAME = "t_rag_kng_construction_plan"
|
|
|
|
|
|
# 默认创建人/修改人ID
|
|
|
DEFAULT_USER_ID = "ed6a79d3-0083-4d81-8b48-fc522f686f74"
|
|
|
|
|
|
# MinIO URL 前缀
|
|
|
-PREFIX = "sampledata/plan"
|
|
|
+PREFIX = "/plan"
|
|
|
|
|
|
+# 枚举简写映射(匹配不到统一用 "QT")
|
|
|
+PLAN_CATEGORY_MAP = {
|
|
|
+ "超危大方案": "CH",
|
|
|
+ "超危大方案较大Ⅱ级": "CH2",
|
|
|
+ "超危大方案较大II级": "CH2",
|
|
|
+ "超危大方案特大Ⅳ级": "CH4",
|
|
|
+ "超危大方案特大IV级": "CH4",
|
|
|
+ "超危大方案一般Ⅰ级": "CH1",
|
|
|
+ "超危大方案一般I级": "CH1",
|
|
|
+ "超危大方案重大Ⅲ级": "CH3",
|
|
|
+ "超危大方案重大III级": "CH3",
|
|
|
+ "危大方案": "WD",
|
|
|
+ "一般方案": "YB",
|
|
|
+ "其他": "QT",
|
|
|
+}
|
|
|
|
|
|
-def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> Dict[str, Any]:
|
|
|
+LEVEL_1_CLASSIFICATION_MAP = {
|
|
|
+ "施工方案": "SC",
|
|
|
+ "其他": "QT",
|
|
|
+}
|
|
|
+
|
|
|
+LEVEL_2_CLASSIFICATION_MAP = {
|
|
|
+ "临建工程": "LZ",
|
|
|
+ "路基工程": "LJ",
|
|
|
+ "桥梁工程": "QL",
|
|
|
+ "隧道工程": "SD",
|
|
|
+ "其他": "QT",
|
|
|
+}
|
|
|
+
|
|
|
+LEVEL_3_CLASSIFICATION_MAP = {
|
|
|
+ "TBM施工": "TM",
|
|
|
+ "拌和站安、拆施工": "BH",
|
|
|
+ "不良地质隧道施工": "BL",
|
|
|
+ "常规桥梁": "CG",
|
|
|
+ "挡土墙工程类": "DT",
|
|
|
+ "辅助坑道施工": "FB",
|
|
|
+ "复杂洞口工程施工": "FD",
|
|
|
+ "钢筋加工场安、拆": "GG",
|
|
|
+ "钢栈桥施工": "GZ",
|
|
|
+ "拱桥": "GH",
|
|
|
+ "涵洞工程类": "HD",
|
|
|
+ "滑坡体处理类": "HP",
|
|
|
+ "路堤": "LT",
|
|
|
+ "路堑": "LQ",
|
|
|
+ "深基坑": "JK",
|
|
|
+ "隧道总体施工": "ZT",
|
|
|
+ "特殊结构隧道": "TS",
|
|
|
+ "斜拉桥": "XL",
|
|
|
+ "悬索桥": "XS",
|
|
|
+ "其他": "QT",
|
|
|
+}
|
|
|
+
|
|
|
+LEVEL_4_CLASSIFICATION_MAP = {
|
|
|
+ "挡土墙": "DT",
|
|
|
+ "顶管": "DG",
|
|
|
+ "断层破碎带及软弱围岩": "DL",
|
|
|
+ "钢筋砼箱涵": "GX",
|
|
|
+ "高填路堤": "GT",
|
|
|
+ "抗滑桩": "KH",
|
|
|
+ "软岩大变形隧道": "RY",
|
|
|
+ "上部结构": "SB",
|
|
|
+ "深基坑开挖与支护": "JK",
|
|
|
+ "深挖路堑": "LC",
|
|
|
+ "隧道TBM": "TM",
|
|
|
+ "隧道进洞": "JD",
|
|
|
+ "隧道竖井": "SJ",
|
|
|
+ "隧道斜井": "XJ",
|
|
|
+ "特种设备": "TZ",
|
|
|
+ "瓦斯隧道": "WS",
|
|
|
+ "下部结构": "XB",
|
|
|
+ "小净距隧道": "NJ",
|
|
|
+ "岩爆隧道": "YB",
|
|
|
+ "岩溶隧道": "YR",
|
|
|
+ "涌水突泥隧道": "YN",
|
|
|
+ "桩基础": "ZJ",
|
|
|
+ "其他": "QT",
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+def map_enum(value: str | None, mapping: Dict[str, str], default: str = "QT") -> str:
|
|
|
+ if not value:
|
|
|
+ return default
|
|
|
+ return mapping.get(value, default)
|
|
|
+
|
|
|
+
|
|
|
+def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str, file_name: str) -> Dict[str, Any]:
|
|
|
"""
|
|
|
构造 metadata 字段。
|
|
|
|
|
|
@@ -40,24 +124,38 @@ def build_metadata(doc_data: Dict[str, Any], hierarchy: str, file_url: str) -> D
|
|
|
doc_data: JSON 中的 doc 数据
|
|
|
hierarchy: 文档层级信息
|
|
|
file_url: 文件 URL
|
|
|
+ file_name: 原始文件名
|
|
|
|
|
|
Returns:
|
|
|
metadata 字典
|
|
|
"""
|
|
|
+ plan_category = map_enum(doc_data.get("plan_category"), PLAN_CATEGORY_MAP)
|
|
|
+ level_1_classification = map_enum(doc_data.get("level_1_classification"), LEVEL_1_CLASSIFICATION_MAP)
|
|
|
+ level_2_classification = map_enum(doc_data.get("level_2_classification"), LEVEL_2_CLASSIFICATION_MAP)
|
|
|
+ level_3_classification = map_enum(doc_data.get("level_3_classification"), LEVEL_3_CLASSIFICATION_MAP)
|
|
|
+ level_4_classification = map_enum(doc_data.get("level_4_classification"), LEVEL_4_CLASSIFICATION_MAP)
|
|
|
+
|
|
|
return {
|
|
|
- "file_name": doc_data.get("plan_name", ""),
|
|
|
- "plan_category": doc_data.get("plan_category", ""),
|
|
|
+ "file_name": file_name,
|
|
|
+ "plan_name": doc_data.get("plan_name", ""),
|
|
|
"project_name": doc_data.get("project_name", ""),
|
|
|
+ "project_section": doc_data.get("project_section", ""),
|
|
|
"compiling_unit": doc_data.get("compiling_unit", ""),
|
|
|
"compiling_date": doc_data.get("compiling_date", ""),
|
|
|
+ "plan_summary": doc_data.get("plan_summary", ""),
|
|
|
"hierarchy": hierarchy,
|
|
|
"file_url": file_url,
|
|
|
+ "plan_category": plan_category,
|
|
|
+ "level_1_classification": level_1_classification,
|
|
|
+ "level_2_classification": level_2_classification,
|
|
|
+ "level_3_classification": level_3_classification,
|
|
|
+ "level_4_classification": level_4_classification,
|
|
|
"plan_type_list": {} # 空 JSON
|
|
|
}
|
|
|
|
|
|
|
|
|
def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows: List[Dict[str, Any]],
|
|
|
- doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
|
|
|
+ doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
|
|
|
"""
|
|
|
插入 parent 数据到 Milvus。
|
|
|
|
|
|
@@ -96,7 +194,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
|
|
|
"index": row.get("index", 0),
|
|
|
"tag_list": "",
|
|
|
"permission": {},
|
|
|
- "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
|
|
|
+ "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
|
|
|
"is_deleted": False,
|
|
|
"created_by": DEFAULT_USER_ID,
|
|
|
"created_time": now_ts,
|
|
|
@@ -114,7 +212,7 @@ def insert_parent_rows(client: MilvusClient, collection_name: str, parent_rows:
|
|
|
|
|
|
|
|
|
def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: List[Dict[str, Any]],
|
|
|
- doc_data: Dict[str, Any], doc_id: str, folder_name: str) -> tuple[int, str | None]:
|
|
|
+ doc_data: Dict[str, Any], doc_id: str, folder_name: str, file_name: str) -> tuple[int, str | None]:
|
|
|
"""
|
|
|
插入 children 数据到 Milvus。
|
|
|
|
|
|
@@ -153,7 +251,7 @@ def insert_child_rows(client: MilvusClient, collection_name: str, child_rows: Li
|
|
|
"index": row.get("index", 0),
|
|
|
"tag_list": "",
|
|
|
"permission": {},
|
|
|
- "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url),
|
|
|
+ "metadata": build_metadata(doc_data, row.get("hierarchy", ""), file_url, file_name),
|
|
|
"is_deleted": False,
|
|
|
"created_by": DEFAULT_USER_ID,
|
|
|
"created_time": now_ts,
|
|
|
@@ -223,13 +321,23 @@ def process_folder(root_folder: str | Path) -> Dict[str, Any]:
|
|
|
continue
|
|
|
|
|
|
doc_id = doc_data.get("id")
|
|
|
+
|
|
|
+ # 获取原始文件名(不含扩展名)
|
|
|
+ all_files = list(subfolder.glob("*"))
|
|
|
+ original_file = None
|
|
|
+ for f in all_files:
|
|
|
+ if f.is_file() and f.suffix not in [".json", ".md"]:
|
|
|
+ original_file = f
|
|
|
+ break
|
|
|
+
|
|
|
+ file_name = original_file.stem if original_file else folder_name
|
|
|
|
|
|
# 插入 parent 和 children
|
|
|
parent_count, parent_error = insert_parent_rows(
|
|
|
- client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name
|
|
|
+ client, PARENT_COLLECTION_NAME, parent_rows, doc_data, doc_id, folder_name, file_name
|
|
|
)
|
|
|
child_count, child_error = insert_child_rows(
|
|
|
- client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name
|
|
|
+ client, CHILD_COLLECTION_NAME, child_rows, doc_data, doc_id, folder_name, file_name
|
|
|
)
|
|
|
|
|
|
if parent_error:
|