|
|
@@ -16,11 +16,41 @@ from app.config.database import get_async_sessionmaker, get_async_engine
|
|
|
from app.config.setting import settings
|
|
|
|
|
|
|
|
|
+# ============================================
|
|
|
+# 配置参数区域 - 便于修改
|
|
|
+# ============================================
|
|
|
+
|
|
|
+# 数据库表名配置
|
|
|
+TABLE_DOCUMENT_MAIN = "t_samp_document_main" # 文档主表
|
|
|
+TABLE_STANDARD_BASE_INFO = "t_samp_standard_base_info" # 标准基础信息表
|
|
|
+
|
|
|
# 默认创建人/修改人ID
|
|
|
DEFAULT_USER_ID = "ed6a79d3-0083-4d81-8b48-fc522f686f74"
|
|
|
|
|
|
# 根目录配置
|
|
|
-ROOT_FOLDER = r"C:\Users\ZengChao\Desktop\新建文件夹"
|
|
|
+ROOT_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\最终编制依据"
|
|
|
+
|
|
|
+# 失败汇总JSON保存路径
|
|
|
+FAILED_REPORT_PATH = r"F:\第二阶段编制依据及施工方案数据治理-20260206\base_db_failed_report.json"
|
|
|
+
|
|
|
+# 默认值配置 - 需要修改时只改这里
|
|
|
+DOCUMENT_MAIN_DEFAULTS = {
|
|
|
+ "conversion_status": 2, # 2-完成
|
|
|
+ "whether_to_enter": 0, # 0-未入库
|
|
|
+ "conversion_error": None,
|
|
|
+ "source_type": "standard",
|
|
|
+ "kb_id": "a61e1530-9ff3-4640-b2f7-fe7c9edfcbc1",
|
|
|
+ "kb_method": "parent_child",
|
|
|
+ "whether_to_task": 0,
|
|
|
+ "created_by": DEFAULT_USER_ID,
|
|
|
+ "updated_by": DEFAULT_USER_ID,
|
|
|
+}
|
|
|
+
|
|
|
+STANDARD_BASE_INFO_DEFAULTS = {
|
|
|
+ "created_by": DEFAULT_USER_ID,
|
|
|
+ "updated_by": DEFAULT_USER_ID,
|
|
|
+ "note": None,
|
|
|
+}
|
|
|
|
|
|
|
|
|
def parse_date(date_str: Optional[str]) -> Optional[str]:
|
|
|
@@ -36,7 +66,7 @@ def parse_date(date_str: Optional[str]) -> Optional[str]:
|
|
|
return None
|
|
|
|
|
|
|
|
|
-async def insert_document_main(session, doc_data: Dict[str, Any], title: str, file_extension: str, file_url: str, md_url: str, json_url: str, folder_name: str = "") -> bool:
|
|
|
+async def insert_document_main(session, doc_data: Dict[str, Any], title: str, file_extension: str, file_url: str, md_url: str, json_url: str, folder_name: str = "") -> tuple[bool, Optional[str]]:
|
|
|
"""
|
|
|
插入文档主表 t_samp_document_main
|
|
|
|
|
|
@@ -51,48 +81,53 @@ async def insert_document_main(session, doc_data: Dict[str, Any], title: str, fi
|
|
|
folder_name: 文件夹名称,用于错误打印
|
|
|
|
|
|
Returns:
|
|
|
- 是否插入成功
|
|
|
+ (是否插入成功, 失败原因)
|
|
|
"""
|
|
|
try:
|
|
|
doc_id = doc_data.get("id")
|
|
|
|
|
|
- sql = text("""
|
|
|
- INSERT INTO t_samp_document_main (
|
|
|
+ sql = text(f"""
|
|
|
+ INSERT INTO {TABLE_DOCUMENT_MAIN} (
|
|
|
id, title, conversion_status, whether_to_enter,
|
|
|
- file_url, md_url, json_url, file_extension,
|
|
|
+ conversion_error, file_url, md_url, json_url, file_extension,
|
|
|
created_by, created_time, updated_by, updated_time,
|
|
|
- source_type
|
|
|
+ source_type, kb_id, kb_method, whether_to_task
|
|
|
) VALUES (
|
|
|
:id, :title, :conversion_status, :whether_to_enter,
|
|
|
- :file_url, :md_url, :json_url, :file_extension,
|
|
|
+ :conversion_error, :file_url, :md_url, :json_url, :file_extension,
|
|
|
:created_by, :created_time, :updated_by, :updated_time,
|
|
|
- :source_type
|
|
|
+ :source_type, :kb_id, :kb_method, :whether_to_task
|
|
|
)
|
|
|
""")
|
|
|
|
|
|
await session.execute(sql, {
|
|
|
"id": doc_id,
|
|
|
"title": title,
|
|
|
- "conversion_status": 2, # 2-完成
|
|
|
- "whether_to_enter": 0, # 0-未入库
|
|
|
+ "conversion_status": DOCUMENT_MAIN_DEFAULTS["conversion_status"],
|
|
|
+ "whether_to_enter": DOCUMENT_MAIN_DEFAULTS["whether_to_enter"],
|
|
|
+ "conversion_error": DOCUMENT_MAIN_DEFAULTS["conversion_error"],
|
|
|
"file_url": file_url,
|
|
|
"md_url": md_url,
|
|
|
"json_url": json_url,
|
|
|
"file_extension": file_extension,
|
|
|
- "created_by": DEFAULT_USER_ID,
|
|
|
+ "created_by": DOCUMENT_MAIN_DEFAULTS["created_by"],
|
|
|
"created_time": datetime.now(),
|
|
|
- "updated_by": DEFAULT_USER_ID,
|
|
|
+ "updated_by": DOCUMENT_MAIN_DEFAULTS["updated_by"],
|
|
|
"updated_time": datetime.now(),
|
|
|
- "source_type": "basis",
|
|
|
+ "source_type": DOCUMENT_MAIN_DEFAULTS["source_type"],
|
|
|
+ "kb_id": DOCUMENT_MAIN_DEFAULTS["kb_id"],
|
|
|
+ "kb_method": DOCUMENT_MAIN_DEFAULTS["kb_method"],
|
|
|
+ "whether_to_task": DOCUMENT_MAIN_DEFAULTS["whether_to_task"],
|
|
|
})
|
|
|
|
|
|
- return True
|
|
|
+ return True, None
|
|
|
except Exception as e:
|
|
|
- print(f"📁 {folder_name} ❌ 插入主表失败: {e}")
|
|
|
- return False
|
|
|
+ error_message = str(e)
|
|
|
+ print(f"📁 {folder_name} ❌ 插入主表失败: {error_message}")
|
|
|
+ return False, error_message
|
|
|
|
|
|
|
|
|
-async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_name: str = "") -> bool:
|
|
|
+async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_name: str = "") -> tuple[bool, Optional[str]]:
|
|
|
"""
|
|
|
插入标准基础信息表 t_samp_standard_base_info
|
|
|
|
|
|
@@ -102,7 +137,7 @@ async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_na
|
|
|
folder_name: 文件夹名称,用于错误打印
|
|
|
|
|
|
Returns:
|
|
|
- 是否插入成功
|
|
|
+ (是否插入成功, 失败原因)
|
|
|
"""
|
|
|
try:
|
|
|
doc_id = doc_data.get("id")
|
|
|
@@ -117,21 +152,23 @@ async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_na
|
|
|
drafting_unit = None
|
|
|
participating_units = None
|
|
|
|
|
|
- sql = text("""
|
|
|
- INSERT INTO t_samp_standard_base_info (
|
|
|
+ sql = text(f"""
|
|
|
+ INSERT INTO {TABLE_STANDARD_BASE_INFO} (
|
|
|
id, chinese_name, english_name, standard_number,
|
|
|
issuing_authority, release_date, implementation_date,
|
|
|
drafting_unit, approving_department, participating_units,
|
|
|
document_type, professional_field, engineering_phase, validity,
|
|
|
reference_basis, source_url,
|
|
|
- created_by, created_time, updated_by, updated_time
|
|
|
+ created_by, created_time, updated_by, updated_time,
|
|
|
+ note
|
|
|
) VALUES (
|
|
|
:id, :chinese_name, :english_name, :standard_number,
|
|
|
:issuing_authority, :release_date, :implementation_date,
|
|
|
:drafting_unit, :approving_department, :participating_units,
|
|
|
:document_type, :professional_field, :engineering_phase, :validity,
|
|
|
:reference_basis, :source_url,
|
|
|
- :created_by, :created_time, :updated_by, :updated_time
|
|
|
+ :created_by, :created_time, :updated_by, :updated_time,
|
|
|
+ :note
|
|
|
)
|
|
|
""")
|
|
|
|
|
|
@@ -152,19 +189,21 @@ async def insert_standard_base_info(session, doc_data: Dict[str, Any], folder_na
|
|
|
"validity": doc_data.get("validity"),
|
|
|
"reference_basis": doc_data.get("reference_basis"),
|
|
|
"source_url": doc_data.get("source_url"),
|
|
|
- "created_by": DEFAULT_USER_ID,
|
|
|
+ "created_by": STANDARD_BASE_INFO_DEFAULTS["created_by"],
|
|
|
"created_time": datetime.now(),
|
|
|
- "updated_by": DEFAULT_USER_ID,
|
|
|
+ "updated_by": STANDARD_BASE_INFO_DEFAULTS["updated_by"],
|
|
|
"updated_time": datetime.now(),
|
|
|
+ "note": STANDARD_BASE_INFO_DEFAULTS["note"],
|
|
|
})
|
|
|
|
|
|
- return True
|
|
|
+ return True, None
|
|
|
except Exception as e:
|
|
|
- print(f"📁 {folder_name} ❌ 插入基础信息表失败: {e}")
|
|
|
- return False
|
|
|
+ error_message = str(e)
|
|
|
+ print(f"📁 {folder_name} ❌ 插入基础信息表失败: {error_message}")
|
|
|
+ return False, error_message
|
|
|
|
|
|
|
|
|
-async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
+async def process_folder(root_folder: str | Path) -> Dict[str, Any]:
|
|
|
"""
|
|
|
处理文件夹结构,导入数据库
|
|
|
|
|
|
@@ -180,7 +219,7 @@ async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
|
|
|
SessionMaker = get_async_sessionmaker()
|
|
|
|
|
|
- stats = {"success": 0, "failed": 0, "skipped": 0}
|
|
|
+ stats = {"success": 0, "failed": 0, "skipped": 0, "failed_items": []}
|
|
|
|
|
|
# 遍历子文件夹
|
|
|
for subfolder in sorted(root_folder.iterdir()):
|
|
|
@@ -206,6 +245,11 @@ async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
doc_data = data.get("doc")
|
|
|
if not doc_data or not doc_data.get("id"):
|
|
|
print(f"📁 {folder_name} ❌ (JSON格式错误或缺少doc/id)")
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "JSON格式错误或缺少doc/id",
|
|
|
+ "reason": "JSON格式错误或缺少doc/id",
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
continue
|
|
|
|
|
|
@@ -221,6 +265,11 @@ async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
|
|
|
if not original_file:
|
|
|
print(f"📁 {folder_name} ❌ (未找到原始文件)")
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "未找到原始文件",
|
|
|
+ "reason": "未找到原始文件",
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
continue
|
|
|
|
|
|
@@ -229,22 +278,34 @@ async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
file_extension = original_file.suffix # 扩展名(含点)
|
|
|
|
|
|
# 构造 URL
|
|
|
- file_url = f"/base/{doc_id}{file_extension}"
|
|
|
- md_url = f"/base/{doc_id}.md"
|
|
|
- json_url = f"/base/{doc_id}.json"
|
|
|
+ file_url = f"/standard/{doc_id}{file_extension}"
|
|
|
+ md_url = f"/standard/{doc_id}.md"
|
|
|
+ json_url = f"/standard/{doc_id}.json"
|
|
|
|
|
|
# 插入数据库
|
|
|
async with SessionMaker() as session:
|
|
|
try:
|
|
|
# 先插入主表
|
|
|
- if not await insert_document_main(session, doc_data, title, file_extension, file_url, md_url, json_url, folder_name):
|
|
|
+ main_ok, main_error = await insert_document_main(session, doc_data, title, file_extension, file_url, md_url, json_url, folder_name)
|
|
|
+ if not main_ok:
|
|
|
await session.rollback()
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "插入主表失败",
|
|
|
+ "reason": main_error,
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
continue
|
|
|
|
|
|
# 再插入基础信息表
|
|
|
- if not await insert_standard_base_info(session, doc_data, folder_name):
|
|
|
+ base_ok, base_error = await insert_standard_base_info(session, doc_data, folder_name)
|
|
|
+ if not base_ok:
|
|
|
await session.rollback()
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "插入基础信息表失败",
|
|
|
+ "reason": base_error,
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
continue
|
|
|
|
|
|
@@ -253,12 +314,24 @@ async def process_folder(root_folder: str | Path) -> Dict[str, int]:
|
|
|
stats["success"] += 1
|
|
|
|
|
|
except Exception as e:
|
|
|
+ error_message = str(e)
|
|
|
await session.rollback()
|
|
|
- print(f"📁 {folder_name} ❌ ({str(e)})")
|
|
|
+ print(f"📁 {folder_name} ❌ ({error_message})")
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "未知错误",
|
|
|
+ "reason": error_message,
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
|
|
|
except Exception as e:
|
|
|
- print(f"📁 {folder_name} ❌ ({str(e)})")
|
|
|
+ error_message = str(e)
|
|
|
+ print(f"📁 {folder_name} ❌ ({error_message})")
|
|
|
+ stats["failed_items"].append({
|
|
|
+ "folder": folder_name,
|
|
|
+ "error": "未知错误",
|
|
|
+ "reason": error_message,
|
|
|
+ })
|
|
|
stats["failed"] += 1
|
|
|
|
|
|
return stats
|
|
|
@@ -273,9 +346,13 @@ async def main():
|
|
|
print("-" * 60)
|
|
|
|
|
|
stats = await process_folder(ROOT_FOLDER)
|
|
|
+
|
|
|
+ with open(FAILED_REPORT_PATH, "w", encoding="utf-8") as f:
|
|
|
+ json.dump({"failed": stats["failed_items"]}, f, ensure_ascii=False, indent=2)
|
|
|
|
|
|
print("\n" + "=" * 60)
|
|
|
print(f"✅ 成功: {stats['success']} | ❌ 失败: {stats['failed']} | ⊘ 跳过: {stats['skipped']}")
|
|
|
+ print(f"❌ 失败汇总JSON: {FAILED_REPORT_PATH}")
|
|
|
print("=" * 60)
|
|
|
|
|
|
except Exception as e:
|