| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
完整性审查集成测试

测试流程:
1. 加载现有的 final_result.json 文件
2. 模拟三级分类结果(添加 tertiary_classification_details)
3. 运行完整性审查
4. 验证结果
"""
- import asyncio
- import json
- import sys
- from pathlib import Path
# Put the project root (three directory levels above this file) at the front of
# sys.path so the `core.*` import that follows can be resolved.
# resolve() comes first because __file__ may be a relative path; climbing
# `.parent` on a relative path stalls at "." instead of reaching the real root.
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(project_root))
- from core.construction_review.component.reviewers.completeness_reviewer import (
- LightweightCompletenessChecker,
- TertiarySpecLoader
- )
def load_final_result(file_path: str) -> dict:
    """Read the UTF-8 encoded JSON file at ``file_path`` and return the decoded value.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        Whatever ``json`` decodes from the file content (annotated as ``dict``).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.loads(handle.read())
    return parsed
# Secondary category code -> ordered (third_category_code, third_category_name)
# pairs. The original comment states the table is based on StandardCategoryTable.csv.
# Stored as immutable tuples so every chunk can be handed freshly built dicts.
_TERTIARY_CATEGORIES = {
    # Compilation basis
    "LawsAndRegulations": (
        ("NationalLawsAndRegulations", "国家政府发布的法律法规与规章制度"),
        ("ProvincialLawsAndRegulationsOfProjectLocation", "工程所在地省级政府发布的法律法规与规章制度"),
    ),
    "StandardsAndSpecifications": (
        ("IndustryStandards", "行业标准"),
        ("TechnicalRegulations", "技术规程"),
    ),
    "DocumentSystems": (
        ("SichuanRoadAndBridgeDocumentSystemsAndmanagementProcedures", "四川路桥下发的文件制度和管理程序文件"),
        ("RoadAndBridgeGroupDocumentSystemsAndmanagementProcedures", "路桥集团下发的文件制度和管理程序文件"),
        ("BridgeCompanyDocumentSystemsAndmanagementProcedures", "桥梁公司下发的文件制度和管理程序文件"),
        ("ConstructionUnitDocumentSystemsAndmanagementProcedures", "建设单位下发的文件制度和管理程序文件"),
    ),
    "CompilationPrinciples": (
        ("NationalPoliciesStandardsAndDesignDocument", "国家方针、政策、标准和设计文件"),
        ("BasicConstructionProcedures", "基本建设程序"),
        ("ProjectFunctionImplementation", "工程项目功能实现"),
        ("ContractPerformance", "合同履约"),
        ("ConstructionForceConcentration", "施工力量集中"),
        ("ProcessControl", "工序控制"),
    ),
    "CompilationScope": (
        ("ProjectCoverage", "填写完整涵盖本方案包含的所有工程"),
        ("ConstructionTechnology", "部分工程可简要说明采取的施工工艺"),
    ),
    # Project overview
    "DesignSummary": (
        ("ProjectIntroduction", "工程简介"),
        ("MainTechnicalStandards", "主要技术标准"),
    ),
    "GeologyWeather": (
        ("HydrologicalConditions", "水文状况"),
        ("ClimaticConditions", "气候条件"),
    ),
    "Surroundings": (
        ("PositionalRelationship", "位置关系"),
        ("StructuralDimensions", "结构尺寸"),
    ),
    "LayoutPlan": (
        ("TemporaryFacilityLocation", "临时设施位置"),
        ("ConstructionWorkPlatform", "施工作业平台与便道参数"),
        ("TemporaryWaterAndElectricityArrangement", "临时水电布置"),
    ),
    "RequirementsTech": (
        ("DurationTarget", "工期目标"),
        ("QualityTarget", "质量目标"),
        ("SecurityGoals", "安全目标"),
        ("EnvironmentalGoals", "环境目标"),
    ),
    "RiskLevel": (
        ("DangerSource", "危险源"),
        ("ClassificationAndResponseMeasures", "分级与应对措施"),
    ),
    "Stakeholders": (
        ("UnitType", "单位类型"),
    ),
    # Construction plan
    "Schedule": (
        ("ProcessOperationTimeAnalysis", "工序作业时间分析"),
        ("KeyProjectNodeArrangement", "关键工程(工序)节点安排"),
        ("ConstructionScheduleGanttChart", "施工进度计划横道图等"),
    ),
    "Materials": (
        ("ListOfConstructionMeasuresAndMaterials", "施工措施材料清单"),
    ),
    "Equipment": (
        ("MainConstructionMachineryAndEquipment", "主要施工机械设备"),
    ),
    "Workforce": (
        ("WorkforceAllocationPlan", "劳动力配置计划"),
        ("StageLaborDemand", "阶段劳动力需求"),
    ),
    "SafetyCost": (
        ("CategoryOfSafetyProductionExpenses", "安全生产费用类别"),
        ("SecurityFeeName", "安全费用名称"),
        ("SingleInvestmentAmount", "单项投入金额"),
        ("TotalSafetyProductionExpenses", "安全生产费用总额"),
    ),
    # Construction process and technology
    "MethodsOverview": (
        ("ConstructionTechnologySelection", "施工工艺选择"),
        ("MainConstructionMethods", "主要施工方法"),
        ("TemplateConfigurationQuantity", "模板配置数量"),
    ),
    "TechParams": (
        ("MaterialType", "材料类型"),
        ("MaterialSpecifications", "材料规格"),
        ("DeviceName", "设备名称"),
        ("DeviceModel", "设备型号"),
        ("EquipmentManufacturingTime", "设备出厂时间"),
        ("EquipmentPerformanceParameters", "设备性能参数"),
        ("EquipmentWeight", "设备自重"),
    ),
    "Process": (
        ("ConstructionProcess", "施工工序"),
        ("ProcessSequence", "工艺顺序"),
        ("ProcessFlowDiagram", "工艺流程框图"),
    ),
    "PrepWork": (
        ("MeasurementAndStakeout", "测量放样"),
        ("TemporaryWaterAndElectricityConsumption", "临时水电用量"),
        ("TheSiteIsFlat", "场地平整"),
        ("Staffing", "人员配置"),
        ("EquipmentEntry", "设备进场"),
        ("SafetyProtectionFacilities", "安全防护措施"),
        ("PersonnelAccess", "人员上下通道"),
    ),
    "Operations": (
        ("ConstructionProcessOperations", "施工工序描述操作"),
        ("ConstructionPoints", "施工要点"),
        ("FAQPrevention", "常见问题及预防"),
        ("ProblemSolvingMeasures", "问题处理措施"),
    ),
    "Inspection": (
        ("MaterialInspectionUponArrival", "材料进场质量检验"),
        ("RandomInspectionOfIncomingComponents", "构配件进场质量抽查"),
        ("ProcessInspectionContent", "工序检查内容"),
        ("ProcessInspectionStandards", "工序检查标准"),
    ),
}


def simulate_tertiary_classification(chunks: list) -> list:
    """Attach simulated tertiary classification results to classified chunks.

    Each chunk whose ``secondary_category_code`` is non-empty and is not
    ``"none"``/``"None"`` receives a ``tertiary_classification_details`` list
    holding every tertiary category registered for that secondary code (an
    empty list when the code is not in the table). When the list is non-empty,
    ``tertiary_category_code`` and ``tertiary_category_cn`` are set from its
    first entry.

    Args:
        chunks: Chunk dicts. They are updated in place.

    Returns:
        The updated chunk dicts, grouped by secondary code in order of each
        code's first appearance. Chunks without a usable secondary code are
        not included.
    """
    # Bucket chunks by secondary code; dict insertion order fixes the output order.
    secondary_groups = {}
    for chunk in chunks:
        sec_code = chunk.get("secondary_category_code", "")
        if not sec_code or sec_code in ("none", "None"):
            continue
        secondary_groups.setdefault(sec_code, []).append(chunk)

    updated_chunks = []
    for sec_code, group_chunks in secondary_groups.items():
        category_pairs = _TERTIARY_CATEGORIES.get(sec_code, ())
        for chunk in group_chunks:
            # Build new dicts for every chunk: sharing one list between chunks
            # would let an edit to one chunk's details leak into all the others.
            details = [
                {"third_category_code": code, "third_category_name": name}
                for code, name in category_pairs
            ]
            chunk["tertiary_classification_details"] = details
            # The first tertiary category doubles as the primary one (the
            # original comment marks these two keys as backward compatibility).
            if details:
                chunk["tertiary_category_code"] = details[0]["third_category_code"]
                chunk["tertiary_category_cn"] = details[0]["third_category_name"]
            updated_chunks.append(chunk)
    return updated_chunks
async def test_completeness_check():
    """Run the completeness review against a saved result file and print a report.

    Steps: load a saved ``final_result`` JSON file, attach simulated tertiary
    classification details to its chunks, run
    ``LightweightCompletenessChecker.check`` for the ``"basis"`` chapter, then
    print the overall status, the tertiary completeness figures and the
    statistics reported for the ``CompilationPrinciples`` secondary category.

    Nothing is asserted; the outcome is only printed. Returns ``None`` early
    when the fixture file does not exist.
    """
    banner = "=" * 60
    print(banner)
    print("完整性审查集成测试")
    print(banner)

    # 1. Load the test data
    test_file = project_root / "temp/construction_review/final_result/4148f6019f89e061b15679666f646893-1773993108.json"
    if not test_file.exists():
        print(f"错误: 测试文件不存在: {test_file}")
        return
    print(f"\n1. 加载测试数据: {test_file.name}")
    data = load_final_result(str(test_file))
    chunks = data.get('document_result', {}).get('structured_content', {}).get('chunks', [])
    print(f" 原始 chunks 数量: {len(chunks)}")

    # 2. Inspect the raw data structure using the first chunk as a sample
    print("\n2. 检查原始数据结构:")
    sample_chunk = chunks[0] if chunks else {}
    print(f" chunk keys: {list(sample_chunk.keys())}")
    print(f" 有 tertiary_classification_details: {'tertiary_classification_details' in sample_chunk}")

    # 3. Simulate the tertiary classification results
    print("\n3. 模拟三级分类结果...")
    chunks_with_tertiary = simulate_tertiary_classification(chunks)

    # Count tertiary categories per secondary code (the last chunk seen for a code wins)
    tertiary_counts = {}
    for chunk in chunks_with_tertiary:
        sec_code = chunk.get("secondary_category_code", "")
        details = chunk.get("tertiary_classification_details", [])
        if sec_code:
            tertiary_counts[sec_code] = len(details)
    print(f" 已添加三级分类详情的 chunks: {len(chunks_with_tertiary)}")
    print("\n 各二级分类的三级分类数量:")
    for sec_code, count in sorted(tertiary_counts.items()):
        print(f" {sec_code}: {count} 个三级分类")

    # 4. Run the completeness review
    print("\n4. 运行完整性审查...")
    csv_path = str(project_root / "core/construction_review/component/doc_worker/config/StandardCategoryTable.csv")
    checker = LightweightCompletenessChecker(csv_path)
    result = await checker.check(
        chunks=chunks_with_tertiary,
        outline=None,
        chapter_classification="basis"  # only the compilation-basis chapter is exercised
    )

    # 5. Print the results
    print("\n5. 审查结果:")
    print(f" 总体状态: {result.overall_status}")

    # Tertiary completeness figures
    tertiary_result = result.tertiary_completeness
    print("\n 三级完整性:")
    print(f" 总数: {tertiary_result.get('total', 0)}")
    print(f" 已有: {tertiary_result.get('present', 0)}")
    print(f" 缺失: {tertiary_result.get('missing', 0)}")
    print(f" 完整率: {tertiary_result.get('completeness_rate', '0%')}")

    # Missing tertiary categories
    missing_details = tertiary_result.get('missing_details', [])
    if missing_details:
        print(f"\n 缺失的三级分类 ({len(missing_details)} 个):")
        for item in missing_details[:10]:  # show at most the first 10
            print(f" - {item.get('secondary_name', '')} > {item.get('tertiary_name', '')}")
    else:
        print("\n [OK] 无缺失的三级分类!")

    # 6. Check the 'CompilationPrinciples' secondary category
    print("\n6. 验证 '编制原则' 二级分类:")
    principle_chunks = [c for c in chunks_with_tertiary
                        if "CompilationPrinciples" in c.get("secondary_category_code", "")]
    if principle_chunks:
        chunk = principle_chunks[0]
        details = chunk.get("tertiary_classification_details", [])
        print(" 找到编制原则 chunk")
        print(f" 三级分类详情数量: {len(details)}")
        print(" 三级分类列表:")
        for d in details:
            print(f" - {d.get('third_category_code')}: {d.get('third_category_name')}")

        # Show what the review itself reported for this secondary category
        for item in tertiary_result.get('secondary_stats', []):
            if item.get('secondary_code') == 'CompilationPrinciples':
                print("\n 完整性审查统计:")
                print(f" 总数: {item.get('total_tertiary', 0)}")
                print(f" 已有: {item.get('present', 0)}")
                print(f" 缺失: {item.get('missing', 0)}")
    else:
        print(" 未找到编制原则相关的 chunks")

    print("\n" + banner)
    print("测试完成")
    print(banner)
# Script entry point: run the async integration test to completion.
if __name__ == "__main__":
    entry_coro = test_completeness_check()
    asyncio.run(entry_coro)
|