#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
端到端完整性审查测试

验证数据流程：
1. document_processor._build_parse_result() 生成 chunks
2. structure_content() 处理 chunks
3. completeness_reviewer 读取并统计

测试目标：验证 tertiary_classification_details 字段在整个流程中不丢失
"""

import asyncio
import json
import sys
from pathlib import Path

# Put the project root on sys.path so that the project's own packages can be
# imported when this file is executed directly. The root is taken to be three
# directory levels above this file.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))


def test_build_parse_result():
    """Check that ``DocumentProcessor._build_parse_result`` keeps the tertiary details.

    One mock chunk carrying a ``tertiary_classification_details`` list is fed
    to ``_build_parse_result``. The first chunk of the returned ``chunks`` list
    must expose that field inside its ``metadata`` dict, with as many entries
    as the fixture supplied.

    Returns:
        bool: True when the field is present with the expected number of
        entries; False otherwise (a ``[FAIL]`` line is printed first).
    """
    print("\n" + "=" * 60)
    print("测试 1: document_processor._build_parse_result()")
    print("=" * 60)

    from core.construction_review.component.document_processor import DocumentProcessor

    processor = DocumentProcessor()

    # Mock chunk as it looks after tertiary classification.
    mock_chunks = [
        {
            'chunk_id': 'test_chunk_1',
            'element_tag': {'page': 1},
            'review_chunk_content': '测试内容',
            'section_label': '第一章->四、编制原则',
            'project_plan_type': 'construction_plan',
            'chapter_classification': 'basis',
            'secondary_category_cn': '编制原则',
            'secondary_category_code': 'CompilationPrinciples',
            'tertiary_category_cn': '国家方针、政策、标准和设计文件',
            'tertiary_category_code': 'NationalPoliciesStandardsAndDesignDocument',
            # Key field under test: the list of tertiary classification details.
            'tertiary_classification_details': [
                {"third_category_code": "NationalPoliciesStandardsAndDesignDocument", "third_category_name": "国家方针、政策、标准和设计文件"},
                {"third_category_code": "BasicConstructionProcedures", "third_category_name": "基本建设程序"},
                {"third_category_code": "ProjectFunctionImplementation", "third_category_name": "工程项目功能实现"},
                {"third_category_code": "ContractPerformance", "third_category_name": "合同履约"},
                {"third_category_code": "ConstructionForceConcentration", "third_category_name": "施工力量集中"},
                {"third_category_code": "ProcessControl", "third_category_name": "工序控制"}
            ]
        }
    ]

    # Snapshot the expected size before the call: the count then stays tied to
    # the fixture, and an in-place mutation of the input cannot mask a loss.
    expected_count = len(mock_chunks[0]['tertiary_classification_details'])

    result = processor._build_parse_result(
        file_type='docx',
        chunks=mock_chunks,
        pages_content=[],
        toc_info={},
        classified_items=[],
        target_level=1,
        total_chars=100
    )

    result_chunks = result.get('chunks', [])
    if not result_chunks:
        print("  [FAIL] 没有生成 chunks")
        return False

    first_chunk = result_chunks[0]
    metadata = first_chunk.get('metadata', {})

    # ``or []`` also covers a key that is present but set to None, which would
    # otherwise make len() raise instead of producing a [FAIL] report.
    details = metadata.get('tertiary_classification_details') or []
    print(f"  metadata keys: {list(metadata.keys())}")
    print(f"  tertiary_classification_details 存在: {'tertiary_classification_details' in metadata}")
    print(f"  tertiary_classification_details 数量: {len(details)}")

    if 'tertiary_classification_details' not in metadata:
        print("  [FAIL] _build_parse_result 丢失了 tertiary_classification_details")
        return False

    if len(details) != expected_count:
        print(f"  [FAIL] tertiary_classification_details 数量不正确: {len(details)} != {expected_count}")
        return False

    print("  [PASS] _build_parse_result 正确保留了 tertiary_classification_details")
    return True


def test_structure_content():
    """Check that ``DocumentProcessor.structure_content`` keeps the tertiary details.

    The input mimics the output of ``_build_parse_result``: a chunk whose
    ``metadata`` dict holds ``tertiary_classification_details``. After
    ``structure_content`` the first returned chunk must carry that field as a
    top-level key, with as many entries as the fixture supplied.

    Returns:
        bool: True when the field is present with the expected number of
        entries; False otherwise (a ``[FAIL]`` line is printed first).
    """
    print("\n" + "=" * 60)
    print("测试 2: document_processor.structure_content()")
    print("=" * 60)

    from core.construction_review.component.document_processor import DocumentProcessor

    processor = DocumentProcessor()

    # Mock of what _build_parse_result produces.
    mock_raw_content = {
        'document_type': 'docx',
        'toc_info': {'chapters': []},
        'classification': {'items': [], 'target_level': 1},
        'chunks': [
            {
                'page': 1,
                'content': '测试内容',
                'metadata': {
                    'chunk_id': 'test_chunk_1',
                    'section_label': '第一章->四、编制原则',
                    'project_plan_type': 'construction_plan',
                    'chapter_classification': 'basis',
                    'secondary_category_cn': '编制原则',
                    'secondary_category_code': 'CompilationPrinciples',
                    'tertiary_category_cn': '国家方针、政策、标准和设计文件',
                    'tertiary_category_code': 'NationalPoliciesStandardsAndDesignDocument',
                    'tertiary_classification_details': [
                        {"third_category_code": "NationalPoliciesStandardsAndDesignDocument", "third_category_name": "国家方针、政策、标准和设计文件"},
                        {"third_category_code": "BasicConstructionProcedures", "third_category_name": "基本建设程序"},
                        {"third_category_code": "ProjectFunctionImplementation", "third_category_name": "工程项目功能实现"},
                        {"third_category_code": "ContractPerformance", "third_category_name": "合同履约"},
                        {"third_category_code": "ConstructionForceConcentration", "third_category_name": "施工力量集中"},
                        {"third_category_code": "ProcessControl", "third_category_name": "工序控制"}
                    ],
                    'element_tag': {}
                }
            }
        ],
        'metadata': {}
    }

    # Snapshot the expected size before the call: the count then stays tied to
    # the fixture, and an in-place mutation of the input cannot mask a loss.
    expected_count = len(
        mock_raw_content['chunks'][0]['metadata']['tertiary_classification_details']
    )

    result = processor.structure_content(mock_raw_content)

    result_chunks = result.get('chunks', [])
    if not result_chunks:
        print("  [FAIL] 没有生成 chunks")
        return False

    first_chunk = result_chunks[0]
    print(f"  chunk keys: {list(first_chunk.keys())}")
    print(f"  tertiary_classification_details 存在: {'tertiary_classification_details' in first_chunk}")

    # ``or []`` also covers a key that is present but set to None, which would
    # otherwise make len() raise instead of producing a [FAIL] report.
    details = first_chunk.get('tertiary_classification_details') or []
    print(f"  tertiary_classification_details 数量: {len(details)}")

    if 'tertiary_classification_details' not in first_chunk:
        print("  [FAIL] structure_content 丢失了 tertiary_classification_details")
        return False

    if len(details) != expected_count:
        print(f"  [FAIL] tertiary_classification_details 数量不正确: {len(details)} != {expected_count}")
        return False

    print("  [PASS] structure_content 正确保留了 tertiary_classification_details")
    return True


async def test_completeness_reviewer():
    """Run ``LightweightCompletenessChecker.check`` on one flat chunk and inspect the stats.

    The chunk has ``tertiary_classification_details`` as a top-level key (the
    flat layout). The overall tertiary figures are printed, then the entry of
    ``secondary_stats`` whose ``secondary_code`` is ``CompilationPrinciples``
    is looked up.

    Returns:
        bool: True when that entry reports zero missing items; False when it
        reports missing items or when no such entry exists.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("测试 3: completeness_reviewer 数据读取")
    print(banner)

    from core.construction_review.component.reviewers.completeness_reviewer import (
        LightweightCompletenessChecker
    )

    # (code, name) pairs expanded below into the tertiary detail dicts.
    tertiary_pairs = (
        ("NationalPoliciesStandardsAndDesignDocument", "国家方针、政策、标准和设计文件"),
        ("BasicConstructionProcedures", "基本建设程序"),
        ("ProjectFunctionImplementation", "工程项目功能实现"),
        ("ContractPerformance", "合同履约"),
        ("ConstructionForceConcentration", "施工力量集中"),
        ("ProcessControl", "工序控制"),
    )

    # Mock of the structure_content output format: every field is top-level.
    flat_chunk = {
        'chunk_id': 'test_chunk_1',
        'page': 1,
        'content': '测试内容',
        'section_label': '第一章->四、编制原则',
        'chapter_classification': 'basis',
        'secondary_category_cn': '编制原则',
        'secondary_category_code': 'CompilationPrinciples',
        'tertiary_category_cn': '国家方针、政策、标准和设计文件',
        'tertiary_category_code': 'NationalPoliciesStandardsAndDesignDocument',
        'tertiary_classification_details': [
            {"third_category_code": code, "third_category_name": name}
            for code, name in tertiary_pairs
        ],
    }
    mock_chunks = [flat_chunk]

    csv_path = str(project_root / "core/construction_review/component/doc_worker/config/StandardCategoryTable.csv")
    checker = LightweightCompletenessChecker(csv_path)

    result = await checker.check(
        chunks=mock_chunks,
        outline=None,
        chapter_classification='basis'
    )

    # Overall tertiary completeness figures.
    tertiary_result = result.tertiary_completeness
    print(f"  总体状态: {result.overall_status}")
    print("  三级完整性:")
    for label, key in (("总数", 'total'), ("已有", 'present'), ("缺失", 'missing')):
        print(f"    {label}: {tertiary_result.get(key, 0)}")

    # First stats entry for the "CompilationPrinciples" secondary category.
    principles_stat = next(
        (
            entry
            for entry in tertiary_result.get('secondary_stats', [])
            if entry.get('secondary_code') == 'CompilationPrinciples'
        ),
        None,
    )
    if principles_stat is None:
        print("  [FAIL] 没有找到编制原则的统计")
        return False

    print("\n  编制原则统计:")
    print(f"    总数: {principles_stat.get('total_tertiary', 0)}")
    print(f"    已有: {principles_stat.get('present', 0)}")
    missing = principles_stat.get('missing', 0)
    print(f"    缺失: {missing}")

    if missing == 0:
        print("  [PASS] completeness_reviewer 正确识别了所有三级分类")
        return True

    print(f"  [FAIL] 还有 {missing} 个缺失")
    return False


def test_metadata_format():
    """Check extraction of tertiary categories from the nested ``metadata`` layout.

    The mock chunk keeps its classification fields inside a ``metadata`` dict
    instead of at the top level. ``_extract_tertiary_from_chunks`` is called
    directly on it and its result is expected to have length 2.

    Returns:
        bool: True when exactly two items are extracted, False otherwise.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("测试 4: metadata 嵌套格式支持")
    print(banner)

    from core.construction_review.component.reviewers.completeness_reviewer import (
        LightweightCompletenessChecker
    )

    # (code, name) pairs expanded below into the tertiary detail dicts.
    tertiary_pairs = (
        ("NationalPoliciesStandardsAndDesignDocument", "国家方针"),
        ("BasicConstructionProcedures", "基本建设程序"),
    )

    # Chunk in the nested format: classification data lives under 'metadata'.
    nested_chunk = {
        'chunk_id': 'test_chunk_1',
        'page': 1,
        'content': '测试内容',
        'metadata': {
            'chapter_classification': 'basis',
            'secondary_category_code': 'CompilationPrinciples',
            'tertiary_classification_details': [
                {"third_category_code": code, "third_category_name": name}
                for code, name in tertiary_pairs
            ],
        },
    }
    mock_chunks = [nested_chunk]

    csv_path = str(project_root / "core/construction_review/component/doc_worker/config/StandardCategoryTable.csv")
    checker = LightweightCompletenessChecker(csv_path)

    # Exercise the extraction helper directly rather than the full check().
    actual = checker._extract_tertiary_from_chunks(mock_chunks)
    extracted_count = len(actual)

    print(f"  提取到的三级分类: {actual}")
    print(f"  数量: {extracted_count}")

    if extracted_count != 2:
        print("  [FAIL] 提取数量不正确")
        return False

    print("  [PASS] 正确从 metadata 嵌套格式提取三级分类")
    return True


async def main():
    """Run the four checks in order and print a pass/fail summary.

    Returns:
        bool: True when every check returned a truthy value, False otherwise.
    """
    rule = "=" * 60

    print("\n" + rule)
    print("端到端完整性审查测试")
    print("验证 tertiary_classification_details 字段在数据流中不丢失")
    print(rule)

    # List elements are evaluated left to right, so the checks still run in
    # pipeline order: parse result -> structured content -> reviewer -> format.
    results = [
        ("_build_parse_result", test_build_parse_result()),
        ("structure_content", test_structure_content()),
        ("completeness_reviewer", await test_completeness_reviewer()),
        ("metadata_format", test_metadata_format()),
    ]

    # Summary table, one line per check.
    print("\n" + rule)
    print("测试结果汇总")
    print(rule)

    for name, passed in results:
        print(f"  {'[PASS]' if passed else '[FAIL]'} {name}")

    all_passed = all(passed for _, passed in results)

    print("\n" + rule)
    print("所有测试通过!" if all_passed else "存在失败的测试，请检查!")
    print(rule)

    return all_passed


if __name__ == "__main__":
    # Turn the aggregate result into the process exit status so that shells
    # and CI can detect a failed run: 0 when main() returns a truthy value,
    # 1 otherwise.
    sys.exit(0 if asyncio.run(main()) else 1)