# test_real_scenario.py (4.3 KB) -- file-viewer header; the fused line-number gutter that followed was extraction residue
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  """
  Simulated real-world workflow scenario test.

  Runs the complete DocumentWorkflow pipeline against a sample PDF that
  lives next to this file.
  """
  import sys
  import os
  import asyncio
  from pathlib import Path

  # The project root is three directory levels above this file.
  project_root = Path(__file__).parent.parent.parent
  # Make the project root the working directory.
  # NOTE(review): presumably so project-relative resource paths resolve -- confirm.
  os.chdir(project_root)
  # Changing the working directory does not affect module lookup: when this
  # file is run as a script, sys.path[0] is the script's own directory.  Put
  # the project root on sys.path so the ``core`` package below is importable.
  if str(project_root) not in sys.path:
      sys.path.insert(0, str(project_root))

  from core.construction_review.workflows.document_workflow import DocumentWorkflow
  from core.base.task_models import TaskFileInfo

  # Sample PDF processed by the scenario; expected to sit beside this script.
  TARGET_FILE = Path(__file__).parent / "330测试fa56f7a8-bd36-4140-8cb1-f9f973ce8745.pdf"
  def _chunk_label(chunk):
      """Return the chunk's ``section_label``, else its ``title``, else ``''``."""
      return chunk.get('section_label', '') or chunk.get('title', '') or ''


  def _first_level(label):
      """Return the text before the first ``->`` in *label*, whitespace-stripped."""
      return label.split('->')[0].strip()


  def _print_chapter_summary(chunks):
      """Print the number of chunks per first-level chapter and report which of
      the ten expected chapter headings were found or are missing."""
      from collections import Counter

      print("\n" + "=" * 80)
      print("章节分布分析")
      print("=" * 80)

      # Count by the exact first-level name.  A prefix test on the raw label
      # would attribute every chunk to an empty name, miss labels with leading
      # whitespace, and double-count chapters whose names prefix one another.
      counts = Counter(_first_level(_chunk_label(c)) for c in chunks)

      print(f" 共 {len(counts)} 个一级章节:")
      for name in sorted(counts):
          print(f" - {name}: {counts[name]} chunks")

      # Check for missing chapters: a heading counts as found when it occurs
      # anywhere inside some first-level name.
      expected_chapters = ["第一章", "第二章", "第三章", "第四章", "第五章", "第六章", "第七章", "第八章", "第九章", "第十章"]
      found_chapters = []
      missing_chapters = []
      for heading in expected_chapters:
          if any(heading in name for name in counts):
              found_chapters.append(heading)
          else:
              missing_chapters.append(heading)

      print(f"\n 找到的章节: {found_chapters}")
      if missing_chapters:
          print(f" [WARNING] 缺失的章节: {missing_chapters}")


  def _print_chunk_details(chunks):
      """Print label, id, page, content length and an 80-character preview per chunk."""
      print("\n" + "=" * 80)
      print("所有 Chunks 详情")
      print("=" * 80)
      for i, chunk in enumerate(chunks, 1):
          label = _chunk_label(chunk)
          # Fall back to '' so len() below never receives None.
          content = chunk.get('content', '') or chunk.get('review_chunk_content', '') or ''
          print(f"\n {i}. {label}")
          print(f" chunk_id: {chunk.get('chunk_id')}")
          print(f" page: {chunk.get('page')}")
          print(f" content_length: {len(content)}")
          if content:
              preview = content[:80].replace('\n', ' ')
              print(f" preview: {preview}...")


  async def test_document_workflow():
      """Run the full document workflow on ``TARGET_FILE`` and print a report.

      Reads the file, wraps it in a ``TaskFileInfo``, executes a
      ``DocumentWorkflow`` on it, then prints the result header, the
      first-level chapter distribution and per-chunk details.

      Returns:
          0 on success; 1 when the target file does not exist or when any
          exception is raised while executing the workflow or printing the
          report (the traceback is printed in that case).
      """
      if not TARGET_FILE.exists():
          print(f"[ERROR] 文件不存在: {TARGET_FILE}")
          return 1

      # Read the file content.
      file_content = TARGET_FILE.read_bytes()

      # Build the TaskFileInfo.
      task_info = TaskFileInfo({
          "file_id": "test_330_file",
          "user_id": "test_user",
          "callback_task_id": "test_callback_001",
          "file_name": TARGET_FILE.name,
          "file_type": "pdf",
          "file_content": file_content,
          "review_config": [],
          "review_item_config": [],
          "project_plan_type": "T梁运输及安装专项施工方案",
          "tendency_review_role": "",
          "launched_at": 0
      })

      # Create the workflow.
      workflow = DocumentWorkflow(
          task_file_info=task_info,
          progress_manager=None,  # does not depend on a progress manager
          redis_duplicate_checker=None
      )

      print(f"\n[INFO] 开始处理文件: {TARGET_FILE.name}")
      print(f" 文件大小: {len(file_content)} bytes")

      # The try deliberately spans the reporting as well: this is the script's
      # top-level boundary, and any failure must become exit status 1.
      try:
          result = await workflow.execute(
              file_content=file_content,
              file_type="pdf"
          )

          print("\n" + "=" * 80)
          print("文档处理结果")
          print("=" * 80)
          print(f" file_id: {result.get('file_id')}")
          print(f" document_name: {result.get('document_name')}")
          print(f" total_chunks: {result.get('total_chunks')}")

          # ``or`` also covers keys that are present but set to None.
          structured = result.get('structured_content') or {}
          chunks = structured.get('chunks') or []

          _print_chapter_summary(chunks)
          _print_chunk_details(chunks)
          return 0
      except Exception as e:
          print(f"\n[ERROR] 处理失败: {e}")
          import traceback
          traceback.print_exc()
          return 1
  if __name__ == "__main__":
      # Drive the async scenario to completion and hand its return value
      # (0 or 1) to the interpreter as the process exit status.
      sys.exit(asyncio.run(test_document_workflow()))