| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- """
- 测试 JSON 格式文本提取功能
- """
- import sys
- import os
- # 添加项目根目录到路径
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
- from app.services.qwen_ocr import QwenOCRClient
def test_json_extraction():
    """Check that ``QwenOCRClient._extract_text_from_json`` yields plain text.

    Three inputs are exercised, in this order:

    1. a JSON array wrapped in a Markdown ``json`` code fence,
    2. the same kind of JSON array without any fence,
    3. ordinary non-JSON text, which is expected to come back unchanged.

    For the JSON inputs the expected output is the ``"text"`` values of the
    array elements joined by newlines.

    Each extracted result is printed, followed by a pass/fail summary.  The
    results are then asserted, so a mismatch actually fails the test instead
    of only being reported on stdout.

    Raises:
        AssertionError: if any extracted text differs from its expected value.
    """
    # Per the original author's note, a real API key is not required here;
    # only the text-extraction helper is called on this client.
    client = QwenOCRClient(api_key="test_key")

    # Case 1 payload: standard JSON inside a Markdown code fence.
    fenced_json = (
        "```json\n"
        "[\n"
        '{"rotate_rect": [178, 272, 25, 157, 90], "text": "统一社会信用代码"},\n'
        '{"rotate_rect": [158, 308, 21, 119, 90], "text": "91510100MAEL1NGR2D"},\n'
        '{"rotate_rect": [492, 282, 79, 277, 90], "text": "营业执照"},\n'
        '{"rotate_rect": [494, 352, 37, 81, 90], "text": "(副本)"},\n'
        '{"rotate_rect": [228, 438, 246, 27, 0], "text": "名称 成都网讯创智科技有限公司"}\n'
        "]\n"
        "```"
    )

    # Case 2 payload: JSON without the Markdown markers.
    bare_json = (
        "[\n"
        '{"rotate_rect": [178, 272, 25, 157, 90], "text": "统一社会信用代码"},\n'
        '{"rotate_rect": [158, 308, 21, 119, 90], "text": "91510100MAEL1NGR2D"}\n'
        "]"
    )

    # Case 3 payload: plain text that is not JSON at all.
    test_text = "这是普通的文本内容,不是 JSON 格式"

    # (printed heading, input payload, expected extraction)
    cases = [
        (
            "\n测试1:带 markdown 标记的 JSON",
            fenced_json,
            "统一社会信用代码\n91510100MAEL1NGR2D\n营业执照\n(副本)\n名称 成都网讯创智科技有限公司",
        ),
        (
            "\n测试2:不带 markdown 标记的 JSON",
            bare_json,
            "统一社会信用代码\n91510100MAEL1NGR2D",
        ),
        (
            "\n测试3:普通文本(非 JSON)",
            test_text,
            test_text,
        ),
    ]

    banner = "=" * 60
    rule = "-" * 60

    print(banner)
    print("JSON 文本提取测试")
    print(banner)

    # Run every case first, echoing what was extracted, and remember the
    # outcome so the summary and the assertion below see the same values.
    outcomes = []
    for number, (heading, payload, expected) in enumerate(cases, start=1):
        print(heading)
        print(rule)
        actual = client._extract_text_from_json(payload)
        print("提取结果:")
        print(actual)
        print(rule)
        outcomes.append((number, expected, actual))

    print("\n" + banner)
    print("验证结果")
    print(banner)

    for number, expected, actual in outcomes:
        verdict = "✅ 通过" if actual == expected else "❌ 失败"
        print(f"测试{number}: {verdict}")

    print("\n" + banner)
    print("测试完成")
    print(banner)

    # Assert only after the full report has been printed, so the diagnostic
    # output is complete even when a case fails.
    mismatches = [
        f"case {number}: expected {expected!r}, got {actual!r}"
        for number, expected, actual in outcomes
        if actual != expected
    ]
    assert not mismatches, "; ".join(mismatches)
# Run the extraction check when this file is executed directly as a script.
if __name__ == "__main__":
    test_json_extraction()
|