| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
#!/usr/bin/env python3
"""Probe the ModelScope dataset API to look for CDN links of a dataset's data files.

Usage: pass a dataset id of the form ``namespace/name`` as the first
command-line argument; without one, ``DEFAULT_DATASET_ID`` is probed.

Every probe is best-effort: a failure is printed and the script moves on to
the next probe instead of aborting.
"""
import json
import sys
import urllib.parse
import urllib.request

API_BASE = "https://www.modelscope.cn"
DEFAULT_DATASET_ID = "tany0699/carBrands50"
REQUEST_HEADERS = {"User-Agent": "Test"}
REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP request


def split_dataset_id(dataset_id: str) -> tuple:
    """Split ``namespace/name`` into a ``(namespace, name)`` tuple.

    Only the first ``/`` separates the two parts, so ``"a/b/c"`` yields
    ``("a", "b/c")``.

    Raises:
        ValueError: if either part is missing.
    """
    namespace, sep, name = dataset_id.partition("/")
    if not (sep and namespace and name):
        raise ValueError(
            f"dataset id must look like 'namespace/name', got {dataset_id!r}"
        )
    return namespace, name


def dataset_info_url(dataset_id: str) -> str:
    """Return the dataset info endpoint URL for *dataset_id*."""
    # "/" stays unescaped because it separates namespace and name in the path.
    return f"{API_BASE}/api/v1/datasets/{urllib.parse.quote(dataset_id)}"


def repo_file_url(
    namespace: str, ds_name: str, file_path: str, revision: str = "master"
) -> str:
    """Return the repo endpoint URL that serves *file_path* of a dataset."""
    query = urllib.parse.urlencode(
        {"Revision": revision, "FilePath": file_path, "View": "false"},
        quote_via=urllib.parse.quote,
    )
    quoted_ns = urllib.parse.quote(namespace)
    quoted_name = urllib.parse.quote(ds_name)
    return f"{API_BASE}/api/v1/datasets/{quoted_ns}/{quoted_name}/repo?{query}"


def _fetch_text(url: str) -> str:
    """Announce *url*, GET it and return the decoded response body."""
    print(f"请求: {url}")
    req = urllib.request.Request(url, headers=REQUEST_HEADERS)
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
        return resp.read().decode()


def _show_dataset_info(dataset_id: str) -> None:
    """Test 1: print the start of the dataset info API response."""
    print("=== Test1: 数据集 info API 完整响应 ===")
    try:
        info = json.loads(_fetch_text(dataset_info_url(dataset_id)))
        print(json.dumps(info, indent=2, ensure_ascii=False)[:3000])
    except Exception as e:  # diagnostic script: report and keep going
        print(f"失败: {e}")


def _probe_hub_api(namespace: str, ds_name: str) -> None:
    """Test 2: call HubApi directly (sidesteps the msdatasets import problem)."""
    print("\n=== Test2: HubApi 直接调用 ===")
    try:
        # Imported lazily so the plain-HTTP probes work without modelscope.
        from modelscope.hub.api import HubApi

        api = HubApi()
    except ImportError as e:
        print(f"import 失败: {e}")
        return
    except Exception as e:
        print(f"失败: {e}")
        return

    # List the files of the dataset.
    print("尝试 get_dataset_files...")
    try:
        files = api.get_dataset_files(ds_name, namespace=namespace, recursive=True)
        print(f"get_dataset_files 返回 {len(files)} 个文件:")
        for entry in files:
            print(f" {entry}")
    except Exception as e:
        print(f"get_dataset_files 失败: {e}")

    # Ask for the download URL of a single file.
    print("\n尝试 get_dataset_file_url...")
    try:
        url = api.get_dataset_file_url(
            "train.csv", ds_name, namespace, revision="master"
        )
        print(f"train.csv 下载 URL: {url}")
    except Exception as e:
        print(f"get_dataset_file_url 失败: {e}")


def _show_repo_file(title: str, namespace: str, ds_name: str, file_path: str) -> None:
    """Print a section header and the first 2000 characters of a repo file."""
    print(f"\n=== {title} ===")
    try:
        print(_fetch_text(repo_file_url(namespace, ds_name, file_path))[:2000])
    except Exception as e:
        print(f"失败: {e}")


def main(argv=None) -> None:
    """Run every probe against the dataset named by the first argument.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    dataset_id = args[0] if args else DEFAULT_DATASET_ID
    print(f"测试数据集: {dataset_id}\n")

    # Test 1 only needs the raw id, so it runs even if the id is malformed.
    _show_dataset_info(dataset_id)

    # Parse the id once: every remaining probe needs namespace and name.
    try:
        namespace, ds_name = split_dataset_id(dataset_id)
    except ValueError as e:
        print(f"\n跳过 Test2-Test4: {e}")
        return

    _probe_hub_api(namespace, ds_name)

    # Test 3: the dataset's config file, which may contain data-file URLs.
    # Assumes the config file is named after the dataset, as it is for the
    # default dataset (carBrands50.json) -- confirm for other datasets.
    config_name = f"{ds_name}.json"
    _show_repo_file(f"Test3: {config_name} 配置文件", namespace, ds_name, config_name)

    # Test 4: dataset_infos.json
    _show_repo_file("Test4: dataset_infos.json", namespace, ds_name, "dataset_infos.json")


if __name__ == "__main__":
    main()
|