#!/usr/bin/env python3
"""Probe the ModelScope dataset API to look for CDN links to the data files.

Usage: pass a dataset id of the form ``namespace/name`` as the first
command-line argument; ``tany0699/carBrands50`` is used when none is given.

Four probes run in order. Each one prints either its result or its failure
and the script then moves on, so one failing endpoint never hides the others.
"""

import json
import sys
import urllib.parse
import urllib.request

api_base = "https://www.modelscope.cn"
default_dataset_id = "tany0699/carBrands50"


def split_dataset_id(dataset_id: str) -> tuple:
    """Split a ``namespace/name`` dataset id into ``(namespace, name)``.

    Only the first slash separates the two parts; any further slashes stay
    in the name.

    Raises:
        ValueError: if the namespace or the name is missing.
    """
    namespace, _, name = dataset_id.partition("/")
    if not namespace or not name:
        raise ValueError(
            f"dataset id must look like 'namespace/name': {dataset_id!r}"
        )
    return namespace, name


def dataset_info_url(dataset_id: str) -> str:
    """Return the URL of the dataset info endpoint for *dataset_id*."""
    # quote() keeps "/" by default, so ordinary ids come out unchanged.
    return f"{api_base}/api/v1/datasets/{urllib.parse.quote(dataset_id)}"


def repo_file_url(namespace: str, ds_name: str, file_path: str) -> str:
    """Return the URL that serves *file_path* from the dataset repo at master."""
    query = urllib.parse.urlencode(
        {"Revision": "master", "FilePath": file_path, "View": "false"}
    )
    return (
        f"{api_base}/api/v1/datasets/"
        f"{urllib.parse.quote(namespace)}/{urllib.parse.quote(ds_name)}/repo"
        f"?{query}"
    )


def fetch_text(url: str) -> str:
    """Print the request line, GET *url* and return the decoded response body.

    Raises whatever ``urllib.request.urlopen`` or the decoding raises; the
    callers report those failures themselves.
    """
    print(f"请求: {url}")
    request = urllib.request.Request(url, headers={"User-Agent": "Test"})
    with urllib.request.urlopen(request, timeout=30) as response:
        return response.read().decode()


def show_dataset_info(dataset_id: str) -> None:
    """Test 1: print the start of the dataset info API response as JSON."""
    print("=== Test1: 数据集 info API 完整响应 ===")
    try:
        info = json.loads(fetch_text(dataset_info_url(dataset_id)))
        # The response can be large; only the first 3000 characters are shown.
        print(json.dumps(info, indent=2, ensure_ascii=False)[:3000])
    except Exception as e:
        print(f"失败: {e}")


def probe_hub_api(namespace: str, ds_name: str) -> None:
    """Test 2: call HubApi directly, without going through msdatasets."""
    print("\n=== Test2: HubApi 直接调用 ===")
    try:
        # Imported here so a missing modelscope package only fails this probe.
        from modelscope.hub.api import HubApi

        api = HubApi()
    except ImportError as e:
        print(f"import 失败: {e}")
        return
    except Exception as e:
        print(f"失败: {e}")
        return

    # List the files of the dataset.
    print("尝试 get_dataset_files...")
    try:
        files = api.get_dataset_files(
            ds_name, namespace=namespace, recursive=True
        )
        print(f"get_dataset_files 返回 {len(files)} 个文件:")
        for entry in files:
            print(f" {entry}")
    except Exception as e:
        print(f"get_dataset_files 失败: {e}")

    # Ask for the download URL of a single file.
    print("\n尝试 get_dataset_file_url...")
    try:
        url = api.get_dataset_file_url(
            "train.csv", ds_name, namespace, revision="master"
        )
        print(f"train.csv 下载 URL: {url}")
    except Exception as e:
        print(f"get_dataset_file_url 失败: {e}")


def show_repo_file(
    title: str, namespace: str, ds_name: str, file_path: str
) -> None:
    """Print *title*, then the first 2000 characters of a dataset repo file."""
    print(title)
    try:
        print(fetch_text(repo_file_url(namespace, ds_name, file_path))[:2000])
    except Exception as e:
        print(f"失败: {e}")


def main(argv=None) -> int:
    """Run all probes for the dataset id given on the command line.

    Args:
        argv: argument list without the program name; ``sys.argv[1:]`` is
            used when it is None.

    Returns:
        0 when every probe was attempted, 1 when the dataset id is not of
        the form ``namespace/name`` and the probes needing both parts were
        skipped.
    """
    args = sys.argv[1:] if argv is None else argv
    dataset_id = args[0] if args else default_dataset_id
    print(f"测试数据集: {dataset_id}\n")

    # Test 1 only needs the raw id, so it runs even for a malformed one.
    show_dataset_info(dataset_id)

    # Split once up front: the remaining probes all need both parts, and none
    # of them should depend on a name bound inside another probe's try block.
    try:
        namespace, ds_name = split_dataset_id(dataset_id)
    except ValueError as e:
        print(f"\n失败: {e}")
        return 1

    probe_hub_api(namespace, ds_name)

    # Test 3: the dataset config file, which may contain data file URLs.
    # NOTE(review): the file name is fixed to the default dataset's config
    # file; other datasets presumably use a different name -- confirm.
    show_repo_file(
        "\n=== Test3: carBrands50.json 配置文件 ===",
        namespace,
        ds_name,
        "carBrands50.json",
    )

    # Test 4: dataset_infos.json
    show_repo_file(
        "\n=== Test4: dataset_infos.json ===",
        namespace,
        ds_name,
        "dataset_infos.json",
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())