#!/usr/bin/env python3
"""Probe HubApi dataset methods while looking for data-file CDN links.

The script takes an optional ``namespace/dataset_name`` argument (a default
dataset is used when omitted), calls a handful of ``HubApi`` dataset methods
and prints either the result or the error each call produced.  Every probe
is best-effort: a failing call is reported and the script moves on to the
next one.
"""
import inspect
import json
import sys
import urllib.request

# Dataset probed when no command-line argument is given.
DEFAULT_DATASET_ID = "tany0699/carBrands50"
# Maximum number of characters of a JSON-rendered result that gets printed.
MAX_RESULT_CHARS = 3000
# Timeout, in seconds, for the HEAD request that checks a file URL.
HEAD_TIMEOUT_SECONDS = 15
# Revision passed to every HubApi call that accepts one.
REVISION = "master"


def split_dataset_id(dataset_id):
    """Split ``"namespace/name"`` into a ``(namespace, name)`` tuple.

    Only the first ``/`` separates the two parts, so any further slashes
    stay in the name.

    Raises:
        ValueError: if there is no ``/`` or either part is empty.
    """
    namespace, sep, ds_name = dataset_id.partition("/")
    if not (sep and namespace and ds_name):
        raise ValueError(
            f"无效的数据集 ID {dataset_id!r}，应为 'namespace/name' 格式"
        )
    return namespace, ds_name


def format_result(result, limit=MAX_RESULT_CHARS):
    """Render *result* as indented JSON, truncated to *limit* characters.

    Values that JSON cannot serialise are converted with ``str``.
    """
    text = json.dumps(result, indent=2, ensure_ascii=False, default=str)
    return text[:limit]


def probe(api, method_name, **kwargs):
    """Print the signature and the result of ``api.<method_name>(**kwargs)``.

    The attribute lookup happens inside the ``try`` on purpose: a method
    that does not exist on the installed HubApi is reported as a failure
    instead of aborting the remaining probes.
    """
    print(f"\n=== {method_name} ===")
    try:
        method = getattr(api, method_name)
        print(f"签名: {inspect.signature(method)}")
        result = method(**kwargs)
        print(f"结果: {format_result(result)}")
    except Exception as e:  # best-effort probe: report and carry on
        print(f"失败: {e}")


def head_check(url):
    """Send a HEAD request to *url* and print the status and Content-Length.

    Errors raised while opening the URL are printed, not propagated.
    Building the request object may still raise; the caller handles that.
    """
    req = urllib.request.Request(
        url, method="HEAD", headers={"User-Agent": "Test"}
    )
    try:
        with urllib.request.urlopen(req, timeout=HEAD_TIMEOUT_SECONDS) as resp:
            size = resp.headers.get("Content-Length", "?")
            print(f"HEAD: {resp.status} | size={size}")
    except Exception as e2:
        print(f"HEAD: {e2}")


def probe_file_url(api, file_name, namespace, ds_name, check_head=False):
    """Print the URL ``get_dataset_file_url`` returns for *file_name*.

    When *check_head* is true the URL is additionally verified with a HEAD
    request (see ``head_check``).
    """
    print(f"\n=== get_dataset_file_url ({file_name}) ===")
    try:
        url = api.get_dataset_file_url(
            file_name=file_name,
            dataset_name=ds_name,
            namespace=namespace,
            revision=REVISION,
        )
        print(f"URL: {url}")
        if check_head:
            # Try to reach the file to verify the URL.
            head_check(url)
    except Exception as e:  # best-effort probe: report and carry on
        print(f"失败: {e}")


def main(argv=None):
    """Run every probe against the dataset named in *argv*.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 after the probes ran, 2 when the dataset id
        is malformed.
    """
    args = sys.argv[1:] if argv is None else argv
    dataset_id = args[0] if args else DEFAULT_DATASET_ID
    try:
        namespace, ds_name = split_dataset_id(dataset_id)
    except ValueError as err:
        print(f"失败: {err}", file=sys.stderr)
        return 2
    print(f"数据集: {dataset_id}\n")

    # Third-party import deferred until after argument validation so the
    # helpers above stay importable without modelscope installed.
    from modelscope.hub.api import HubApi

    api = HubApi()

    # Look up the dataset id and type.
    print("=== 获取 dataset_id ===")
    try:
        ds_id, ds_type = api.get_dataset_id_and_type(
            namespace=namespace, dataset_name=ds_name
        )
        print(f"dataset_id={ds_id}, type={ds_type}")
    except Exception as e:  # best-effort probe: report and carry on
        print(f"失败: {e}")

    probe(
        api,
        "get_dataset_access_config",
        dataset_name=ds_name,
        namespace=namespace,
        revision=REVISION,
    )
    probe(
        api,
        "get_dataset_access_config_for_unzipped",
        dataset_name=ds_name,
        namespace=namespace,
        revision=REVISION,
    )
    probe(
        api,
        "get_dataset_infos",
        dataset_name=ds_name,
        namespace=namespace,
    )

    probe_file_url(api, "train.csv", namespace, ds_name)
    # train.zip is additionally checked with a HEAD request.
    probe_file_url(api, "train.zip", namespace, ds_name, check_head=True)

    print("\n=== 完成 ===")
    return 0


if __name__ == "__main__":
    sys.exit(main())