| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- #!/usr/bin/env python3
- """测试不同方式下载 ModelScope 数据文件区图片。"""
- import json
- import urllib.request
- import urllib.parse
- import sys
- import subprocess
- import os
# Base URL of the ModelScope site. Kept as a module-level constant; nothing in
# the visible part of this script reads it.
api_base = "https://www.modelscope.cn"

# Dataset to probe: first command-line argument, or a sample dataset by default.
dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"

# The ID must have the form "<namespace>/<name>". Validate it explicitly so a
# malformed argument ends the script with a readable message instead of an
# unpacking ValueError traceback.
namespace, _sep, ds_name = dataset_id.partition("/")
if not namespace or not ds_name:
    sys.exit(f"无效的数据集 ID: {dataset_id!r} (应为 'namespace/name' 格式)")

print(f"数据集: {dataset_id}\n")
# First, report the currently installed versions of the relevant packages.
print("=== 当前版本 ===")
for pkg in ["modelscope", "datasets"]:
    try:
        # Invoke pip through the running interpreter so the report describes
        # the environment this script imports from, not whichever `pip`
        # executable happens to be first on PATH.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "show", pkg],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception as e:
        # Best-effort diagnostics: a missing pip or a timeout must not abort
        # the rest of the script.
        print(f" {pkg}: {e}")
        continue

    if result.returncode != 0:
        # `pip show` exits non-zero (with the reason on stderr) when the
        # package is not installed; surface that instead of printing nothing.
        detail = result.stderr.strip() or f"pip exited with code {result.returncode}"
        print(f" {pkg}: {detail}")
        continue

    # Only the Name/Version header lines are of interest here.
    for line in result.stdout.splitlines():
        if line.startswith(("Version:", "Name:")):
            print(f" {line}")
# Approach 1: call the hub snapshot API directly (sidesteps the msdatasets
# import problem), passing an explicit DownloadMode.
print("\n=== 方式1: HubApi snapshot_download ===")
try:
    from modelscope.hub.snapshot_download import dataset_snapshot_download

    print("dataset_snapshot_download 可用!")

    from modelscope.utils.constant import DownloadMode

    cache_dir = "/tmp/ms_test_cache"
    os.makedirs(cache_dir, exist_ok=True)

    download_kwargs = {
        "dataset_id": dataset_id,
        "cache_dir": cache_dir,
        "download_mode": DownloadMode.REUSE_DATASET_IF_EXISTS,
    }
    result = dataset_snapshot_download(**download_kwargs)
    print(f"成功! 缓存目录: {result}")

    # List every downloaded file with its path relative to the snapshot
    # directory and its size in bytes.
    for parent, _subdirs, names in os.walk(result):
        for name in names:
            full_path = os.path.join(parent, name)
            n_bytes = os.path.getsize(full_path)
            rel_path = os.path.relpath(full_path, result)
            print(f" {rel_path} ({n_bytes} bytes)")
except Exception as err:
    # Any failure (missing package, rejected argument, network error) is
    # reported and the script moves on to the next approach.
    print(f"失败: {err}")
# Approach 2: the same snapshot download, but without the DownloadMode argument.
print("\n=== 方式2: dataset_snapshot_download (简化调用) ===")
try:
    from modelscope.hub.snapshot_download import dataset_snapshot_download

    cache_dir = "/tmp/ms_test_cache2"
    os.makedirs(cache_dir, exist_ok=True)

    result = dataset_snapshot_download(dataset_id=dataset_id, cache_dir=cache_dir)
    print(f"成功! 缓存目录: {result}")

    # Walk the snapshot directory and print each file's relative path and size.
    for folder, _unused_dirs, file_names in os.walk(result):
        for file_name in file_names:
            absolute = os.path.join(folder, file_name)
            byte_count = os.path.getsize(absolute)
            relative = os.path.relpath(absolute, result)
            print(f" {relative} ({byte_count} bytes)")
except Exception as err:
    # Report the failure and continue with the remaining approaches.
    print(f"失败: {err}")
# Approach 3: probe HubApi directly for a way to list/download dataset files
# (e.g. get_dataset_file_url_with_token or something similar).
print("\n=== 方式3: HubApi 获取下载 URL ===")
try:
    from modelscope.hub.api import HubApi

    api = HubApi()

    # Collect every attribute whose name mentions "dataset" or "download".
    methods = []
    for attr_name in dir(api):
        lowered = attr_name.lower()
        if 'dataset' in lowered or 'download' in lowered:
            methods.append(attr_name)
    print(f"可用方法: {methods}")

    # Try each candidate listing method that this HubApi instance exposes.
    for candidate in ('list_repo_tree', 'get_dataset_meta_file_list'):
        if not hasattr(api, candidate):
            continue
        print(f"\n尝试 {candidate}...")
        try:
            bound = getattr(api, candidate)
            result = bound(ds_name, namespace=namespace, revision="master")
            print(f" 结果: {result}")
        except Exception as call_err:
            # One candidate failing should not stop the other from being tried.
            print(f" 失败: {call_err}")
except Exception as err:
    print(f"失败: {err}")
# Approach 4: ask pip which modelscope versions are available.
print("\n=== 方式4: pip 检查 ===")
try:
    # Run pip via the current interpreter so the query uses the same
    # environment (and pip version) as this script.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "index", "versions", "modelscope"],
        capture_output=True,
        text=True,
        timeout=15,
    )
    if result.returncode == 0:
        # Cap the output so a long version list does not flood the terminal.
        print(result.stdout[:500])
    else:
        # `pip index` may be unavailable or fail; show why instead of
        # printing an empty line.
        detail = result.stderr.strip()[:500] or f"pip exited with code {result.returncode}"
        print(f"失败: {detail}")
except Exception as e:
    print(f"失败: {e}")

print("\n=== 完成 ===")
|