#!/usr/bin/env python3
"""Try different ways of downloading images from a ModelScope dataset's file area.

Usage::

    python <this script> [namespace/dataset_name]

When no dataset ID is given, ``DEFAULT_DATASET_ID`` is used.  Every approach
runs independently and prints either its result or the error it hit, so one
failing approach never prevents the remaining ones from being tried.
"""
import json
import urllib.request
import urllib.parse
import sys
import subprocess
import os

api_base = "https://www.modelscope.cn"
DEFAULT_DATASET_ID = "tany0699/carBrands50"


def parse_dataset_id(dataset_id):
    """Split ``"namespace/name"`` into a ``(namespace, name)`` tuple.

    Only the first ``/`` separates the two parts, so ``"a/b/c"`` yields
    ``("a", "b/c")``.

    Raises:
        ValueError: if the separator is missing or either part is empty.
    """
    namespace, sep, ds_name = dataset_id.partition("/")
    if not (sep and namespace and ds_name):
        raise ValueError(
            f"无效的数据集 ID: {dataset_id!r} (应为 'namespace/name')"
        )
    return namespace, ds_name


def pip_command(*args):
    """Build a pip command line bound to the interpreter running this script.

    Using ``sys.executable -m pip`` instead of a bare ``pip`` guarantees the
    packages being inspected are the ones this interpreter would import.
    """
    return [sys.executable, "-m", "pip", *args]


def iter_downloaded_files(root_dir):
    """Yield ``(path relative to root_dir, size in bytes)`` for each file under *root_dir*."""
    for current, _dirs, files in os.walk(root_dir):
        for name in files:
            path = os.path.join(current, name)
            yield os.path.relpath(path, root_dir), os.path.getsize(path)


def report_download(result):
    """Print the success banner and list every file found under *result*.

    *result* is whatever ``dataset_snapshot_download`` returned; it is walked
    as a directory path here -- presumably the local cache directory.
    """
    print(f"成功! 缓存目录: {result}")
    for rel_path, size in iter_downloaded_files(result):
        print(f" {rel_path} ({size} bytes)")


def show_installed_versions(packages=("modelscope", "datasets")):
    """Print the ``Name:`` / ``Version:`` lines that ``pip show`` reports for *packages*."""
    print("=== 当前版本 ===")
    for pkg in packages:
        try:
            result = subprocess.run(
                pip_command("show", pkg),
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f" {pkg}: {e}")
            continue
        info_lines = [
            line
            for line in result.stdout.splitlines()
            if line.startswith(("Version:", "Name:"))
        ]
        if not info_lines:
            # pip reports a missing package on stderr; surface that instead of
            # silently printing nothing for this package.
            detail = result.stderr.strip() or f"pip exited with code {result.returncode}"
            print(f" {pkg}: {detail}")
            continue
        for line in info_lines:
            print(f" {line}")


def try_snapshot_download_with_mode(dataset_id):
    """Method 1: use the hub API directly (sidesteps the msdatasets import problem)."""
    print("\n=== 方式1: HubApi snapshot_download ===")
    # Broad catch on purpose: this is a probe, and any failure (missing
    # package, changed signature, network error) is itself the finding.
    try:
        from modelscope.hub.snapshot_download import dataset_snapshot_download
        print("dataset_snapshot_download 可用!")

        from modelscope.utils.constant import DownloadMode
        cache_dir = "/tmp/ms_test_cache"
        os.makedirs(cache_dir, exist_ok=True)
        result = dataset_snapshot_download(
            dataset_id=dataset_id,
            cache_dir=cache_dir,
            download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS,
        )
        report_download(result)
    except Exception as e:
        print(f"失败: {e}")


def try_snapshot_download_simple(dataset_id):
    """Method 2: call ``dataset_snapshot_download`` without a ``DownloadMode``."""
    print("\n=== 方式2: dataset_snapshot_download (简化调用) ===")
    try:
        from modelscope.hub.snapshot_download import dataset_snapshot_download

        # Separate cache directory so this attempt does not reuse method 1's files.
        cache_dir = "/tmp/ms_test_cache2"
        os.makedirs(cache_dir, exist_ok=True)
        result = dataset_snapshot_download(
            dataset_id=dataset_id,
            cache_dir=cache_dir,
        )
        report_download(result)
    except Exception as e:
        print(f"失败: {e}")


def try_hub_api_methods(namespace, ds_name):
    """Method 3: probe ``HubApi`` for ``get_dataset_file_url_with_token``-like methods."""
    print("\n=== 方式3: HubApi 获取下载 URL ===")
    try:
        from modelscope.hub.api import HubApi
        api = HubApi()

        # List every attribute that looks dataset- or download-related.
        methods = [m for m in dir(api) if 'dataset' in m.lower() or 'download' in m.lower()]
        print(f"可用方法: {methods}")

        # Try the listing methods that exist on this modelscope version.
        for method_name in ['list_repo_tree', 'get_dataset_meta_file_list']:
            if not hasattr(api, method_name):
                continue
            print(f"\n尝试 {method_name}...")
            try:
                method = getattr(api, method_name)
                result = method(ds_name, namespace=namespace, revision="master")
                print(f" 结果: {result}")
            except Exception as e:
                print(f" 失败: {e}")
    except Exception as e:
        print(f"失败: {e}")


def show_available_versions():
    """Method 4: ask pip which modelscope versions are available."""
    print("\n=== 方式4: pip 检查 ===")
    try:
        result = subprocess.run(
            pip_command("index", "versions", "modelscope"),
            capture_output=True,
            text=True,
            timeout=15,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"失败: {e}")
        return
    # Output is capped at 500 characters to keep the report short.
    print(result.stdout[:500])
    if result.returncode != 0:
        detail = result.stderr.strip() or f"pip exited with code {result.returncode}"
        print(f"失败: {detail[:500]}")


def main(argv=None):
    """Run every download approach against the requested dataset.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first one, if present, is the dataset ID.

    Returns:
        Process exit code: 0 once all approaches have run, 2 for a malformed
        dataset ID.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    dataset_id = args[0] if args else DEFAULT_DATASET_ID
    try:
        namespace, ds_name = parse_dataset_id(dataset_id)
    except ValueError as e:
        print(f"失败: {e}", file=sys.stderr)
        return 2

    print(f"数据集: {dataset_id}\n")

    # Report the installed versions first so later failures can be correlated.
    show_installed_versions()
    try_snapshot_download_with_mode(dataset_id)
    try_snapshot_download_simple(dataset_id)
    try_hub_api_methods(namespace, ds_name)
    show_available_versions()

    print("\n=== 完成 ===")
    return 0


if __name__ == "__main__":
    sys.exit(main())