test_ms_api.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/env python3
  2. """测试不同方式下载 ModelScope 数据文件区图片。"""
  3. import json
  4. import urllib.request
  5. import urllib.parse
  6. import sys
  7. import subprocess
  8. import os
  9. api_base = "https://www.modelscope.cn"
  10. dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
  11. namespace, ds_name = dataset_id.split("/", 1)
  12. print(f"数据集: {dataset_id}\n")
  13. # 先查看当前版本
  14. print("=== 当前版本 ===")
  15. for pkg in ["modelscope", "datasets"]:
  16. try:
  17. result = subprocess.run(
  18. ["pip", "show", pkg], capture_output=True, text=True, timeout=10
  19. )
  20. for line in result.stdout.splitlines():
  21. if line.startswith("Version:") or line.startswith("Name:"):
  22. print(f" {line}")
  23. except Exception as e:
  24. print(f" {pkg}: {e}")
  25. # 方式1: 直接用 hub.api(跳过 msdatasets 的 import 问题)
  26. print("\n=== 方式1: HubApi snapshot_download ===")
  27. try:
  28. from modelscope.hub.snapshot_download import dataset_snapshot_download
  29. print("dataset_snapshot_download 可用!")
  30. from modelscope.utils.constant import DownloadMode
  31. cache_dir = "/tmp/ms_test_cache"
  32. os.makedirs(cache_dir, exist_ok=True)
  33. result = dataset_snapshot_download(
  34. dataset_id=dataset_id,
  35. cache_dir=cache_dir,
  36. download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS,
  37. )
  38. print(f"成功! 缓存目录: {result}")
  39. # 列出文件
  40. for root, dirs, files in os.walk(result):
  41. for f in files:
  42. fp = os.path.join(root, f)
  43. size = os.path.getsize(fp)
  44. print(f" {os.path.relpath(fp, result)} ({size} bytes)")
  45. except Exception as e:
  46. print(f"失败: {e}")
  47. # 方式2: 尝试 dataset_snapshot_download 不带 DownloadMode
  48. print("\n=== 方式2: dataset_snapshot_download (简化调用) ===")
  49. try:
  50. from modelscope.hub.snapshot_download import dataset_snapshot_download
  51. cache_dir = "/tmp/ms_test_cache2"
  52. os.makedirs(cache_dir, exist_ok=True)
  53. result = dataset_snapshot_download(
  54. dataset_id=dataset_id,
  55. cache_dir=cache_dir,
  56. )
  57. print(f"成功! 缓存目录: {result}")
  58. for root, dirs, files in os.walk(result):
  59. for f in files:
  60. fp = os.path.join(root, f)
  61. size = os.path.getsize(fp)
  62. print(f" {os.path.relpath(fp, result)} ({size} bytes)")
  63. except Exception as e:
  64. print(f"失败: {e}")
  65. # 方式3: 尝试直接用 HubApi 的 get_dataset_file_url_with_token 或类似方法
  66. print("\n=== 方式3: HubApi 获取下载 URL ===")
  67. try:
  68. from modelscope.hub.api import HubApi
  69. api = HubApi()
  70. # 列出所有可用方法
  71. methods = [m for m in dir(api) if 'dataset' in m.lower() or 'download' in m.lower()]
  72. print(f"可用方法: {methods}")
  73. # 尝试 list_repo_tree
  74. for method_name in ['list_repo_tree', 'get_dataset_meta_file_list']:
  75. if hasattr(api, method_name):
  76. print(f"\n尝试 {method_name}...")
  77. try:
  78. method = getattr(api, method_name)
  79. result = method(ds_name, namespace=namespace, revision="master")
  80. print(f" 结果: {result}")
  81. except Exception as e:
  82. print(f" 失败: {e}")
  83. except Exception as e:
  84. print(f"失败: {e}")
  85. # 方式4: pip 查看 modelscope 可用版本
  86. print("\n=== 方式4: pip 检查 ===")
  87. try:
  88. result = subprocess.run(
  89. ["pip", "index", "versions", "modelscope"], capture_output=True, text=True, timeout=15
  90. )
  91. print(result.stdout[:500])
  92. except Exception as e:
  93. print(f"失败: {e}")
  94. print("\n=== 完成 ===")