test_ms_api.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. #!/usr/bin/env python3
  2. """测试 ModelScope 数据集 API,查找数据文件区的 CDN 链接。"""
  3. import json
  4. import urllib.request
  5. import urllib.parse
  6. import sys
  7. api_base = "https://www.modelscope.cn"
  8. dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
  9. print(f"测试数据集: {dataset_id}\n")
  10. # Test 1: 完整查看数据集 info API 返回
  11. print("=== Test1: 数据集 info API 完整响应 ===")
  12. try:
  13. url = f"{api_base}/api/v1/datasets/{dataset_id}"
  14. print(f"请求: {url}")
  15. req = urllib.request.Request(url, headers={"User-Agent": "Test"})
  16. with urllib.request.urlopen(req, timeout=30) as resp:
  17. info = json.loads(resp.read().decode())
  18. print(json.dumps(info, indent=2, ensure_ascii=False)[:3000])
  19. except Exception as e:
  20. print(f"失败: {e}")
  21. # Test 2: 尝试 HubApi(跳过 msdatasets 的 import 问题)
  22. print("\n=== Test2: HubApi 直接调用 ===")
  23. try:
  24. from modelscope.hub.api import HubApi
  25. api = HubApi()
  26. # 获取数据集文件列表
  27. print("尝试 get_dataset_files...")
  28. try:
  29. namespace, ds_name = dataset_id.split("/", 1)
  30. files = api.get_dataset_files(ds_name, namespace=namespace, recursive=True)
  31. print(f"get_dataset_files 返回 {len(files)} 个文件:")
  32. for f in files:
  33. print(f" {f}")
  34. except Exception as e:
  35. print(f"get_dataset_files 失败: {e}")
  36. # 尝试获取文件下载 URL
  37. print("\n尝试 get_dataset_file_url...")
  38. try:
  39. namespace, ds_name = dataset_id.split("/", 1)
  40. url = api.get_dataset_file_url("train.csv", ds_name, namespace, revision="master")
  41. print(f"train.csv 下载 URL: {url}")
  42. except Exception as e:
  43. print(f"get_dataset_file_url 失败: {e}")
  44. except ImportError as e:
  45. print(f"import 失败: {e}")
  46. except Exception as e:
  47. print(f"失败: {e}")
  48. # Test 3: 查看 carBrands50.json 配置文件(可能包含数据文件 URL)
  49. print("\n=== Test3: carBrands50.json 配置文件 ===")
  50. try:
  51. namespace, ds_name = dataset_id.split("/", 1)
  52. url = (f"{api_base}/api/v1/datasets/{namespace}/{ds_name}/repo"
  53. f"?Revision=master&FilePath=carBrands50.json&View=false")
  54. print(f"请求: {url}")
  55. req = urllib.request.Request(url, headers={"User-Agent": "Test"})
  56. with urllib.request.urlopen(req, timeout=30) as resp:
  57. config = resp.read().decode()
  58. print(config[:2000])
  59. except Exception as e:
  60. print(f"失败: {e}")
  61. # Test 4: 查看 dataset_infos.json
  62. print("\n=== Test4: dataset_infos.json ===")
  63. try:
  64. url = (f"{api_base}/api/v1/datasets/{namespace}/{ds_name}/repo"
  65. f"?Revision=master&FilePath=dataset_infos.json&View=false")
  66. print(f"请求: {url}")
  67. req = urllib.request.Request(url, headers={"User-Agent": "Test"})
  68. with urllib.request.urlopen(req, timeout=30) as resp:
  69. config = resp.read().decode()
  70. print(config[:2000])
  71. except Exception as e:
  72. print(f"失败: {e}")