# test_ms_api.py (3.0 KB)
  1. #!/usr/bin/env python3
  2. """测试 HubApi 的 get_dataset_access_config 获取数据文件 CDN 链接。"""
  3. import json
  4. import sys
  5. import inspect
  6. dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
  7. namespace, ds_name = dataset_id.split("/", 1)
  8. print(f"数据集: {dataset_id}\n")
  9. from modelscope.hub.api import HubApi
  10. api = HubApi()
  11. # 获取 dataset_id (数字)
  12. print("=== 获取 dataset_id ===")
  13. try:
  14. ds_id, ds_type = api.get_dataset_id_and_type(namespace=namespace, dataset_name=ds_name)
  15. print(f"dataset_id={ds_id}, type={ds_type}")
  16. except Exception as e:
  17. print(f"失败: {e}")
  18. ds_id = None
  19. # 测试 get_dataset_access_config
  20. print("\n=== get_dataset_access_config ===")
  21. try:
  22. sig = inspect.signature(api.get_dataset_access_config)
  23. print(f"签名: {sig}")
  24. result = api.get_dataset_access_config(
  25. dataset_name=ds_name,
  26. namespace=namespace,
  27. revision="master",
  28. )
  29. print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
  30. except Exception as e:
  31. print(f"失败: {e}")
  32. # 测试 get_dataset_access_config_for_unzipped
  33. print("\n=== get_dataset_access_config_for_unzipped ===")
  34. try:
  35. sig = inspect.signature(api.get_dataset_access_config_for_unzipped)
  36. print(f"签名: {sig}")
  37. result = api.get_dataset_access_config_for_unzipped(
  38. dataset_name=ds_name,
  39. namespace=namespace,
  40. revision="master",
  41. )
  42. print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
  43. except Exception as e:
  44. print(f"失败: {e}")
  45. # 测试 get_dataset_infos
  46. print("\n=== get_dataset_infos ===")
  47. try:
  48. sig = inspect.signature(api.get_dataset_infos)
  49. print(f"签名: {sig}")
  50. result = api.get_dataset_infos(
  51. dataset_name=ds_name,
  52. namespace=namespace,
  53. )
  54. print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
  55. except Exception as e:
  56. print(f"失败: {e}")
  57. # 测试 get_dataset_file_url
  58. print("\n=== get_dataset_file_url (train.csv) ===")
  59. try:
  60. url = api.get_dataset_file_url(
  61. file_name="train.csv",
  62. dataset_name=ds_name,
  63. namespace=namespace,
  64. revision="master",
  65. )
  66. print(f"URL: {url}")
  67. except Exception as e:
  68. print(f"失败: {e}")
  69. # 测试 get_dataset_file_url (train.zip - 数据文件区)
  70. print("\n=== get_dataset_file_url (train.zip) ===")
  71. try:
  72. url = api.get_dataset_file_url(
  73. file_name="train.zip",
  74. dataset_name=ds_name,
  75. namespace=namespace,
  76. revision="master",
  77. )
  78. print(f"URL: {url}")
  79. # 尝试下载验证
  80. import urllib.request
  81. req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
  82. try:
  83. with urllib.request.urlopen(req, timeout=15) as resp:
  84. print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
  85. except Exception as e2:
  86. print(f"HEAD: {e2}")
  87. except Exception as e:
  88. print(f"失败: {e}")
  89. print("\n=== 完成 ===")