# test_ms_api.py (4.3 KB)
  1. #!/usr/bin/env python3
  2. """测试通过 OSS 凭证下载数据文件区的图片。"""
  3. import json
  4. import urllib.request
  5. import urllib.parse
  6. import sys
  7. dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
  8. namespace, ds_name = dataset_id.split("/", 1)
  9. print(f"数据集: {dataset_id}\n")
  10. from modelscope.hub.api import HubApi
  11. api = HubApi()
  12. # 获取 OSS 凭证
  13. print("=== 获取 OSS 凭证 ===")
  14. config = api.get_dataset_access_config(
  15. dataset_name=ds_name,
  16. namespace=namespace,
  17. revision="master",
  18. )
  19. host = config["Host"]
  20. backup_dir = config["BackupDir"] # zip 文件
  21. unzip_dir = config["Dir"] # 已解压的文件
  22. access_id = config["AccessId"]
  23. access_secret = config["AccessSecret"]
  24. security_token = config["SecurityToken"]
  25. print(f"Host: {host}")
  26. print(f"Zip 目录: {backup_dir}")
  27. print(f"解压目录: {unzip_dir}")
  28. print(f"过期时间: {config['Expiration']}")
  29. # 测试1: 直接下载 zip(公开访问?)
  30. print("\n=== 测试1: 直接访问 public-zip/train.zip ===")
  31. url = f"{host}/{backup_dir}train.zip"
  32. print(f"URL: {url}")
  33. try:
  34. req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
  35. with urllib.request.urlopen(req, timeout=15) as resp:
  36. print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
  37. except Exception as e:
  38. print(f"HEAD: {e}")
  39. # 测试2: 带 STS 签名下载 zip
  40. print("\n=== 测试2: 带 STS 签名访问 train.zip ===")
  41. # OSS STS 签名 URL 格式: ?OSSAccessKeyId=xxx&Expires=xxx&Signature=xxx&security-token=xxx
  42. import time
  43. import hmac
  44. import hashlib
  45. import base64
  46. expires = str(int(time.time()) + 3600)
  47. string_to_sign = f"HEAD\n\n\n{expires}\n/{config['Bucket']}/{backup_dir}train.zip"
  48. h = hmac.new(access_secret.encode(), string_to_sign.encode(), hashlib.sha1)
  49. signature = urllib.parse.quote(base64.b64encode(h.digest()))
  50. url = (f"{host}/{backup_dir}train.zip"
  51. f"?OSSAccessKeyId={urllib.parse.quote(access_id)}"
  52. f"&Expires={expires}"
  53. f"&Signature={signature}"
  54. f"&security-token={urllib.parse.quote(security_token)}")
  55. print(f"URL: {url[:200]}...")
  56. try:
  57. req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
  58. with urllib.request.urlopen(req, timeout=15) as resp:
  59. print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
  60. print(f">>> 成功! <<<")
  61. except Exception as e:
  62. print(f"HEAD: {e}")
  63. # 测试3: GET 下载前 1MB
  64. print("\n=== 测试3: GET 下载 train.zip 前 1MB ===")
  65. try:
  66. req = urllib.request.Request(url, headers={"User-Agent": "Test", "Range": "bytes=0-1048575"})
  67. with urllib.request.urlopen(req, timeout=30) as resp:
  68. data = resp.read(1048576)
  69. is_zip = data[:4] == b'PK\x03\x04'
  70. print(f"GET: {resp.status} | {len(data)} bytes | is_zip={is_zip}")
  71. if is_zip:
  72. print(f">>> 成功! 是 ZIP 文件! <<<")
  73. except Exception as e:
  74. print(f"GET: {e}")
  75. # 测试4: 访问 public-unzip-dataset(已解压的图片)
  76. print("\n=== 测试4: 直接访问 public-unzip-dataset(已解压图片) ===")
  77. url = f"{host}/{unzip_dir}"
  78. print(f"URL: {url}")
  79. try:
  80. req = urllib.request.Request(url, headers={"User-Agent": "Test"})
  81. with urllib.request.urlopen(req, timeout=15) as resp:
  82. content = resp.read().decode("utf-8", errors="replace")
  83. print(f"状态: {resp.status}")
  84. print(f"内容前 500 字符: {content[:500]}")
  85. except Exception as e:
  86. print(f"失败: {e}")
  87. # 测试5: 带签名访问 public-unzip-dataset
  88. print("\n=== 测试5: 带签名访问 public-unzip-dataset ===")
  89. string_to_sign = f"GET\n\n\n{expires}\n/{config['Bucket']}/{unzip_dir}"
  90. h = hmac.new(access_secret.encode(), string_to_sign.encode(), hashlib.sha1)
  91. signature = urllib.parse.quote(base64.b64encode(h.digest()))
  92. url = (f"{host}/{unzip_dir}"
  93. f"?OSSAccessKeyId={urllib.parse.quote(access_id)}"
  94. f"&Expires={expires}"
  95. f"&Signature={signature}"
  96. f"&security-token={urllib.parse.quote(security_token)}")
  97. try:
  98. req = urllib.request.Request(url, headers={"User-Agent": "Test"})
  99. with urllib.request.urlopen(req, timeout=15) as resp:
  100. content = resp.read().decode("utf-8", errors="replace")
  101. print(f"状态: {resp.status}")
  102. print(f"内容前 500 字符: {content[:500]}")
  103. except Exception as e:
  104. print(f"失败: {e}")
  105. print("\n=== 完成 ===")