Przeglądaj źródła

修改测试文件

lxylxy123321 7 godzin temu
rodzic
commit
e533cf398a
2 zmienionych plików z 115 dodań i 110 usunięć
  1. 84 68
      backend/scripts/test_ms_api.py
  2. 31 42
      result.txt

+ 84 - 68
backend/scripts/test_ms_api.py

@@ -1,8 +1,9 @@
 #!/usr/bin/env python3
-"""测试 HubApi 的 get_dataset_access_config 获取数据文件 CDN 链接。"""
+"""Test downloading images from the dataset file area using OSS credentials."""
 import json
+import urllib.request
+import urllib.parse
 import sys
-import inspect
 
 dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
 namespace, ds_name = dataset_id.split("/", 1)
@@ -12,87 +13,102 @@ print(f"数据集: {dataset_id}\n")
 from modelscope.hub.api import HubApi
 api = HubApi()
 
-# 获取 dataset_id (数字)
-print("=== 获取 dataset_id ===")
-try:
-    ds_id, ds_type = api.get_dataset_id_and_type(namespace=namespace, dataset_name=ds_name)
-    print(f"dataset_id={ds_id}, type={ds_type}")
-except Exception as e:
-    print(f"失败: {e}")
-    ds_id = None
+# Fetch the temporary OSS (STS) credentials for the dataset
+print("=== 获取 OSS 凭证 ===")
+config = api.get_dataset_access_config(
+    dataset_name=ds_name,
+    namespace=namespace,
+    revision="master",
+)
+host = config["Host"]
+backup_dir = config["BackupDir"]  # key prefix of the zip archives
+unzip_dir = config["Dir"]  # key prefix of the already-unzipped files
+access_id = config["AccessId"]
+access_secret = config["AccessSecret"]
+security_token = config["SecurityToken"]
+print(f"Host: {host}")
+print(f"Zip 目录: {backup_dir}")
+print(f"解压目录: {unzip_dir}")
+print(f"过期时间: {config['Expiration']}")
 
-# 测试 get_dataset_access_config
-print("\n=== get_dataset_access_config ===")
+# Test 1: HEAD the zip directly, without a signature (is it publicly readable?)
+print("\n=== 测试1: 直接访问 public-zip/train.zip ===")
+url = f"{host}/{backup_dir}train.zip"
+print(f"URL: {url}")
 try:
-    sig = inspect.signature(api.get_dataset_access_config)
-    print(f"签名: {sig}")
-    result = api.get_dataset_access_config(
-        dataset_name=ds_name,
-        namespace=namespace,
-        revision="master",
-    )
-    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
+    req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
+    with urllib.request.urlopen(req, timeout=15) as resp:
+        print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
 except Exception as e:
-    print(f"失败: {e}")
+    print(f"HEAD: {e}")
 
-# 测试 get_dataset_access_config_for_unzipped
-print("\n=== get_dataset_access_config_for_unzipped ===")
+# Test 2: HEAD train.zip through an STS-signed (OSS V1 presigned) URL
+print("\n=== 测试2: 带 STS 签名访问 train.zip ===")
+import time
+import hmac
+import hashlib
+import base64
+expires = str(int(time.time()) + 3600)
+def sign_url(verb, key):
+    """Return an OSS V1 presigned URL that allows `verb` on object `key` until `expires`."""
+    # The signature covers the HTTP verb, and with STS credentials the security-token sub-resource must be signed too.
+    resource = f"/{config['Bucket']}/{key}?security-token={security_token}"
+    mac = hmac.new(access_secret.encode(), f"{verb}\n\n\n{expires}\n{resource}".encode(), hashlib.sha1)
+    query = urllib.parse.urlencode({"OSSAccessKeyId": access_id, "Expires": expires, "Signature": base64.b64encode(mac.digest()).decode(), "security-token": security_token})
+    return f"{host}/{urllib.parse.quote(key)}?{query}"
+url = sign_url("HEAD", f"{backup_dir}train.zip")
+print(f"URL: {url[:200]}...")
 try:
-    sig = inspect.signature(api.get_dataset_access_config_for_unzipped)
-    print(f"签名: {sig}")
-    result = api.get_dataset_access_config_for_unzipped(
-        dataset_name=ds_name,
-        namespace=namespace,
-        revision="master",
-    )
-    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
+    req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
+    with urllib.request.urlopen(req, timeout=15) as resp:
+        print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
+        print(f">>> 成功! <<<")
 except Exception as e:
-    print(f"失败: {e}")
+    print(f"HEAD: {e}")
 
-# 测试 get_dataset_infos
-print("\n=== get_dataset_infos ===")
+# Test 3: GET the first 1 MiB of train.zip; the HEAD-signed URL above is not valid for GET, so sign again
+print("\n=== 测试3: GET 下载 train.zip 前 1MB ===")
+url = sign_url("GET", f"{backup_dir}train.zip")
 try:
-    sig = inspect.signature(api.get_dataset_infos)
-    print(f"签名: {sig}")
-    result = api.get_dataset_infos(
-        dataset_name=ds_name,
-        namespace=namespace,
-    )
-    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
+    req = urllib.request.Request(url, headers={"User-Agent": "Test", "Range": "bytes=0-1048575"})
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        data = resp.read(1048576)
+        is_zip = data[:4] == b'PK\x03\x04'
+        print(f"GET: {resp.status} | {len(data)} bytes | is_zip={is_zip}")
+        if is_zip:
+            print(f">>> 成功! 是 ZIP 文件! <<<")
 except Exception as e:
-    print(f"失败: {e}")
+    print(f"GET: {e}")
 
-# 测试 get_dataset_file_url
-print("\n=== get_dataset_file_url (train.csv) ===")
+# Test 4: GET the public-unzip-dataset prefix (the unzipped images) without a signature
+print("\n=== 测试4: 直接访问 public-unzip-dataset(已解压图片) ===")
+url = f"{host}/{unzip_dir}"
+print(f"URL: {url}")
 try:
-    url = api.get_dataset_file_url(
-        file_name="train.csv",
-        dataset_name=ds_name,
-        namespace=namespace,
-        revision="master",
-    )
-    print(f"URL: {url}")
+    req = urllib.request.Request(url, headers={"User-Agent": "Test"})
+    with urllib.request.urlopen(req, timeout=15) as resp:
+        content = resp.read().decode("utf-8", errors="replace")
+        print(f"状态: {resp.status}")
+        print(f"内容前 500 字符: {content[:500]}")
 except Exception as e:
     print(f"失败: {e}")
 
-# 测试 get_dataset_file_url (train.zip - 数据文件区)
-print("\n=== get_dataset_file_url (train.zip) ===")
+# Test 5: GET the public-unzip-dataset prefix through an STS-signed URL
+print("\n=== 测试5: 带签名访问 public-unzip-dataset ===")
+string_to_sign = f"GET\n\n\n{expires}\n/{config['Bucket']}/{unzip_dir}?security-token={security_token}"  # with STS the token is a signed sub-resource
+h = hmac.new(access_secret.encode(), string_to_sign.encode(), hashlib.sha1)
+signature = urllib.parse.quote(base64.b64encode(h.digest()), safe="")
+url = (f"{host}/{unzip_dir}"
+       f"?OSSAccessKeyId={urllib.parse.quote(access_id, safe='')}"
+       f"&Expires={expires}"
+       f"&Signature={signature}"
+       f"&security-token={urllib.parse.quote(security_token, safe='')}")
 try:
-    url = api.get_dataset_file_url(
-        file_name="train.zip",
-        dataset_name=ds_name,
-        namespace=namespace,
-        revision="master",
-    )
-    print(f"URL: {url}")
-    # 尝试下载验证
-    import urllib.request
-    req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
-    try:
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
-    except Exception as e2:
-        print(f"HEAD: {e2}")
+    req = urllib.request.Request(url, headers={"User-Agent": "Test"})
+    with urllib.request.urlopen(req, timeout=15) as resp:
+        content = resp.read().decode("utf-8", errors="replace")
+        print(f"状态: {resp.status}")
+        print(f"内容前 500 字符: {content[:500]}")
 except Exception as e:
     print(f"失败: {e}")
 

+ 31 - 42
result.txt

@@ -1,47 +1,36 @@
 lq@lq:~/Fine-tuning$ cp backend/scripts/test_ms_api.py backend/data/ && sudo docker exec -it finetune-backend python3 /root/Fine-tuning/backend/data/test_ms_api.py tany0699/carBrands50
 数据集: tany0699/carBrands50
 
-=== 当前版本 ===
-  Name: modelscope
-  Version: 1.17.1
-  Name: datasets
-  Version: 4.8.5
-
-=== 方式1: HubApi snapshot_download ===
-dataset_snapshot_download 可用!
-失败: dataset_snapshot_download() got an unexpected keyword argument 'download_mode'
-
-=== 方式2: dataset_snapshot_download (简化调用) ===
-Downloading: 100%|███████████████████████████████████████████████████| 139/139 [00:00<00:00, 217B/s]
-Downloading: 100%|███████████████████████████████████████████████████| 379/379 [00:00<00:00, 699B/s]
-Downloading: 100%|███████████████████████████████████████████████████| 210/210 [00:00<00:00, 330B/s]
-Downloading: 100%|█████████████████████████████████████████████| 1.80k/1.80k [00:00<00:00, 2.88kB/s]
-Downloading: 100%|██████████████████████████████████████████████| 72.8k/72.8k [00:00<00:00, 113kB/s]
-Downloading: 100%|████████████████████████████████████████████████| 137k/137k [00:00<00:00, 190kB/s]
-Downloading: 100%|█████████████████████████████████████████████| 2.95k/2.95k [00:00<00:00, 5.47kB/s]
-成功! 缓存目录: /tmp/ms_test_cache2/tany0699/carBrands50
-  .mv  (6 bytes)
-  README.md  (1845 bytes)
-  .mdl  (43 bytes)
-  classname.txt  (379 bytes)
-  train.csv  (140505 bytes)
-  sample.jpg  (74521 bytes)
-  carBrands50.json  (139 bytes)
-  val.csv  (3020 bytes)
-  dataset_infos.json  (210 bytes)
-  .msc  (490 bytes)
-
-=== 方式3: HubApi 获取下载 URL ===
-可用方法: ['create_dataset', 'dataset_download_statistics', 'delete_oss_dataset_dir', 'delete_oss_dataset_object', 'get_dataset_access_config', 'get_dataset_access_config_for_unzipped', 'get_dataset_access_config_session', 'get_dataset_file_url', 'get_dataset_file_url_origin', 'get_dataset_id_and_type', 'get_dataset_infos', 'get_dataset_meta_file_list', 'get_dataset_meta_files_local_paths', 'list_datasets', 'list_oss_dataset_objects']
-
-尝试 list_repo_tree...
-  失败: HubApi.list_repo_tree() missing 1 required positional argument: 'root_path'
-
-尝试 get_dataset_meta_file_list...
-  失败: HubApi.get_dataset_meta_file_list() missing 1 required positional argument: 'dataset_id'
-
-=== 方式4: pip 检查 ===
-modelscope (1.37.1)
-Available versions: 1.37.1, 1.37.0, 1.36.3, 1.36.2, 1.36.1, 1.36.0, 1.35.4, 1.35.3, 1.35.2, 1.35.1, 1.35.0, 1.34.0, 1.33.0, 1.32.0, 1.31.0, 1.30.0, 1.29.2, 1.29.1, 1.29.0, 1.28.2, 1.28.1, 1.28.0, 1.27.1, 1.27.0, 1.26.0, 1.25.0, 1.24.1, 1.24.0, 1.23.2, 1.23.1, 1.23.0, 1.22.3, 1.22.2, 1.22.1, 1.22.0, 1.21.1, 1.21.0, 1.20.1, 1.20.0, 1.19.2, 1.19.1, 1.19.0, 1.18.1, 1.18.0, 1.17.1, 1.17.0, 1.16.1, 1.16.0, 1.15.0, 1.14.0, 1.13.3, 1.13.2, 1.13.1, 1.13.0, 1.12.0, 1.11.1, 1.11.0, 1.10
+=== 获取 dataset_id ===
+dataset_id=2119, type=2
+
+=== get_dataset_access_config ===
+签名: (dataset_name: str, namespace: str, revision: Optional[str] = 'master')
+结果: {
+  "AccessId": "STS.NXj6GJFxy94HMWrqjL1UHkFMR",
+  "AccessSecret": "AxCCmYH4s381axveFQWYd48uzDTfGjb43fRmDnQPvbU2",
+  "SecurityToken": "CAISiAR1q6Ft5B2yfSjIr5vfff3+q6dYjvajT3HDlWoZPdpkhIPmsDz2IHlEdXZvAekbt/U/mGBY6/YblrtOU5tCTEDoZNd59ZlL3wKlbpGZJmElejJf2vOfAmG2J0PRPqWwCryLoLm/F96pb1fb7GURpZLxaTSlWXG8LJSNkuQJR98LXw6+H1gkZNBNPVlNpdNYT8W4V5CXPwXtn3DbAWdxpwN4khkf06mkxdCG4ResXTSY6OYevNb2OYP2LZsubpt2T96u0fAzdrfLlzZN4RxL/+Jrir5F4XLLt82UGFRNpAmPNfuMrIw+awRwfIIgEqhIt6Wk0vhxofDejcO1qVdENvoHVD/EFsLygpnIEf+gLdslMLbmMTPVz9qLN5KwrgU2YGoDOQ8NObgJI3RrWxs3UWOYeO319FnWaxyuV+2FzatxyYd/y1TjuMiLPx+TSrOIiXhJa9g3ZlkCLRcQ0Xe8KugDehdQfkkgRZTtFNQsNEAD9f+45F2JDXU+k0s65aOuO6nk3YkEcpj6U5581o4QWY9LqWNCTS6sEuL/1RhMLTE8EOYKgPmwacaliaWMxeyYauPdEeCGCpG4rNd5xMDkawSzTUWZjrVLATITa1bLUS/DXbnUsRr8S6G+HID0kCdX1ShCyYGcweq9HiEdnP5k0KRGtPem8WctjWkgSpOO54k21nSVkirOCcFRGoABbd4cbYFtd40HoLkSnCKfKLZQo0/kRyaw8TMwRMGKOpT3INcqyzwr6VPAUnJesCVdF+u+SsqnMxUfy53qBFro7oU/ciHgS1FhOTPLaXWgDm5gl51s+MVEJ+SiXqY33Wh1XsY+/CHkMt/orNHNUMSio4iFnsHgt316kKACYVHKIVAgAA==",
+  "Dir": "public-unzip-dataset/tany0699/carBrands50/master/",
+  "Host": "https://dataset-hub.oss-cn-hangzhou.aliyuncs.com",
+  "Bucket": "dataset-hub",
+  "Region": "oss-cn-hangzhou",
+  "Expiration": "2026-05-29T03:09:46+08:00",
+  "BackupDir": "public-zip/tany0699/carBrands50/master/"
+}
+
+=== get_dataset_access_config_for_unzipped ===
+签名: (dataset_name: str, namespace: str, revision: str, zip_file_name: str)
+失败: HubApi.get_dataset_access_config_for_unzipped() missing 1 required positional argument: 'zip_file_name'
+
+=== get_dataset_infos ===
+签名: (dataset_hub_id: str, revision: str, files_metadata: bool = False, timeout: float = 100, recursive: str = 'True')
+失败: HubApi.get_dataset_infos() got an unexpected keyword argument 'dataset_name'
+
+=== get_dataset_file_url (train.csv) ===
+URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo?Source=SDK&Revision=master&FilePath=train.csv&View=False
+
+=== get_dataset_file_url (train.zip) ===
+URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo?Source=SDK&Revision=master&FilePath=train.zip&View=False
+HEAD: HTTP Error 404: 
 
 === 完成 ===