lxylxy123321 пре 16 часова
родитељ
комит
7aff30cd45
2 измењена фајла са 114 додатих и 126 уклоњених линија
  1. 68 73
      backend/scripts/test_ms_api.py
  2. 46 53
      result.txt

+ 68 - 73
backend/scripts/test_ms_api.py

@@ -1,103 +1,98 @@
 #!/usr/bin/env python3
-"""测试不同方式下载 ModelScope 数据文件区图片。"""
+"""测试 HubApi 的 get_dataset_access_config 获取数据文件 CDN 链接。"""
 import json
-import urllib.request
-import urllib.parse
 import sys
-import subprocess
-import os
+import inspect
 
-api_base = "https://www.modelscope.cn"
 dataset_id = sys.argv[1] if len(sys.argv) > 1 else "tany0699/carBrands50"
 namespace, ds_name = dataset_id.split("/", 1)
 
 print(f"数据集: {dataset_id}\n")
 
-# 先查看当前版本
-print("=== 当前版本 ===")
-for pkg in ["modelscope", "datasets"]:
-    try:
-        result = subprocess.run(
-            ["pip", "show", pkg], capture_output=True, text=True, timeout=10
-        )
-        for line in result.stdout.splitlines():
-            if line.startswith("Version:") or line.startswith("Name:"):
-                print(f"  {line}")
-    except Exception as e:
-        print(f"  {pkg}: {e}")
+from modelscope.hub.api import HubApi
+api = HubApi()
 
-# 方式1: 直接用 hub.api(跳过 msdatasets 的 import 问题)
-print("\n=== 方式1: HubApi snapshot_download ===")
+# 获取 dataset_id (数字)
+print("=== 获取 dataset_id ===")
 try:
-    from modelscope.hub.snapshot_download import dataset_snapshot_download
-    print("dataset_snapshot_download 可用!")
-    from modelscope.utils.constant import DownloadMode
-    cache_dir = "/tmp/ms_test_cache"
-    os.makedirs(cache_dir, exist_ok=True)
-    result = dataset_snapshot_download(
-        dataset_id=dataset_id,
-        cache_dir=cache_dir,
-        download_mode=DownloadMode.REUSE_DATASET_IF_EXISTS,
-    )
-    print(f"成功! 缓存目录: {result}")
-    # 列出文件
-    for root, dirs, files in os.walk(result):
-        for f in files:
-            fp = os.path.join(root, f)
-            size = os.path.getsize(fp)
-            print(f"  {os.path.relpath(fp, result)}  ({size} bytes)")
+    ds_id, ds_type = api.get_dataset_id_and_type(namespace=namespace, dataset_name=ds_name)
+    print(f"dataset_id={ds_id}, type={ds_type}")
 except Exception as e:
     print(f"失败: {e}")
+    ds_id = None
 
-# 方式2: 尝试 dataset_snapshot_download 不带 DownloadMode
-print("\n=== 方式2: dataset_snapshot_download (简化调用) ===")
+# 测试 get_dataset_access_config
+print("\n=== get_dataset_access_config ===")
 try:
-    from modelscope.hub.snapshot_download import dataset_snapshot_download
-    cache_dir = "/tmp/ms_test_cache2"
-    os.makedirs(cache_dir, exist_ok=True)
-    result = dataset_snapshot_download(
-        dataset_id=dataset_id,
-        cache_dir=cache_dir,
+    sig = inspect.signature(api.get_dataset_access_config)
+    print(f"签名: {sig}")
+    result = api.get_dataset_access_config(
+        dataset_name=ds_name,
+        namespace=namespace,
+        revision="master",
     )
-    print(f"成功! 缓存目录: {result}")
-    for root, dirs, files in os.walk(result):
-        for f in files:
-            fp = os.path.join(root, f)
-            size = os.path.getsize(fp)
-            print(f"  {os.path.relpath(fp, result)}  ({size} bytes)")
+    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
 except Exception as e:
     print(f"失败: {e}")
 
-# 方式3: 尝试直接用 HubApi 的 get_dataset_file_url_with_token 或类似方法
-print("\n=== 方式3: HubApi 获取下载 URL ===")
+# 测试 get_dataset_access_config_for_unzipped
+print("\n=== get_dataset_access_config_for_unzipped ===")
 try:
-    from modelscope.hub.api import HubApi
-    api = HubApi()
-    # 列出所有可用方法
-    methods = [m for m in dir(api) if 'dataset' in m.lower() or 'download' in m.lower()]
-    print(f"可用方法: {methods}")
+    sig = inspect.signature(api.get_dataset_access_config_for_unzipped)
+    print(f"签名: {sig}")
+    result = api.get_dataset_access_config_for_unzipped(
+        dataset_name=ds_name,
+        namespace=namespace,
+        revision="master",
+    )
+    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
+except Exception as e:
+    print(f"失败: {e}")
 
-    # 尝试 list_repo_tree
-    for method_name in ['list_repo_tree', 'get_dataset_meta_file_list']:
-        if hasattr(api, method_name):
-            print(f"\n尝试 {method_name}...")
-            try:
-                method = getattr(api, method_name)
-                result = method(ds_name, namespace=namespace, revision="master")
-                print(f"  结果: {result}")
-            except Exception as e:
-                print(f"  失败: {e}")
+# 测试 get_dataset_infos
+print("\n=== get_dataset_infos ===")
+try:
+    sig = inspect.signature(api.get_dataset_infos)
+    print(f"签名: {sig}")
+    result = api.get_dataset_infos(
+        dataset_name=ds_name,
+        namespace=namespace,
+    )
+    print(f"结果: {json.dumps(result, indent=2, ensure_ascii=False, default=str)[:3000]}")
+except Exception as e:
+    print(f"失败: {e}")
 
+# 测试 get_dataset_file_url
+print("\n=== get_dataset_file_url (train.csv) ===")
+try:
+    url = api.get_dataset_file_url(
+        file_name="train.csv",
+        dataset_name=ds_name,
+        namespace=namespace,
+        revision="master",
+    )
+    print(f"URL: {url}")
 except Exception as e:
     print(f"失败: {e}")
 
-# 方式4: pip 查看 modelscope 可用版本
-print("\n=== 方式4: pip 检查 ===")
+# 测试 get_dataset_file_url (train.zip - 数据文件区)
+print("\n=== get_dataset_file_url (train.zip) ===")
 try:
-    result = subprocess.run(
-        ["pip", "index", "versions", "modelscope"], capture_output=True, text=True, timeout=15
+    url = api.get_dataset_file_url(
+        file_name="train.zip",
+        dataset_name=ds_name,
+        namespace=namespace,
+        revision="master",
     )
-    print(result.stdout[:500])
+    print(f"URL: {url}")
+    # 尝试下载验证
+    import urllib.request
+    req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "Test"})
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            print(f"HEAD: {resp.status} | size={resp.headers.get('Content-Length', '?')}")
+    except Exception as e2:
+        print(f"HEAD: {e2}")
 except Exception as e:
     print(f"失败: {e}")
 

+ 46 - 53
result.txt

@@ -1,54 +1,47 @@
 lq@lq:~/Fine-tuning$ cp backend/scripts/test_ms_api.py backend/data/ && sudo docker exec -it finetune-backend python3 /root/Fine-tuning/backend/data/test_ms_api.py tany0699/carBrands50
-数据集: tany0699/carBrands50, 目标文件: train.zip
-
-hub_id = 2119
-
---- 格式1 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo?Source=SDK&Revision=master&FilePath=train.zip&View=false
-  HEAD: 404
-  GET: 404
-
---- 格式2 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo?Revision=master&FilePath=train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式3 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/resolve/master/train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式4 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/2119/repo?Revision=master&FilePath=train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式5 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo/files?Revision=master&FilePath=train.zip
-  HEAD: 400
-  GET: 400
-
---- 格式6 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/download?Revision=master&FilePath=train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式7 ---
-  URL: https://www.modelscope.cn/datasets/tany0699/carBrands50/resolve/master/train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式8 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/data-files?Revision=master&FilePath=train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式9 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/2119/resolve/master/train.zip
-  HEAD: 404
-  GET: 404
-
---- 格式10 ---
-  URL: https://www.modelscope.cn/api/v1/datasets/tany0699/carBrands50/repo?Source=SDK&Revision=master&FilePath=train.zip
-  HEAD: 404
-  GET: 404
+数据集: tany0699/carBrands50
+
+=== 当前版本 ===
+  Name: modelscope
+  Version: 1.17.1
+  Name: datasets
+  Version: 4.8.5
+
+=== 方式1: HubApi snapshot_download ===
+dataset_snapshot_download 可用!
+失败: dataset_snapshot_download() got an unexpected keyword argument 'download_mode'
+
+=== 方式2: dataset_snapshot_download (简化调用) ===
+Downloading: 100%|███████████████████████████████████████████████████| 139/139 [00:00<00:00, 217B/s]
+Downloading: 100%|███████████████████████████████████████████████████| 379/379 [00:00<00:00, 699B/s]
+Downloading: 100%|███████████████████████████████████████████████████| 210/210 [00:00<00:00, 330B/s]
+Downloading: 100%|█████████████████████████████████████████████| 1.80k/1.80k [00:00<00:00, 2.88kB/s]
+Downloading: 100%|██████████████████████████████████████████████| 72.8k/72.8k [00:00<00:00, 113kB/s]
+Downloading: 100%|████████████████████████████████████████████████| 137k/137k [00:00<00:00, 190kB/s]
+Downloading: 100%|█████████████████████████████████████████████| 2.95k/2.95k [00:00<00:00, 5.47kB/s]
+成功! 缓存目录: /tmp/ms_test_cache2/tany0699/carBrands50
+  .mv  (6 bytes)
+  README.md  (1845 bytes)
+  .mdl  (43 bytes)
+  classname.txt  (379 bytes)
+  train.csv  (140505 bytes)
+  sample.jpg  (74521 bytes)
+  carBrands50.json  (139 bytes)
+  val.csv  (3020 bytes)
+  dataset_infos.json  (210 bytes)
+  .msc  (490 bytes)
+
+=== 方式3: HubApi 获取下载 URL ===
+可用方法: ['create_dataset', 'dataset_download_statistics', 'delete_oss_dataset_dir', 'delete_oss_dataset_object', 'get_dataset_access_config', 'get_dataset_access_config_for_unzipped', 'get_dataset_access_config_session', 'get_dataset_file_url', 'get_dataset_file_url_origin', 'get_dataset_id_and_type', 'get_dataset_infos', 'get_dataset_meta_file_list', 'get_dataset_meta_files_local_paths', 'list_datasets', 'list_oss_dataset_objects']
+
+尝试 list_repo_tree...
+  失败: HubApi.list_repo_tree() missing 1 required positional argument: 'root_path'
+
+尝试 get_dataset_meta_file_list...
+  失败: HubApi.get_dataset_meta_file_list() missing 1 required positional argument: 'dataset_id'
+
+=== 方式4: pip 检查 ===
+modelscope (1.37.1)
+Available versions: 1.37.1, 1.37.0, 1.36.3, 1.36.2, 1.36.1, 1.36.0, 1.35.4, 1.35.3, 1.35.2, 1.35.1, 1.35.0, 1.34.0, 1.33.0, 1.32.0, 1.31.0, 1.30.0, 1.29.2, 1.29.1, 1.29.0, 1.28.2, 1.28.1, 1.28.0, 1.27.1, 1.27.0, 1.26.0, 1.25.0, 1.24.1, 1.24.0, 1.23.2, 1.23.1, 1.23.0, 1.22.3, 1.22.2, 1.22.1, 1.22.0, 1.21.1, 1.21.0, 1.20.1, 1.20.0, 1.19.2, 1.19.1, 1.19.0, 1.18.1, 1.18.0, 1.17.1, 1.17.0, 1.16.1, 1.16.0, 1.15.0, 1.14.0, 1.13.3, 1.13.2, 1.13.1, 1.13.0, 1.12.0, 1.11.1, 1.11.0, 1.10
+
+=== 完成 ===