#!/usr/bin/env python3
"""Test the full MsDataset.load() download flow.

Usage::

    script.py [namespace/dataset_name]

The script tries the ``train``, ``validation`` and ``test`` splits in order,
keeps the first one that loads with at least one record, and prints a short
description of the first two records.  Image fields are additionally encoded
to a temporary JPEG file to verify that they can be saved.

Exit status is 0 on success and 1 when no split could be loaded.
"""
import json  # unused here; kept because other tooling may rely on it
import os
import sys
import tempfile
from itertools import islice
from pathlib import Path  # unused here; kept for the same reason as json

DEFAULT_DATASET_ID = "tany0699/carBrands50"
SPLITS = ("train", "validation", "test")
PREVIEW_ROWS = 2
# Image modes that are converted to RGB before being written as JPEG.
JPEG_UNSAFE_MODES = ("RGBA", "P", "LA")


def parse_dataset_id(dataset_id: str) -> "tuple[str, str]":
    """Split ``"namespace/name"`` into ``(namespace, name)``.

    An id without a slash yields an empty namespace instead of raising, so
    callers can fall back to loading by the bare id.  Only the first slash
    separates the namespace; any further slashes stay in the name.
    """
    namespace, separator, name = dataset_id.partition("/")
    if not separator:
        return "", dataset_id
    return namespace, name


def load_first_available_split(dataset_id: str, loader, splits=SPLITS):
    """Return the first non-empty dataset among *splits*, or ``None``.

    Args:
        dataset_id: ``"namespace/name"`` or a bare dataset name.
        loader: callable with the calling convention of ``MsDataset.load``
            (positional dataset name, ``namespace=`` and ``split=`` keywords).
        splits: split names to try, in order.

    A failure on one split is reported on stdout and the next split is tried;
    this is deliberate best-effort probing, hence the broad ``except``.
    """
    namespace, ds_name = parse_dataset_id(dataset_id)
    for split in splits:
        try:
            if namespace:
                ds = loader(ds_name, namespace=namespace, split=split)
            else:
                ds = loader(dataset_id, split=split)
            # An empty (falsy) dataset counts as "not loaded": keep probing.
            if ds:
                count = len(ds) if hasattr(ds, "__len__") else "?"
                print(f"split='{split}' 成功, 共 {count} 条")
                return ds
        except Exception as e:
            print(f"split='{split}' 失败: {e}")
    return None


def jpeg_size_on_disk(image) -> int:
    """Save *image* as a JPEG in a temporary file and return its byte size.

    *image* must provide a PIL-style ``save(path, format=..., quality=...)``.
    The temporary file is always removed, even when saving fails.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
    # Close our handle first: it would otherwise leak, and on Windows an open
    # handle prevents the path from being opened a second time for writing.
    tmp.close()
    try:
        image.save(tmp.name, format="JPEG", quality=90)
        return os.path.getsize(tmp.name)
    finally:
        os.unlink(tmp.name)


def describe_field(key, value, image_type=()) -> str:
    """Return the one-line description printed for a record field.

    Args:
        key: field name.
        value: field value.
        image_type: class (or tuple of classes) treated as an image; such
            values are test-saved as JPEG.  The default matches nothing.

    Strings longer than 100 characters are summarized by their length; every
    other value is shown with its type name.
    """
    if isinstance(value, image_type):
        # Simulate saving the image.
        if value.mode in JPEG_UNSAFE_MODES:
            value = value.convert("RGB")
        size = jpeg_size_on_disk(value)
        return (
            f" {key}: PIL.Image ({value.size[0]}x{value.size[1]}, "
            f"mode={value.mode}) -> saved as {size} bytes"
        )
    if isinstance(value, str) and len(value) > 100:
        return f" {key}: str (len={len(value)})"
    return f" {key}: {type(value).__name__} = {value}"


def main(argv=None) -> int:
    """Run the download test and return the process exit status."""
    argv = sys.argv if argv is None else argv
    dataset_id = argv[1] if len(argv) > 1 else DEFAULT_DATASET_ID
    print(f"数据集: {dataset_id}\n")

    # Test MsDataset.load()
    print("=== 用 MsDataset.load() 下载 ===")
    # Imported here, after the header is printed, so that merely importing
    # this module does not require the third-party packages.
    from modelscope.msdatasets import MsDataset
    from PIL import Image

    ds = load_first_available_split(dataset_id, MsDataset.load)
    if not ds:
        print("所有 split 都失败")
        return 1

    # Show the first records.
    print(f"\n=== 前 {PREVIEW_ROWS} 条数据 ===")
    for index, row in enumerate(islice(ds, PREVIEW_ROWS)):
        print(f"\n--- Record {index} ---")
        for key, value in row.items():
            print(describe_field(key, value, Image.Image))

    print("\n=== 测试通过! ===")
    return 0


if __name__ == "__main__":
    sys.exit(main())