|
|
@@ -107,11 +107,14 @@ def _download_modelscope_dataset(dataset_id: str) -> tuple[Path, Path, int]:
|
|
|
"""用 MsDataset 下载并转为 JSONL。"""
|
|
|
from modelscope.msdatasets import MsDataset
|
|
|
|
|
|
+ # 先尝试加载完整数据集
|
|
|
try:
|
|
|
- ds = MsDataset.load(dataset_id, subset_name="default", split="train")
|
|
|
- except Exception:
|
|
|
- # 回退:不带参数,自动选择第一个 split
|
|
|
ds = MsDataset.load(dataset_id)
|
|
|
+ except Exception:
|
|
|
+ try:
|
|
|
+ ds = MsDataset.load(dataset_id, subset_name="default", split="train")
|
|
|
+ except Exception:
|
|
|
+ ds = MsDataset.load(dataset_id, split="train")
|
|
|
|
|
|
ds_dir = settings.processed_dir / f"ms_{dataset_id.replace('/', '_')}"
|
|
|
ds_dir.mkdir(parents=True, exist_ok=True)
|