1 hafta önce · 74da698268
--- a/backend/app/services/dataset_service.py
+++ b/backend/app/services/dataset_service.py
@@ -17,10 +17,17 @@ settings = get_settings()
 
				 async def download_dataset(req: DatasetDownloadRequest) -> DatasetDownloadResponse:
			
 
				     """从 HuggingFace 或 ModelScope 下载数据集。"""
			
 
				     try:
			
 
				-        from datasets import load_dataset
			
 
				+        if req.use_modelscope:
			
 
				+            from modelscope.msdatasets import MsDataset
			
 
				+
			
 
				+            ds = MsDataset.load(req.dataset_id)
			
 
				+            ds_dir = settings.processed_dir / f"ms_{req.dataset_id.replace('/', '_')}"
			
 
				+        else:
			
 
				+            from datasets import load_dataset
			
 
				+
			
 
				+            ds = load_dataset(req.dataset_id)
			
 
				+            ds_dir = settings.processed_dir / f"hf_{req.dataset_id.replace('/', '_')}"
			
 
				 
			
 
				-        ds = load_dataset(req.dataset_id)
			
 
				-        ds_dir = settings.processed_dir / f"hf_{req.dataset_id.replace('/', '_')}"
			
 
				         ds_dir.mkdir(parents=True, exist_ok=True)
			
 
				         # 保存为 JSONL
			
 
				         if "train" in ds:
			
@@ -45,7 +52,7 @@ async def download_dataset(req: DatasetDownloadRequest) -> DatasetDownloadRespon
 
				             session.add(record)
			
 
				             await session.commit()
			
 
				 
			
 
				-        logger.info(f"Downloaded dataset: {req.dataset_id} ({len(split)} records)")
			
 
				+        logger.info(f"Downloaded dataset: {req.dataset_id} ({len(split)} records, source={'ModelScope' if req.use_modelscope else 'HuggingFace'})")
			
 
				         return DatasetDownloadResponse(dataset_id=req.dataset_id, status="completed", path=str(output_path))
			
 
				     except Exception as e:
			
 
				         logger.error(f"Dataset download failed: {e}")
			
--- a/result.txt
+++ b/result.txt
@@ -1,97 +1,34 @@
 
				-base) [root@localhost Fine-tuning]# docker exec finetune-backend cat /root/Fine-tuning/backend/data/models/Qwen/Qwen3.5-0.8B/config.json
			
 
				-{
			
 
				-    "architectures": [
			
 
				-        "Qwen3_5ForConditionalGeneration"
			
 
				-    ],
			
 
				-    "image_token_id": 248056,
			
 
				-    "model_type": "qwen3_5",
			
 
				-    "text_config": {
			
 
				-        "attention_bias": false,
			
 
				-        "attention_dropout": 0.0,
			
 
				-        "attn_output_gate": true,
			
 
				-        "dtype": "bfloat16",
			
 
				-        "eos_token_id": 248044,
			
 
				-        "full_attention_interval": 4,
			
 
				-        "head_dim": 256,
			
 
				-        "hidden_act": "silu",
			
 
				-        "hidden_size": 1024,
			
 
				-        "initializer_range": 0.02,
			
 
				-        "intermediate_size": 3584,
			
 
				-        "layer_types": [
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "linear_attention",
			
 
				-            "full_attention"
			
 
				-        ],
			
 
				-        "linear_conv_kernel_dim": 4,
			
 
				-        "linear_key_head_dim": 128,
			
 
				-        "linear_num_key_heads": 16,
			
 
				-        "linear_num_value_heads": 16,
			
 
				-        "linear_value_head_dim": 128,
			
 
				-        "max_position_embeddings": 262144,
			
 
				-        "mlp_only_layers": [],
			
 
				-        "model_type": "qwen3_5_text",
			
 
				-        "mtp_num_hidden_layers": 1,
			
 
				-        "mtp_use_dedicated_embeddings": false,
			
 
				-        "num_attention_heads": 8,
			
 
				-        "num_hidden_layers": 24,
			
 
				-        "num_key_value_heads": 2,
			
 
				-        "rms_norm_eps": 1e-06,
			
 
				-        "tie_word_embeddings": true,
			
 
				-        "use_cache": true,
			
 
				-        "vocab_size": 248320,
			
 
				-        "mamba_ssm_dtype": "float32",
			
 
				-        "rope_parameters": {
			
 
				-            "mrope_interleaved": true,
			
 
				-            "mrope_section": [
			
 
				-                11,
			
 
				-                11,
			
 
				-                10
			
 
				-            ],
			
 
				-            "rope_type": "default",
			
 
				-            "rope_theta": 10000000,
			
 
				-            "partial_rotary_factor": 0.25
			
 
				-        }
			
 
				-    },
			
 
				-    "tie_word_embeddings": true,
			
 
				-    "transformers_version": "4.57.0.dev0",
			
 
				-    "video_token_id": 248057,
			
 
				-    "vision_config": {
			
 
				-        "deepstack_visual_indexes": [],
			
 
				-        "depth": 12,
			
 
				-        "hidden_act": "gelu_pytorch_tanh",
			
 
				-        "hidden_size": 768,
			
 
				-        "in_channels": 3,
			
 
				-        "initializer_range": 0.02,
			
 
				-        "intermediate_size": 3072,
			
 
				-        "model_type": "qwen3_5",
			
 
				-        "num_heads": 12,
			
 
				-        "num_position_embeddings": 2304,
			
 
				-        "out_hidden_size": 1024,
			
 
				-        "patch_size": 16,
			
 
				-        "spatial_merge_size": 2,
			
 
				-        "temporal_patch_size": 2
			
 
				-    },
			
 
				-    "vision_end_token_id": 248054,
			
 
				-    "vision_start_token_id": 248053
			
 
				-}(base) [root@localhost Fine-tuning]# 
			
 
				+INFO:     172.19.0.3:58004 - "GET /api/v1/models/ HTTP/1.0" 200 OK
			
 
				+[transformers] `torch_dtype` is deprecated! Use `dtype` instead!
			
 
				+2026-05-15 10:58:38 | WARNING  | fla.utils | Current Triton version 3.0.0 is below the recommended 3.2.0 version. Errors may occur and these issues will not be fixed. Please consider upgrading Triton.
			
 
				+2026-05-15 10:58:38 | WARNING  | fla.utils | Current Python version 3.10 is below the recommended 3.11 version. It is recommended to upgrade to Python 3.11 or higher for the best experience.
			
 
				+2026-05-15 10:58:43 | WARNING  | fla.ops.rwkv7.fused_addcmul | torch.compile is not available in Python 3.10, using identity decorator instead
			
 
				+/opt/conda/lib/python3.10/site-packages/torchvision/datapoints/__init__.py:12: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
			
 
				+  warnings.warn(_BETA_TRANSFORMS_WARNING)
			
 
				+/opt/conda/lib/python3.10/site-packages/torchvision/transforms/v2/__init__.py:54: UserWarning: The torchvision.datapoints and torchvision.transforms.v2 namespaces are still Beta. While we do not expect major breaking changes, some APIs may still change according to user feedback. Please submit any feedback you may have in this issue: https://github.com/pytorch/vision/issues/6753, and you can also check out https://github.com/pytorch/vision/issues/7319 to learn more about the APIs that we suspect might involve future changes. You can silence this warning by calling torchvision.disable_beta_transforms_warning().
			
 
				+  warnings.warn(_BETA_TRANSFORMS_WARNING)
			
 
				+Loading weights: 100%|██████████| 320/320 [00:00<00:00, 525.66it/s]
			
 
				+/opt/conda/lib/python3.10/site-packages/torch/nn/functional.py:6001: UserWarning: 1Torch was not compiled with memory efficient attention. (Triggered internally at /workspace/framework/mcPytorch/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp:738.)
			
 
				+  return _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, scale = scale, enable_gqa = enable_gqa)
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+1,7,16,128,128,64,1,1,None
			
 
				+INFO:     172.19.0.3:47344 - "POST /api/v1/models/test HTTP/1.0" 200 OK
			
 
				+INFO:     127.0.0.1:38712 - "GET /health HTTP/1.1" 200 OK
			
 
				+INFO:     127.0.0.1:43372 - "GET /health HTTP/1.1" 200 OK
			
 
				+INFO:     127.0.0.1:45258 - "GET /health HTTP/1.1" 200 OK