Kaynağa Gözat

沐曦生态不支持 BitsAndBytes

lxylxy123321 2 gün önce
ebeveyn
işleme
5a57edb1d8

+ 13 - 7
backend/app/engines/text_engine.py

@@ -49,7 +49,7 @@ class TextEngine(BaseEngine):
     async def load_model(self, model_id: str, **kwargs: Any) -> None:
         """下载并加载基础模型。GPU 加载超时直接报错。"""
         import torch
-        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+        from transformers import AutoModelForCausalLM, AutoTokenizer
 
         # 远程节点不查数据库,直接扫描本地模型目录
         local_path = str(settings.models_dir / model_id.replace("/", "_"))
@@ -92,13 +92,19 @@ class TextEngine(BaseEngine):
         }
         if quantization == "4bit" or quantization == "qlora":
             load_kwargs["torch_dtype"] = torch.float16
-            load_kwargs["quantization_config"] = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_compute_dtype=torch.float16,
-            )
+            try:
+                from transformers import HqqConfig
+                load_kwargs["quantization_config"] = HqqConfig(
+                    nbits=4,
+                    group_size=64,
+                )
+            except ImportError:
+                raise ImportError(
+                    "HQQ is required for 4-bit quantization but is not installed. "
+                    "Run: pip install hqq"
+                )
         elif quantization == "8bit":
+            from transformers import BitsAndBytesConfig
             load_kwargs["quantization_config"] = BitsAndBytesConfig(
                 load_in_8bit=True,
             )

+ 2 - 14
backend/app/peft/__init__.py

@@ -20,24 +20,14 @@ def build_lora_config(params: dict[str, Any]):
 
 
 def build_qlora_config(params: dict[str, Any]):
-    """返回 (bitsandbytes_config, peft.LoraConfig) 二元组。"""
+    """返回 peft.LoraConfig 对象(量化已在 load_model 中通过 HQQ 处理)。"""
     from peft import LoraConfig, TaskType
-    from transformers import BitsAndBytesConfig
-    import torch
-
-    bnb_params = BitsAndBytesConfig(
-        load_in_4bit=params.get("qlora_bits", 4) == 4,
-        load_in_8bit=params.get("qlora_bits", 4) == 8,
-        bnb_4bit_quant_type=params.get("qlora_type", "nf4"),
-        bnb_4bit_use_double_quant=params.get("qlora_double_quant", True),
-        bnb_4bit_compute_dtype=torch.float16,
-    )
 
     target_modules = params.get("lora_target_modules", "all-linear")
     if isinstance(target_modules, str) and target_modules == "all-linear":
         target_modules = ["linear", "lm_head", "q_proj", "v_proj", "k_proj", "o_proj"]
 
-    lora_cfg = LoraConfig(
+    return LoraConfig(
         r=params.get("lora_r", 16),
         lora_alpha=params.get("lora_alpha", 32),
         lora_dropout=params.get("lora_dropout", 0.05),
@@ -45,8 +35,6 @@ def build_qlora_config(params: dict[str, Any]):
         task_type=TaskType.CAUSAL_LM,
     )
 
-    return bnb_params, lora_cfg
-
 
 def build_adalora_config(params: dict[str, Any]):
     """返回实际的 peft.AdaLoraConfig 对象。"""

+ 1 - 0
backend/pyproject.toml

@@ -28,6 +28,7 @@ dependencies = [
     "aiohttp>=3.9.0,<3.11.0",
     "pyjwt>=2.8.0",
     "httpx>=0.27.0",
+    "hqq>=0.2.5",
 ]
 
 [tool.uv]

+ 18 - 5
result.txt

@@ -1,5 +1,18 @@
-(base) [root@localhost ~]# docker exec finetune-trainer cat /root/Fine-tuning/backend/data/logs/638a1786-04d7-44ea-b274-2c673aea22e2.jsonl
-{"ts": "2026-05-22T08:58:18.130363+00:00", "type": "start", "job_id": "638a1786-04d7-44ea-b274-2c673aea22e2"}
-{"ts": "2026-05-22T08:58:18.132911+00:00", "type": "status", "status": "preprocessing"}
-{"ts": "2026-05-22T08:58:22.234319+00:00", "type": "status", "status": "loading_model"}
-{"ts": "2026-05-22T08:58:42.046499+00:00", "type": "error", "message": "GPU model loading failed: We encountered some issues during automatic conversion of the weights. For details look at the `CONVERSION` entries of the above report!", "traceback": "Traceback (most recent call last):\n  File \"/root/Fine-tuning/backend/app/engines/remote_train.py\", line 157, in run_training\n    await engine.load_model(model_id, quantization=quantization_mode)\n  File \"/root/Fine-tuning/backend/app/engines/text_engine.py\", line 131, in load_model\n    raise RuntimeError(f\"GPU model loading failed: {load_error[0]}\")\nRuntimeError: GPU model loading failed: We encountered some issues during automatic conversion of the weights. For details look at the `CONVERSION` entries of the above report!\n"}
+(base) [root@localhost ~]# docker exec finetune-trainer /opt/conda/bin/pip install bitsandbytes
+Looking in indexes: http://mirrors.aliyun.com/pypi/simple
+Collecting bitsandbytes
+  Using cached http://mirrors.aliyun.com/pypi/packages/19/57/3443d6f183436fbdaf5000aac332c4d5ddb056665d459244a5608e98ae92/bitsandbytes-0.49.2-py3-none-manylinux_2_24_x86_64.whl (60.7 MB)
+Requirement already satisfied: torch<3,>=2.3 in /opt/conda/lib/python3.10/site-packages (from bitsandbytes) (2.8.0+metax3.5.3.9)
+Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from bitsandbytes) (1.26.4)
+Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from bitsandbytes) (26.2)
+Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (3.29.0)
+Requirement already satisfied: typing-extensions>=4.10.0 in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (4.15.0)
+Requirement already satisfied: sympy>=1.13.3 in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (1.14.0)
+Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (3.4.2)
+Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (3.1.6)
+Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch<3,>=2.3->bitsandbytes) (2025.5.1)
+Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy>=1.13.3->torch<3,>=2.3->bitsandbytes) (1.3.0)
+Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch<3,>=2.3->bitsandbytes) (3.0.2)
+Installing collected packages: bitsandbytes
+Successfully installed bitsandbytes-0.49.2
+WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.