Procházet zdrojové kódy

使用 qlora 时回退为 LoRA + FP16(MUSA 不支持 4-bit 量化)

lxylxy123321 před 2 dny
rodič
revize
3c049f18db
Změněny 2 soubory: 3 přidání a 12 odebrání
  1. 3 11
      backend/app/engines/text_engine.py
  2. 0 1
      backend/pyproject.toml

+ 3 - 11
backend/app/engines/text_engine.py

@@ -91,18 +91,10 @@ class TextEngine(BaseEngine):
             "attn_implementation": "sdpa",
         }
         if quantization == "4bit" or quantization == "qlora":
+            # 沐曦 GPU 不支持 bitsandbytes/HQQ,直接 fp16 + LoRA
             load_kwargs["torch_dtype"] = torch.float16
-            try:
-                from transformers import HqqConfig
-                load_kwargs["quantization_config"] = HqqConfig(
-                    nbits=4,
-                    group_size=64,
-                )
-            except ImportError:
-                raise ImportError(
-                    "HQQ is required for 4-bit quantization but is not installed. "
-                    "Run: pip install hqq"
-                )
+            logger.info("4-bit quantization not supported on this GPU; "
+                        "falling back to fp16 + LoRA")
         elif quantization == "8bit":
             from transformers import BitsAndBytesConfig
             load_kwargs["quantization_config"] = BitsAndBytesConfig(

+ 0 - 1
backend/pyproject.toml

@@ -28,7 +28,6 @@ dependencies = [
     "aiohttp>=3.9.0,<3.11.0",
     "pyjwt>=2.8.0",
     "httpx>=0.27.0",
-    "hqq>=0.2.5",
 ]
 
 [tool.uv]