|
|
@@ -91,18 +91,10 @@ class TextEngine(BaseEngine):
|
|
|
"attn_implementation": "sdpa",
|
|
|
}
|
|
|
if quantization == "4bit" or quantization == "qlora":
|
|
|
+ # MetaX GPUs do not support bitsandbytes/HQQ; load in fp16 and use LoRA directly
|
|
|
load_kwargs["torch_dtype"] = torch.float16
|
|
|
- try:
|
|
|
- from transformers import HqqConfig
|
|
|
- load_kwargs["quantization_config"] = HqqConfig(
|
|
|
- nbits=4,
|
|
|
- group_size=64,
|
|
|
- )
|
|
|
- except ImportError:
|
|
|
- raise ImportError(
|
|
|
- "HQQ is required for 4-bit quantization but is not installed. "
|
|
|
- "Run: pip install hqq"
|
|
|
- )
|
|
|
+ logger.info("4-bit quantization not supported on this GPU; "
|
|
|
+ "falling back to fp16 + LoRA")
|
|
|
elif quantization == "8bit":
|
|
|
from transformers import BitsAndBytesConfig
|
|
|
load_kwargs["quantization_config"] = BitsAndBytesConfig(
|