|
|
@@ -1,9 +1,10 @@
|
|
|
import os
|
|
|
|
|
|
# Disable FlashAttention to work around insufficient shared memory on MetaX (沐曦) GPUs
|
|
|
-os.environ["FLASH_ATTENTION_ENABLED"] = "0"
|
|
|
os.environ["PYTORCH_NO_FLASH"] = "1"
|
|
|
-os.environ["VLLM_ATTENTION_BACKEND"] = "NO_ATTENTION"
|
|
|
+os.environ["FLASH_ATTENTION_ENABLED"] = "0"
|
|
|
+os.environ["USE_FLASH_ATTENTION"] = "0"
|
|
|
+os.environ["TORCH_FLASH_ATTN"] = "0"
|
|
|
|
|
|
import asyncio
|
|
|
import json
|