|
|
@@ -1,3 +1,12 @@
|
|
|
+import os
|
|
|
+
|
|
|
+# Disable FlashAttention to work around insufficient shared memory on MetaX (Muxi) GPUs.
|
|
|
+# Must stay at the very top of the module: set before any other library is imported. NOTE(review): confirm each variable name below is actually read by the installed PyTorch/attention stack - not verifiable from this file.
|
|
|
+os.environ["PYTORCH_NO_FLASH"] = "1"
|
|
|
+os.environ["FLASH_ATTENTION_ENABLED"] = "0"
|
|
|
+os.environ["USE_FLASH_ATTENTION"] = "0"
|
|
|
+os.environ["TORCH_FLASH_ATTN"] = "0"
|
|
|
+
|
|
|
from contextlib import asynccontextmanager
|
|
|
|
|
|
from fastapi import FastAPI
|