|
|
@@ -15,60 +15,84 @@ async def test_model(model_id: str, prompt: str, max_new_tokens: int = 128, temp
|
|
|
|
|
|
|
|
|
def _test_model_remote(model_id: str, prompt: str, max_new_tokens: int, temperature: float, top_p: float) -> dict[str, Any]:
|
|
|
- """通过 SSH 在算力节点执行模型测试。"""
|
|
|
- import base64
|
|
|
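+    """Run the model test on the compute node over SSH.
|
|
|
+
|
|
|
+    Uses a standalone remote_model_test.py script (no app/db dependency, no sqlalchemy dependency),
|
|
|
+    deployed to the remote side via SSH + heredoc and executed inside the container with docker exec.
|
|
|
+    """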
|
|
|
import json
|
|
|
from app.core.remote_executor import ssh_exec
|
|
|
|
|
|
- # 将 prompt 中的单引号/反斜杠转义
|
|
|
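+    # Backslash-escape backslashes and single quotes in the prompt before embedding it in the shell command. NOTE(review): the value is placed inside single quotes, where POSIX shells do not treat backslash as an escape — confirm, and consider shlex.quote.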
|
|
|
safe_prompt = prompt.replace("\\", "\\\\").replace("'", "\\'")
|
|
|
|
|
|
- python_script = """\
|
|
|
-import json, asyncio
|
|
|
-from app.services.model_service import resolve_model_path
|
|
|
-
|
|
|
-model_path = asyncio.run(resolve_model_path('%s'))
|
|
|
-
|
|
|
-import torch
|
|
|
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
|
|
|
-
|
|
|
-t = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
|
|
-t.pad_token = t.pad_token or t.eos_token
|
|
|
-
|
|
|
-m = None
|
|
|
-loaders = [
|
|
|
- (AutoModelForCausalLM, {'trust_remote_code': True}),
|
|
|
- (AutoModel, {'trust_remote_code': True}),
|
|
|
-]
|
|
|
-for cls, kw in loaders:
|
|
|
- try:
|
|
|
- m = cls.from_pretrained(model_path, torch_dtype=torch.float16, device_map='auto', **kw)
|
|
|
- break
|
|
|
- except Exception:
|
|
|
- pass
|
|
|
-
|
|
|
-if m is None:
|
|
|
- print(json.dumps({'error': 'Unable to load model'}))
|
|
|
- exit(1)
|
|
|
-
|
|
|
-m.eval()
|
|
|
-inp = t('%s', return_tensors='pt').to(m.device)
|
|
|
-out = m.generate(**inp, max_new_tokens=%d, temperature=%f, top_p=%f, do_sample=%s, pad_token_id=t.eos_token_id)
|
|
|
-gen = t.decode(out[0][inp['input_ids'].shape[1]:], skip_special_tokens=True)
|
|
|
-print(json.dumps({'generated_text': gen}))
|
|
|
-""" % (model_id, safe_prompt, max_new_tokens, temperature, top_p, str(temperature > 0).lower())
|
|
|
-
|
|
|
container = settings.compute_node_docker_container
|
|
|
python = settings.compute_node_python
|
|
|
workdir = settings.compute_node_workdir
|
|
|
|
|
|
- # 用 base64 编码脚本,通过 bash -c 执行:
|
|
|
- # 1. bash -c 能激活 conda 环境(与训练命令一致)
|
|
|
- # 2. base64 避免引号嵌套和命令截断问题
|
|
|
- script_b64 = base64.b64encode(python_script.encode()).decode()
|
|
|
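+    # Write the script to a temporary file on the remote side and remove it after execution. NOTE(review): the file is created by the SSH shell but read via docker exec — confirm /tmp is visible inside the container.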
|
|
|
remote_cmd = (
|
|
|
- f"docker exec -w {workdir} {container} "
|
|
|
- f"bash -c 'echo {script_b64} | base64 -d | {python}'"
|
|
|
+ f"cat > /tmp/remote_model_test.py << 'SCRIPT_EOF'\n"
|
|
|
+ f"import json, sys\n"
|
|
|
+ f"from pathlib import Path\n"
|
|
|
+ f"import torch\n"
|
|
|
+ f"from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel\n"
|
|
|
+ f"\n"
|
|
|
+ f"def find_model_path(model_id):\n"
|
|
|
+ f" candidates = [\n"
|
|
|
+ f" '/root/.cache/huggingface/hub',\n"
|
|
|
+ f" '/root/.cache/modelscope/hub',\n"
|
|
|
+ f" '/root/models',\n"
|
|
|
+ f" ]\n"
|
|
|
+ f" for base in candidates:\n"
|
|
|
+ f" bp = Path(base)\n"
|
|
|
+ f" if not bp.is_dir():\n"
|
|
|
+ f" continue\n"
|
|
|
+ f" # Direct match\n"
|
|
|
+ f" for child in bp.rglob('config.json'):\n"
|
|
|
+ f" parent = child.parent\n"
|
|
|
+ f" if parent.is_dir():\n"
|
|
|
+ f" return str(parent)\n"
|
|
|
+ f" return None\n"
|
|
|
+ f"\n"
|
|
|
+ f"model_id = sys.argv[1]\n"
|
|
|
+ f"prompt = sys.argv[2]\n"
|
|
|
+ f"max_new_tokens = int(sys.argv[3])\n"
|
|
|
+ f"temperature = float(sys.argv[4])\n"
|
|
|
+ f"top_p = float(sys.argv[5])\n"
|
|
|
+ f"\n"
|
|
|
+ f"model_path = find_model_path(model_id)\n"
|
|
|
+ f"if model_path is None:\n"
|
|
|
+ f" print(json.dumps({{'error': f'Model not found in cache: {{model_id}}'}}))\n"
|
|
|
+ f" sys.exit(1)\n"
|
|
|
+ f"\n"
|
|
|
+ f"t = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\n"
|
|
|
+ f"t.pad_token = t.pad_token or t.eos_token\n"
|
|
|
+ f"\n"
|
|
|
+ f"m = None\n"
|
|
|
+ f"for cls, kw in [\n"
|
|
|
+ f" (AutoModelForCausalLM, {{'trust_remote_code': True}}),\n"
|
|
|
+ f" (AutoModel, {{'trust_remote_code': True}}),\n"
|
|
|
+ f"]:\n"
|
|
|
+ f" try:\n"
|
|
|
+ f" m = cls.from_pretrained(model_path, torch_dtype=torch.float16, device_map='auto', **kw)\n"
|
|
|
+ f" break\n"
|
|
|
+ f" except Exception:\n"
|
|
|
+ f" pass\n"
|
|
|
+ f"\n"
|
|
|
+ f"if m is None:\n"
|
|
|
+ f" print(json.dumps({{'error': 'Unable to load model'}}))\n"
|
|
|
+ f" sys.exit(1)\n"
|
|
|
+ f"\n"
|
|
|
+ f"m.eval()\n"
|
|
|
+ f"inp = t(prompt, return_tensors='pt').to(m.device)\n"
|
|
|
+ f"out = m.generate(**inp, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample={str(temperature > 0).lower()}, pad_token_id=t.eos_token_id)\n"
|
|
|
+ f"gen = t.decode(out[0][inp['input_ids'].shape[1]:], skip_special_tokens=True)\n"
|
|
|
+ f"print(json.dumps({{'generated_text': gen}}))\n"
|
|
|
+ f"SCRIPT_EOF\n"
|
|
|
+ f"\n"
|
|
|
+ f"docker exec -w {workdir} {container} {python} /tmp/remote_model_test.py '{model_id}' '{safe_prompt}' {max_new_tokens} {temperature} {top_p}\n"
|
|
|
+ f"rm -f /tmp/remote_model_test.py"
|
|
|
)
|
|
|
|
|
|
code, stdout, stderr = ssh_exec(remote_cmd, timeout=600)
|