"""Smoke-test a cached model by generating a short completion.

The test runs either on a remote compute node (over SSH, inside a Docker
container) or in the current process, depending on
``settings.use_remote_compute``.
"""

from pathlib import Path
from typing import Any

from app.config import get_settings
from app.core.logging import logger

settings = get_settings()

# Path of the temporary script on the compute node.
# NOTE(review): the script is written on the SSH host but executed through
# `docker exec`, so this path must be visible inside the container (e.g. via a
# bind mount of /tmp) -- confirm against the deployment.
_REMOTE_SCRIPT_PATH = "/tmp/remote_model_test.py"

# Standalone script executed on the compute node. It deliberately has no
# dependency on the `app` package. It is a plain (non-f) string, so braces are
# literal, and it is shipped through a quoted heredoc, so the remote shell
# performs no expansion on it. It must never contain the line `SCRIPT_EOF`.
_REMOTE_TEST_SCRIPT = """\
import json, sys
from pathlib import Path
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel


def find_model_path(model_id):
    candidates = [
        '/root/.cache/huggingface/hub',
        '/root/.cache/modelscope/hub',
        '/root/models',
    ]
    # Spellings of model_id that may appear in a cache path: the raw id, the
    # Hugging Face hub form (org--name), and (presumably) the ModelScope form
    # where '.' is stored as '___'.
    needles = {model_id, model_id.replace('/', '--'), model_id.replace('.', '___')}
    fallback = None
    for base in candidates:
        bp = Path(base)
        if not bp.is_dir():
            continue
        for child in bp.rglob('config.json'):
            parent = child.parent
            rel = parent.relative_to(bp).as_posix()
            if any(n in rel for n in needles):
                return str(parent)
            if fallback is None:
                fallback = str(parent)
    if fallback is not None:
        # Keep the previous behaviour (first model directory found) when the
        # id cannot be matched, but make the substitution visible.
        print(f'No cached directory matches {model_id}; using {fallback}', file=sys.stderr)
    return fallback


model_id = sys.argv[1]
prompt = sys.argv[2]
max_new_tokens = int(sys.argv[3])
temperature = float(sys.argv[4])
top_p = float(sys.argv[5])

model_path = find_model_path(model_id)
if model_path is None:
    print(json.dumps({'error': f'Model not found in cache: {model_id}'}))
    sys.exit(1)

t = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
t.pad_token = t.pad_token or t.eos_token

m = None
for cls, kw in [
    (AutoModelForCausalLM, {'trust_remote_code': True}),
    (AutoModel, {'trust_remote_code': True}),
]:
    try:
        m = cls.from_pretrained(model_path, torch_dtype=torch.float16, device_map='auto', **kw)
        break
    except Exception as exc:
        print(f'{cls.__name__} failed to load {model_path}: {exc}', file=sys.stderr)

if m is None:
    print(json.dumps({'error': 'Unable to load model'}))
    sys.exit(1)

m.eval()
inp = t(prompt, return_tensors='pt').to(m.device)
gen_kwargs = {'max_new_tokens': max_new_tokens, 'pad_token_id': t.eos_token_id}
if temperature > 0:
    gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
else:
    gen_kwargs['do_sample'] = False
with torch.no_grad():
    out = m.generate(**inp, **gen_kwargs)
gen = t.decode(out[0][inp['input_ids'].shape[1]:], skip_special_tokens=True)
print(json.dumps({'generated_text': gen}))
"""


async def test_model(
    model_id: str,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.8,
    top_p: float = 0.95,
) -> dict[str, Any]:
    """Load an already-cached model and generate a test response.

    Args:
        model_id: Identifier of the model to test.
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; ``0`` or less selects greedy decoding.
        top_p: Nucleus-sampling threshold, used only when sampling.

    Returns:
        On success a dict with ``model_id``, ``prompt`` and ``generated_text``;
        on failure a dict containing an ``error`` message.
    """
    if settings.use_remote_compute:
        # NOTE: the remote path is a plain synchronous call (SSH with a
        # 600 s timeout), so it runs to completion before this coroutine
        # yields control again.
        return _test_model_remote(model_id, prompt, max_new_tokens, temperature, top_p)
    # _test_model_local is a coroutine function: it must be awaited, otherwise
    # the caller receives a coroutine object instead of the result dict.
    return await _test_model_local(model_id, prompt, max_new_tokens, temperature, top_p)


def _test_model_remote(
    model_id: str,
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> dict[str, Any]:
    """Run the model test on the compute node over SSH.

    A standalone script (no ``app``/database dependencies) is written to the
    remote host through a heredoc, executed with ``docker exec`` and removed
    afterwards. The script prints a single JSON object on stdout.

    Returns:
        The script's JSON object with ``model_id`` and ``prompt`` added, or a
        dict with an ``error`` key when the run failed or produced no JSON.
    """
    import json
    import shlex

    from app.core.remote_executor import ssh_exec

    container = settings.compute_node_docker_container
    python = settings.compute_node_python
    workdir = settings.compute_node_workdir

    # shlex.quote is the only safe way to pass arbitrary text through the
    # remote shell: inside single quotes a backslash is literal, so hand-made
    # "\\'" escaping both corrupts the prompt and lets a quote break out.
    script_args = " ".join(
        shlex.quote(str(arg))
        for arg in (model_id, prompt, max_new_tokens, temperature, top_p)
    )

    remote_cmd = (
        f"cat > {_REMOTE_SCRIPT_PATH} << 'SCRIPT_EOF'\n"
        f"{_REMOTE_TEST_SCRIPT}"
        "SCRIPT_EOF\n"
        f"docker exec -w {workdir} {container} {python} {_REMOTE_SCRIPT_PATH} {script_args}\n"
        # Remember the test's exit status so the cleanup below cannot mask it.
        "status=$?\n"
        f"rm -f {_REMOTE_SCRIPT_PATH}\n"
        "exit $status"
    )

    code, stdout, stderr = ssh_exec(remote_cmd, timeout=600)

    logger.info(f"Remote test result: code={code}, stdout_len={len(stdout)}, stderr_len={len(stderr)}")
    if stdout:
        logger.info(f"stdout (first 500): {stdout[:500]}")
    if stderr:
        logger.info(f"stderr (first 500): {stderr[:500]}")

    # The script's JSON object is the last stdout line starting with "{";
    # earlier lines may be library noise.
    result = None
    for line in reversed(stdout.strip().split("\n")):
        line = line.strip()
        if not line.startswith("{"):
            continue
        try:
            result = json.loads(line)
        except json.JSONDecodeError:
            continue
        result["model_id"] = model_id
        result["prompt"] = prompt
        break

    if code != 0:
        logger.error(f"Remote model test failed: {stderr}")
        # Prefer the structured error reported by the script itself; fall back
        # to stderr (e.g. a traceback or an SSH/docker failure).
        if result is not None and "error" in result:
            return result
        return {"error": stderr.strip() or "Remote test failed"}

    if result is not None:
        return result
    return {"error": f"Invalid response: {stdout[:500]}"}


async def _test_model_local(
    model_id: str,
    prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> dict[str, Any]:
    """Run the model test in the current process (development use only).

    Returns:
        A dict with ``model_id``, ``prompt`` and ``generated_text``, or a dict
        with an ``error`` key when the model cannot be located or loaded.
    """
    # Heavy dependencies are imported lazily so that importing this module
    # does not require them.
    import torch
    from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

    from app.services.model_service import resolve_model_path

    model_path = await resolve_model_path(model_id)
    if not model_path:
        return {"error": f"Model not found in cache: {model_id}"}

    model_dir = Path(model_path)
    if not (model_dir / "config.json").exists():
        return {"error": f"Model directory not found: {model_dir}"}

    tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Generic loading strategy: try several loader classes in turn so that
    # newer architectures are picked up automatically.
    model = None
    for loader_cls, kwargs in [
        (AutoModelForCausalLM, {"trust_remote_code": True}),
        (AutoModel, {"trust_remote_code": True}),
    ]:
        try:
            model = loader_cls.from_pretrained(
                model_dir,
                torch_dtype=torch.float16,
                device_map="auto",
                **kwargs,
            )
            break
        except Exception as exc:
            # Best effort: record why this loader failed, then try the next.
            logger.warning(f"{loader_cls.__name__} failed to load {model_dir}: {exc}")

    if model is None:
        return {"error": "Unable to load model with any available loader. Model type may not be supported yet."}

    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sampling parameters are only meaningful when sampling is enabled; for
    # greedy decoding they are omitted rather than passed as temperature=0.
    generate_kwargs: dict[str, Any] = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generate_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        generate_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **generate_kwargs)

    # Drop the prompt tokens so only the newly generated text is returned.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return {
        "model_id": model_id,
        "prompt": prompt,
        "generated_text": generated_text,
    }