model_test_service.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. from pathlib import Path
  2. from typing import Any
  3. from app.config import get_settings
  4. from app.core.logging import logger
  # Application settings, fetched once at import time via get_settings().
  # NOTE(review): not referenced by the functions visible in this section — confirm it is still needed.
  5. settings = get_settings()
  6. async def test_model(model_id: str, prompt: str, max_new_tokens: int = 128, temperature: float = 0.8, top_p: float = 0.95) -> dict[str, Any]:
  7. """加载已缓存模型并生成测试响应。"""
  8. return await _test_model_local(model_id, prompt, max_new_tokens, temperature, top_p)
  9. async def _test_model_local(model_id: str, prompt: str, max_new_tokens: int, temperature: float, top_p: float) -> dict[str, Any]:
  10. """本地执行模型测试。"""
  11. import torch
  12. from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, AutoConfig
  13. from app.services.model_service import resolve_model_path
  14. model_path = await resolve_model_path(model_id)
  15. if not model_path:
  16. return {"error": f"Model not found in cache: {model_id}"}
  17. model_dir = Path(model_path)
  18. if not (model_dir / "config.json").exists():
  19. return {"error": f"Model directory not found: {model_dir}"}
  20. tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
  21. if tokenizer.pad_token is None:
  22. tokenizer.pad_token = tokenizer.eos_token
  23. model = None
  24. for loader_cls, kwargs in [
  25. (AutoModelForCausalLM, {"trust_remote_code": True}),
  26. (AutoModel, {"trust_remote_code": True}),
  27. ]:
  28. try:
  29. model = loader_cls.from_pretrained(
  30. model_dir,
  31. torch_dtype=torch.float16,
  32. device_map="auto",
  33. **kwargs,
  34. )
  35. break
  36. except Exception:
  37. continue
  38. if model is None:
  39. return {"error": f"Unable to load model with any available loader. Model type may not be supported yet."}
  40. model.eval()
  41. inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
  42. with torch.no_grad():
  43. outputs = model.generate(
  44. **inputs,
  45. max_new_tokens=max_new_tokens,
  46. temperature=temperature,
  47. top_p=top_p,
  48. do_sample=temperature > 0,
  49. pad_token_id=tokenizer.eos_token_id,
  50. )
  51. generated_text = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
  52. return {
  53. "model_id": model_id,
  54. "prompt": prompt,
  55. "generated_text": generated_text,
  56. }