metrics.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import asyncio
  2. import os
  3. from fastapi import APIRouter, Request
  4. from gpustack.config.config import get_global_config
  5. from gpustack.server.deps import CurrentUserDep
  6. import yaml
  7. from gpustack.utils.metrics import get_builtin_metrics_config_file_path
# Router on which the metrics-config endpoints below are registered.
router = APIRouter()

# Cache of parsed YAML configs: {file_path: parsed_data}.
_config_cache: dict[str, dict] = {}

# One asyncio.Lock per file path; the cache helpers below hold it while they
# read, fill, or drop that path's cache entry.
_cache_locks: dict[str, asyncio.Lock] = {}
  13. def _load_yaml_sync(file_path: str) -> dict:
  14. """Synchronous YAML loading function to be run in thread pool."""
  15. with open(file_path, "r") as f:
  16. return yaml.safe_load(f)
  17. def _save_yaml_sync(file_path: str, data: dict) -> None:
  18. """Synchronous YAML saving function to be run in thread pool."""
  19. with open(file_path, "w") as f:
  20. yaml.safe_dump(data, f)
  21. async def _load_yaml_cached(file_path: str) -> dict:
  22. """Load YAML file with caching. Async-safe and non-blocking.
  23. Cache is only invalidated via _invalidate_cache(), typically called after POST updates.
  24. External file changes will not be detected automatically.
  25. """
  26. # Get or create lock for this file path (setdefault is atomic in CPython)
  27. lock = _cache_locks.setdefault(file_path, asyncio.Lock())
  28. async with lock:
  29. # Check if we have a cached version
  30. if file_path in _config_cache:
  31. return _config_cache[file_path]
  32. # Load and cache the file in thread pool to avoid blocking event loop
  33. data = await asyncio.to_thread(_load_yaml_sync, file_path)
  34. _config_cache[file_path] = data
  35. return data
  36. async def _invalidate_cache(file_path: str) -> None:
  37. """Invalidate cache for a specific file. Async-safe."""
  38. lock = _cache_locks.setdefault(file_path, asyncio.Lock())
  39. async with lock:
  40. _config_cache.pop(file_path, None)
  41. @router.get("/default-config")
  42. async def get_default_metrics_config(user: CurrentUserDep):
  43. builtin_metrics_config_path = get_builtin_metrics_config_file_path()
  44. return await _load_yaml_cached(builtin_metrics_config_path)
  45. @router.get("/config")
  46. async def get_metrics_config(user: CurrentUserDep):
  47. data_dir = get_global_config().data_dir
  48. custom_metrics_config_path = f"{data_dir}/custom_metrics_config.yaml"
  49. builtin_metrics_config_path = get_builtin_metrics_config_file_path()
  50. file_path = (
  51. custom_metrics_config_path
  52. if os.path.exists(custom_metrics_config_path)
  53. else builtin_metrics_config_path
  54. )
  55. return await _load_yaml_cached(file_path)
  56. @router.post("/config")
  57. async def update_metrics_config(user: CurrentUserDep, request: Request):
  58. data_dir = get_global_config().data_dir
  59. custom_metrics_config_path = f"{data_dir}/custom_metrics_config.yaml"
  60. new_config = await request.json()
  61. # Write file in thread pool to avoid blocking event loop
  62. await asyncio.to_thread(_save_yaml_sync, custom_metrics_config_path, new_config)
  63. # Invalidate cache after updating the config
  64. await _invalidate_cache(custom_metrics_config_path)
  65. return {"status": "ok"}