Explorar o código

fix(gateway): embedded 模式下等待 kubeconfig 文件生成后再初始化 Higress

- apiserver s6 服务启动后才生成 kubeconfig 文件
- initialize_gateway 启动时文件可能还不存在,导致直接跳过 gateway 初始化
- 现在加入重试逻辑,最多等待 60 秒(每 5 秒检查一次)
- 配合 ConfigNode.value 修复,确保 embedded Higress 在 Docker 中能正常初始化
kinglee hai 1 semana
pai
achega
1684a74d86
Modificáronse 2 ficheiros con 13 adicións e 1 borrados
  1. 1 1
      docker-compose/docker-compose.server.yaml
  2. 12 0
      gpustack/gateway/__init__.py

+ 1 - 1
docker-compose/docker-compose.server.yaml

@@ -24,7 +24,7 @@ services:
     depends_on:
       postgres:
         condition: service_healthy
-    entrypoint: ["/usr/bin/entrypoint.sh", "--gateway-mode", "disabled"]
+    entrypoint: ["/usr/bin/entrypoint.sh", "--gateway-mode", "embedded"]
     environment:
       GPUSTACK_DATABASE_URL: postgresql://gpustack:${POSTGRES_PASSWORD:-gpustack}@postgres:5432/gpustack
     volumes:

+ 12 - 0
gpustack/gateway/__init__.py

@@ -785,6 +785,18 @@ def validate_ai_statistics_plugin_content_types():
 def initialize_gateway(cfg: Config, timeout: int = 60, interval: int = 5):
     if cfg.gateway_mode == GatewayModeEnum.disabled:
         return
+    # For embedded/external mode, wait for kubeconfig file to be available
+    # (apiserver s6 service generates it on startup)
+    if cfg.gateway_mode != GatewayModeEnum.incluster:
+        kubeconfig_path = cfg.gateway_kubeconfig
+        start_time = time.time()
+        while not kubeconfig_path or not os.path.isfile(kubeconfig_path):
+            if time.time() - start_time > timeout:
+                logger.warning(f"Kubeconfig not found at {kubeconfig_path} after {timeout}s, skipping gateway setup")
+                return
+            logger.info(f"Waiting for kubeconfig at {kubeconfig_path}...")
+            time.sleep(interval)
+
     init_async_k8s_config(cfg=cfg)
     # If k8s config couldn't be initialized (e.g., no valid kubeconfig), skip gateway setup
     if async_gateway_config is None: