entrypoint.sh 934 B

123456789101112131415161718192021222324252627
  1. #!/bin/bash
  2. # 容器启动时自动将 backend 代码同步到 253 训练节点
  3. REMOTE_USER="${COMPUTE_NODE_SSH_USER:-root}"
  4. REMOTE_HOST="${COMPUTE_NODE_HOST}"
  5. REMOTE_PASS="${COMPUTE_NODE_SSH_PASSWORD}"
  6. REMOTE_DIR="/root/Fine-tuning/backend"
  7. if [ -n "$REMOTE_HOST" ]; then
  8. echo "=> Syncing backend code to compute node ${REMOTE_HOST} ..."
  9. # 使用 --no-o --no-g 避免 chown 失败,去掉 --delete 避免无权删除远端文件
  10. if [ -n "$REMOTE_PASS" ]; then
  11. sshpass -p "$REMOTE_PASS" rsync -avz --no-o --no-g \
  12. -e "ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5" \
  13. /app/ ${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/
  14. else
  15. rsync -avz --no-o --no-g \
  16. -e "ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5" \
  17. /app/ ${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/
  18. fi
  19. echo "=> Sync done."
  20. else
  21. echo "=> No compute node configured, skipping code sync."
  22. fi
  23. # 启动主进程
  24. exec "$@"