# entrypoint.sh (1014 B)
  1. #!/bin/bash
  2. # 容器启动时自动将 backend 代码同步到 253 训练节点
  3. REMOTE_USER="${COMPUTE_NODE_SSH_USER:-root}"
  4. REMOTE_HOST="${COMPUTE_NODE_HOST}"
  5. REMOTE_PASS="${COMPUTE_NODE_SSH_PASSWORD}"
  6. REMOTE_DIR="/root/Fine-tuning/backend"
  7. if [ -n "$REMOTE_HOST" ]; then
  8. echo "=> Syncing backend code to compute node ${REMOTE_HOST} ..."
  9. # 使用 --no-o --no-g 避免 chown 失败,去掉 --delete 避免无权删除远端文件
  10. if [ -n "$REMOTE_PASS" ]; then
  11. sshpass -p "$REMOTE_PASS" rsync -avz --no-o --no-g --ignore-times \
  12. --exclude 'data' \
  13. -e "ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5" \
  14. /app/ ${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/
  15. else
  16. rsync -avz --no-o --no-g --ignore-times \
  17. --exclude 'data' \
  18. -e "ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5" \
  19. /app/ ${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_DIR}/
  20. fi
  21. echo "=> Sync done."
  22. else
  23. echo "=> No compute node configured, skipping code sync."
  24. fi
  25. # 启动主进程
  26. exec "$@"