vagrant-slurm/provision.sh

73 lines
1.9 KiB
Bash
Executable File

#!/bin/bash
#######################################
### Install and setup Slurm cluster ###
#######################################
# Shell options: trace each command as it runs (xtrace) and abort on the
# first command that fails (errexit).
set -o errexit -o xtrace
# Suppress interactive prompts while packages are being installed.
DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND
# Clock synchronisation: install chrony, then start it and enable it at boot.
apt-get update
apt-get --yes install chrony
for chrony_action in start enable; do
  systemctl "$chrony_action" chrony
done
# Install MUNGE.
#
# The dedicated non-privileged "munge" account is created BEFORE the package
# is installed. Both creation commands are guarded by existence checks, so
# running them after the install turns them into no-ops whenever the package
# setup has already created the account, and the fixed UID/GID of 900 would
# then never take effect.
# NOTE(review): assumes the distro's munge package reuses a pre-existing
# "munge" user/group instead of failing or recreating it -- confirm.
getent group munge > /dev/null || groupadd -r -g 900 munge
id -u munge &> /dev/null || \
  useradd -r -u 900 -g munge -d /var/lib/munge -s /usr/sbin/nologin munge
# Refresh the package index and install the MUNGE package itself.
apt-get update
apt-get install -y munge
# Share one common MUNGE key between all nodes through the /vagrant folder:
# node1 (the manager) publishes its key, every other node waits for that file,
# installs it locally and then brings up the munge service.

# Enable munge at boot, (re)start it and run a local encode/decode round trip.
# A restart is used instead of a plain start because "start" does nothing when
# the daemon is already running, in which case it would keep using whatever
# key it loaded earlier rather than the key that is on disk now.
activate_munge() {
  systemctl enable munge.service
  systemctl restart munge.service
  munge -n | unmunge
}

# node1 = manager
if [[ "$(hostname)" == "node1" ]]; then
  # Create the common MUNGE key on the manager node unless one already exists
  if [[ ! -f /etc/munge/munge.key ]]; then
    sudo -u munge /usr/sbin/mungekey --verbose
  fi
  # Restrict the key to its owner
  chmod 600 /etc/munge/munge.key
  # Copy to the shared directory for the other nodes
  cp /etc/munge/munge.key /vagrant/munge.key
  activate_munge
else
  # Maximum number of seconds to wait for the shared key. JOIN_TIMEOUT is
  # expected from the environment; without a usable integer the timeout test
  # below would error out inside the "if" and the loop would never give up.
  # The 300 second fallback is an arbitrary default -- adjust as needed.
  JOIN_TIMEOUT="${JOIN_TIMEOUT:-300}"
  if [[ ! "$JOIN_TIMEOUT" =~ ^[0-9]+$ ]]; then
    echo "[ERROR]: JOIN_TIMEOUT must be a non-negative integer, got '$JOIN_TIMEOUT'" >&2
    exit 1
  fi
  # Initial delay
  sleep 5
  START_TIME="$(date +%s)"
  # Wait up to JOIN_TIMEOUT seconds until munge.key shows up via the Vagrant
  # provider file sharing (/vagrant). The file must also be non-empty so a
  # key that has been created but not yet written is not picked up.
  while [[ ! -f /vagrant/munge.key || ! -s /vagrant/munge.key ]]; do
    CURRENT_TIME="$(date +%s)"
    DIFF_TIME="$((CURRENT_TIME - START_TIME))"
    # Timeout ("[" compares as plain decimal, so leading zeros are harmless)
    if [ "$DIFF_TIME" -ge "$JOIN_TIMEOUT" ]; then
      echo "[ERROR]: $(hostname) waited $DIFF_TIME/$JOIN_TIMEOUT seconds" >&2
      exit 1
    fi
    # Waiting
    echo "Waiting ($DIFF_TIME/$JOIN_TIMEOUT seconds) for /vagrant/munge.key file"
    sleep 10
  done
  # Install the shared key with the same owner-only mode the manager uses and
  # owned by the munge account, independent of how the shared folder presents
  # the file's owner and permissions.
  install -o munge -g munge -m 0600 /vagrant/munge.key /etc/munge/munge.key
  activate_munge
fi