diff --git a/.gitignore b/.gitignore
index 8927f51..d372503 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ munge.key
 nodes.rb
 .settings.yml
 .vagrant
+vagrantup.log
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6ef1138
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+.PHONY: all vagrant clean
+
+all: vagrant
+
+# Run the pipeline under bash with pipefail so that a failed `vagrant up`
+# fails this target; a plain `cmd | tee` reports only tee's exit status.
+vagrant:
+	bash -o pipefail -c 'vagrant up --no-destroy-on-error --no-color | tee ./vagrantup.log'
+
+# Destroy the VMs and remove the generated artifacts.
+clean:
+	vagrant destroy -f --no-color
+	rm -rf .vagrant vagrantup.log munge.key ./scratch/submit
diff --git a/cgroup.conf b/cgroup.conf
new file mode 100644
index 0000000..4199fbd
--- /dev/null
+++ b/cgroup.conf
@@ -0,0 +1,10 @@
+###
+# Slurm cgroup support configuration file.
+###
+CgroupAutomount=yes
+CgroupMountpoint=/sys/fs/cgroup
+ConstrainCores=yes
+ConstrainDevices=yes
+ConstrainKmemSpace=no #avoid known Kernel issues
+ConstrainRAMSpace=yes
+ConstrainSwapSpace=yes
diff --git a/provision.sh b/provision.sh
index a0fe65f..2101a90 100755
--- a/provision.sh
+++ b/provision.sh
@@ -16,7 +16,7 @@ apt-get install -y chrony
 systemctl start chrony
 systemctl enable chrony
 
-# Create a dedicated non-privileged user account for MUNGE
+# Create MUNGE user
 getent group munge > /dev/null || groupadd -r -g 900 munge
 id -u munge &>/dev/null || \
   useradd -r -u 900 -g munge -d /var/lib/munge -s /usr/sbin/nologin munge
@@ -26,25 +26,34 @@ getent group slurm > /dev/null || groupadd -g 1001 slurm
 id -u slurm &>/dev/null || \
   useradd -m -u 1001 -g slurm -s /bin/bash slurm
 
-# Create job 'submit' user
+# Create submit user
 getent group submit > /dev/null || groupadd -g 1002 submit
 id -u submit &>/dev/null || \
-  useradd -m -u 1002 -g submit -s /bin/bash submit
+  useradd -M -u 1002 -g submit -s /bin/bash -d /vagrant/scratch/submit submit
+mkdir -p /vagrant/scratch/submit
 
-# Install MUNGE, remove any default key, and stop to another place key later
+# Update APT cache
 apt-get update
-apt-get install -y munge
-systemctl stop munge
-rm -f /etc/munge/munge.key
+
+# Install MUNGE, remove any default key, and stop to place another key later
+if ! dpkg -s munge &>/dev/null; then
+  apt-get install -y munge
+  systemctl stop munge
+  rm -f /etc/munge/munge.key
+fi
+
+# Install slurm client tools
+dpkg -s slurm-client &>/dev/null || apt-get install -y slurm-client
 
 # Create directories for Slurm
 mkdir -p /var/spool/slurm /var/log/slurm /etc/slurm
 chown slurm:slurm /var/spool/slurm /var/log/slurm /etc/slurm
 
-# Copy slurm.conf
+# Copy slurm.conf and cgroup.conf
 cp -u /vagrant/slurm.conf /etc/slurm/slurm.conf
-chown slurm:slurm /etc/slurm/slurm.conf
-chmod 644 /etc/slurm/slurm.conf
+cp -u /vagrant/cgroup.conf /etc/slurm/cgroup.conf
+chown slurm:slurm /etc/slurm/slurm.conf /etc/slurm/cgroup.conf
+chmod 644 /etc/slurm/slurm.conf /etc/slurm/cgroup.conf
 
 # node1 = manager
 if [ "$(hostname)" == "node1" ]; then
@@ -53,34 +62,36 @@ if [ "$(hostname)" == "node1" ]; then
     sudo -u munge /usr/sbin/mungekey --verbose
   fi
 
-  # Set MUNGE key perms
-  chmod 600 /etc/munge/munge.key
-
   # Copy to shared directory for other nodes
   cp /etc/munge/munge.key /vagrant/munge.key
 
   # Enable/start/test munge service
-  systemctl enable munge.service
-  systemctl start munge.service
-  munge -n | unmunge
+  chmod 400 /etc/munge/munge.key
+  systemctl enable munge
+  systemctl start munge
 
   # Install Slurm Workload Manager and doc package for the Slurm config tool
-  apt-get install -y slurm-wlm slurm-wlm-doc
+  if ! dpkg -s slurm-wlm &>/dev/null; then
+    apt-get install -y slurm-wlm slurm-wlm-doc
 
-  # Create directories for slurmctld
-  mkdir -p /var/spool/slurmctld
-  chown slurm:slurm /var/spool/slurmctld
+    # Create directories for slurmctld
+    systemctl stop slurmctld
+    mkdir -p /var/spool/slurmctld
+    chown slurm:slurm /var/spool/slurmctld
+    chmod 755 /var/spool/slurmctld
 
-  # Start Slurm controller
-  systemctl enable slurmctld
-  systemctl start slurmctld
+    # Start Slurm controller
+    systemctl enable slurmctld
+    systemctl start slurmctld
+  fi
 else
   # Initial delay
   sleep 5
 
   # Waits JOIN_TIMEOUT of seconds to find the munge.key file before giving up
   START_TIME="$(date +%s)"
-  # Wait until the munge.key can be found via Vagrant provider file sharing /vagrant
+
+  # Wait until the munge.key can be found via Vagrant provider file sharing
   while [ ! -f /vagrant/munge.key ]; do
     CURRENT_TIME="$(date +%s)"
     DIFF_TIME="$((CURRENT_TIME - START_TIME))"
@@ -100,21 +111,13 @@ else
   cp -f /vagrant/munge.key /etc/munge/munge.key
   chown munge:munge /etc/munge/munge.key
   chmod 400 /etc/munge/munge.key
-  systemctl enable munge.service
-  systemctl start munge.service
-  munge -n | unmunge
+  systemctl enable munge
+  systemctl start munge
 
-  # Submit job as 'submit' on node2
-  if [ "$(hostname)" == "node2" ]; then
-    # Install Slurm client tools
-    apt-get install -y slurm-client
-
-    # Submit a test job as the 'submit' user
-    sleep 10
-    sudo -u submit bash -c 'sbatch -N2 --wrap="srun hostname"'
-    sudo -u submit squeue
-  else
-    # Install SLURM compute node daemon on node3+
+  # Install SLURM compute node daemon on node[3-4]
+  if [[ $(hostname) == node[3-4] ]]; then
+    mkdir -p /var/spool/slurmd
+    chown slurm:slurm /var/spool/slurmd
     apt-get install -y slurmd
     systemctl enable slurmd
     systemctl start slurmd
diff --git a/scratch/.gitignore b/scratch/.gitignore
new file mode 100644
index 0000000..d6b7ef3
--- /dev/null
+++ b/scratch/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore