From 1e95dd7b2f630f2764cc7b8d917d27e158c18018 Mon Sep 17 00:00:00 2001 From: Kris Lamoureux Date: Sun, 18 Aug 2024 00:00:08 -0400 Subject: [PATCH] Fix munge key race condition and update docs - Add sleep to prevent munge.key race condition - Warn about CPU override conflicts in README - Update README with CPU config details for minimal setup - Correct `less` command examples for prime number output - Force update slurm/cgroups configs in the provision script --- README.md | 11 ++++++++++- provision.sh | 11 ++++++----- slurm.conf | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 89ae680..183c7a8 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ By default, each node is allocated: 4. View the resulting prime numbers found, check `ls` for exact filenames less slurm-1_0.out - less slurm-2_1.out + less slurm-1_1.out ### Configuration Tool @@ -96,6 +96,10 @@ ignored by .gitignore. Be cautious when using this command as it will delete files that are not tracked by Git. Use the `-n` flag to dry-run first. ## Global Overrides + +**WARNING:** Always update `slurm.conf` to match any CPU overrides to prevent +resource allocation conflicts. + If you wish to override the default settings on a global level, you can do so by creating a `.settings.yml` file based on the provided `example-.settings.yml` file: @@ -125,6 +129,11 @@ file without modifications. This results in a cluster configuration using only 1 vCPU and 1 GB RAM per node (totaling 4 threads/cores and 4 GB RAM), allowing basic operation on modest hardware. +When using this minimal setup with 1 vCPU, you'll need to update the `slurm.conf` file. +Apply the following change to the default `slurm.conf`: + + sed -i 's/CPUs=2/CPUs=1/g' slurm.conf + ### Slurm Settings Overrides - `SLURM_NODES` - Default: `4` diff --git a/provision.sh b/provision.sh index 2101a90..795f903 100755 --- a/provision.sh +++ b/provision.sh @@ -46,12 +46,12 @@ fi dpkg -s slurm-client &>/dev/null || apt-get install -y slurm-client # Create directories for Slurm -mkdir -p /var/spool/slurm /var/log/slurm /etc/slurm -chown slurm:slurm /var/spool/slurm /var/log/slurm /etc/slurm +mkdir -p /var/spool/slurm /etc/slurm +chown slurm:slurm /var/spool/slurm /etc/slurm # Copy slurm.conf and cgroup.conf -cp -u /vagrant/slurm.conf /etc/slurm/slurm.conf -cp -u /vagrant/cgroup.conf /etc/slurm/cgroup.conf +cp -f /vagrant/slurm.conf /etc/slurm/slurm.conf +cp -f /vagrant/cgroup.conf /etc/slurm/cgroup.conf chown slurm:slurm /etc/slurm/slurm.conf /etc/slurm/cgroup.conf chmod 644 /etc/slurm/slurm.conf /etc/slurm/cgroup.conf @@ -107,7 +107,8 @@ else sleep 10 done - # Enable/start/test munge service + # Enable/start munge service + sleep 3 cp -f /vagrant/munge.key /etc/munge/munge.key chown munge:munge /etc/munge/munge.key chmod 400 /etc/munge/munge.key diff --git a/slurm.conf b/slurm.conf index cbfb6fb..5a48aca 100644 --- a/slurm.conf +++ b/slurm.conf @@ -1,4 +1,4 @@ -#slurm.conf file generated by configurator easy.html. +# slurm.conf file generated by configurator easy.html. # Put this file on all nodes of your cluster. # See the slurm.conf man page for more information. #