Set up a functional cluster for basic operations
- Include cgroup.conf configuration
- Install slurm-client on compute nodes
- Move the submit user's home directory to /vagrant/scratch/submit for result sharing
- Add Makefile for streamlined setup and cleanup
- Fix idempotency issues
This commit is contained in:
		
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -2,3 +2,4 @@ munge.key | |||||||
| nodes.rb | nodes.rb | ||||||
| .settings.yml | .settings.yml | ||||||
| .vagrant | .vagrant | ||||||
|  | vagrantup.log | ||||||
|   | |||||||
							
								
								
									
										10
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | .PHONY: all vagrant clean | ||||||
|  |  | ||||||
|  | all: vagrant | ||||||
|  |  | ||||||
|  | vagrant: | ||||||
|  | 	vagrant up --no-destroy-on-error --no-color | tee ./vagrantup.log | ||||||
|  |  | ||||||
|  | clean: | ||||||
|  | 	vagrant destroy -f --no-color | ||||||
|  | 	rm -rf .vagrant vagrantup.log munge.key ./scratch/submit | ||||||
							
								
								
									
										10
									
								
								cgroup.conf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								cgroup.conf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | ### | ||||||
|  | # Slurm cgroup support configuration file. | ||||||
|  | ### | ||||||
|  | CgroupAutomount=yes | ||||||
|  | CgroupMountpoint=/sys/fs/cgroup | ||||||
|  | ConstrainCores=yes | ||||||
|  | ConstrainDevices=yes | ||||||
|  | ConstrainKmemSpace=no        #avoid known Kernel issues | ||||||
|  | ConstrainRAMSpace=yes | ||||||
|  | ConstrainSwapSpace=yes | ||||||
							
								
								
									
										79
									
								
								provision.sh
									
									
									
									
									
								
							
							
						
						
									
										79
									
								
								provision.sh
									
									
									
									
									
								
							| @@ -16,7 +16,7 @@ apt-get install -y chrony | |||||||
| systemctl start chrony | systemctl start chrony | ||||||
| systemctl enable chrony | systemctl enable chrony | ||||||
|  |  | ||||||
| # Create a dedicated non-privileged user account for MUNGE | # Create MUNGE user | ||||||
| getent group munge > /dev/null || groupadd -r -g 900 munge | getent group munge > /dev/null || groupadd -r -g 900 munge | ||||||
| id -u munge &>/dev/null || \ | id -u munge &>/dev/null || \ | ||||||
|   useradd -r -u 900 -g munge -d /var/lib/munge -s /usr/sbin/nologin munge |   useradd -r -u 900 -g munge -d /var/lib/munge -s /usr/sbin/nologin munge | ||||||
| @@ -26,25 +26,34 @@ getent group slurm > /dev/null || groupadd -g 1001 slurm | |||||||
| id -u slurm &>/dev/null || \ | id -u slurm &>/dev/null || \ | ||||||
|   useradd -m -u 1001 -g slurm -s /bin/bash slurm |   useradd -m -u 1001 -g slurm -s /bin/bash slurm | ||||||
|  |  | ||||||
| # Create job 'submit' user | # Create submit user | ||||||
| getent group submit > /dev/null || groupadd -g 1002 submit | getent group submit > /dev/null || groupadd -g 1002 submit | ||||||
| id -u submit &>/dev/null || \ | id -u submit &>/dev/null || \ | ||||||
|   useradd -m -u 1002 -g submit -s /bin/bash submit |   useradd -M -u 1002 -g submit -s /bin/bash -d /vagrant/scratch/submit submit | ||||||
|  | mkdir -p /vagrant/scratch/submit | ||||||
|  |  | ||||||
| # Install MUNGE, remove any default key, and stop to another place key later | # Update APT cache | ||||||
| apt-get update | apt-get update | ||||||
| apt-get install -y munge |  | ||||||
| systemctl stop munge | # Install MUNGE, remove any default key, and stop to place another key later | ||||||
| rm -f /etc/munge/munge.key | if ! dpkg -s munge &>/dev/null; then | ||||||
|  |   apt-get install -y munge | ||||||
|  |   systemctl stop munge | ||||||
|  |   rm -f /etc/munge/munge.key | ||||||
|  | fi | ||||||
|  |  | ||||||
|  | # Install slurm client tools | ||||||
|  | dpkg -s slurm-client &>/dev/null || apt-get install -y slurm-client | ||||||
|  |  | ||||||
| # Create directories for Slurm | # Create directories for Slurm | ||||||
| mkdir -p /var/spool/slurm /var/log/slurm /etc/slurm | mkdir -p /var/spool/slurm /var/log/slurm /etc/slurm | ||||||
| chown slurm:slurm /var/spool/slurm /var/log/slurm /etc/slurm | chown slurm:slurm /var/spool/slurm /var/log/slurm /etc/slurm | ||||||
|  |  | ||||||
| # Copy slurm.conf | # Copy slurm.conf and cgroup.conf | ||||||
| cp -u /vagrant/slurm.conf /etc/slurm/slurm.conf | cp -u /vagrant/slurm.conf /etc/slurm/slurm.conf | ||||||
| chown slurm:slurm /etc/slurm/slurm.conf | cp -u /vagrant/cgroup.conf /etc/slurm/cgroup.conf | ||||||
| chmod 644 /etc/slurm/slurm.conf | chown slurm:slurm /etc/slurm/slurm.conf /etc/slurm/cgroup.conf | ||||||
|  | chmod 644 /etc/slurm/slurm.conf /etc/slurm/cgroup.conf | ||||||
|  |  | ||||||
| # node1 = manager | # node1 = manager | ||||||
| if [ "$(hostname)" == "node1" ]; then | if [ "$(hostname)" == "node1" ]; then | ||||||
| @@ -53,34 +62,36 @@ if [ "$(hostname)" == "node1" ]; then | |||||||
|     sudo -u munge /usr/sbin/mungekey --verbose |     sudo -u munge /usr/sbin/mungekey --verbose | ||||||
|   fi |   fi | ||||||
|  |  | ||||||
|   # Set MUNGE key perms |  | ||||||
|   chmod 600 /etc/munge/munge.key |  | ||||||
|  |  | ||||||
|   # Copy to shared directory for other nodes |   # Copy to shared directory for other nodes | ||||||
|   cp /etc/munge/munge.key /vagrant/munge.key |   cp /etc/munge/munge.key /vagrant/munge.key | ||||||
|  |  | ||||||
|   # Enable/start/test munge service |   # Enable/start/test munge service | ||||||
|   systemctl enable munge.service |   chmod 400 /etc/munge/munge.key | ||||||
|   systemctl start munge.service |   systemctl enable munge | ||||||
|   munge -n | unmunge |   systemctl start munge | ||||||
|  |  | ||||||
|   # Install Slurm Workload Manager and doc package for the Slurm config tool |   # Install Slurm Workload Manager and doc package for the Slurm config tool | ||||||
|   apt-get install -y slurm-wlm slurm-wlm-doc |   if ! dpkg -s slurm-wlm &>/dev/null; then | ||||||
|  |     apt-get install -y slurm-wlm slurm-wlm-doc | ||||||
|  |  | ||||||
|   # Create directories for slurmctld |     # Create directories for slurmctld | ||||||
|   mkdir -p /var/spool/slurmctld |     systemctl stop slurmctld | ||||||
|   chown slurm:slurm /var/spool/slurmctld |     mkdir -p /var/spool/slurmctld | ||||||
|  |     chown slurm:slurm /var/spool/slurmctld | ||||||
|  |     chmod 755 /var/spool/slurmctld | ||||||
|  |  | ||||||
|   # Start Slurm controller |     # Start Slurm controller | ||||||
|   systemctl enable slurmctld |     systemctl enable slurmctld | ||||||
|   systemctl start slurmctld |     systemctl start slurmctld | ||||||
|  |   fi | ||||||
| else | else | ||||||
|   # Initial delay |   # Initial delay | ||||||
|   sleep 5 |   sleep 5 | ||||||
|  |  | ||||||
|   # Waits JOIN_TIMEOUT of seconds to find the munge.key file before giving up |   # Waits JOIN_TIMEOUT of seconds to find the munge.key file before giving up | ||||||
|   START_TIME="$(date +%s)" |   START_TIME="$(date +%s)" | ||||||
|   # Wait until the munge.key can be found via Vagrant provider file sharing /vagrant |  | ||||||
|  |   # Wait until the munge.key can be found via Vagrant provider file sharing | ||||||
|   while [ ! -f /vagrant/munge.key ]; do |   while [ ! -f /vagrant/munge.key ]; do | ||||||
|     CURRENT_TIME="$(date +%s)" |     CURRENT_TIME="$(date +%s)" | ||||||
|     DIFF_TIME="$((CURRENT_TIME - START_TIME))" |     DIFF_TIME="$((CURRENT_TIME - START_TIME))" | ||||||
| @@ -100,21 +111,13 @@ else | |||||||
|   cp -f /vagrant/munge.key /etc/munge/munge.key |   cp -f /vagrant/munge.key /etc/munge/munge.key | ||||||
|   chown munge:munge /etc/munge/munge.key |   chown munge:munge /etc/munge/munge.key | ||||||
|   chmod 400 /etc/munge/munge.key |   chmod 400 /etc/munge/munge.key | ||||||
|   systemctl enable munge.service |   systemctl enable munge | ||||||
|   systemctl start munge.service |   systemctl start munge | ||||||
|   munge -n | unmunge |  | ||||||
|  |  | ||||||
|   # Submit job as 'submit' on node2 |   # Install SLURM compute node daemon on node[3-4] | ||||||
|   if [ "$(hostname)" == "node2" ]; then |   if [[ $(hostname) == node[3-4] ]]; then | ||||||
|     # Install Slurm client tools |     mkdir -p /var/spool/slurmd | ||||||
|     apt-get install -y slurm-client |     chown slurm:slurm /var/spool/slurmd | ||||||
|  |  | ||||||
|     # Submit a test job as the 'submit' user |  | ||||||
|     sleep 10 |  | ||||||
|     sudo -u submit bash -c 'sbatch -N2 --wrap="srun hostname"' |  | ||||||
|     sudo -u submit squeue |  | ||||||
|   else |  | ||||||
|     # Install SLURM compute node daemon on node3+ |  | ||||||
|     apt-get install -y slurmd |     apt-get install -y slurmd | ||||||
|     systemctl enable slurmd |     systemctl enable slurmd | ||||||
|     systemctl start slurmd |     systemctl start slurmd | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								scratch/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								scratch/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | |||||||
|  | * | ||||||
|  | !.gitignore | ||||||
		Reference in New Issue
	
	Block a user