From 71a2c08013c4e43621acd46997d965fab19e5f4d Mon Sep 17 00:00:00 2001
From: Chris Hines <chris.hines@monash.edu>
Date: Mon, 17 Nov 2014 04:22:13 +0000
Subject: [PATCH] a new slurm role. Slurm communicates on the default
 interface currently, need to alter slurm.conf.j2 to use the VPN interface

---
Review notes (this section is ignored by `git am`):
 - roles/slurm/tasks/main.yml: munge.key is now installed with
   owner=munge group=munge mode=0400. Without explicit ownership and mode
   the key is created root-owned with umask-derived permissions.
 - roles/slurm/templates/slurm.conf.j2: each node is now written once.
   The previous nested loop wrote one NodeName line per (queue, node) pair,
   so queues sharing a host group (as all three do in topplay.yml) produced
   repeated NodeName definitions.
 - Hunk sizes and the diffstat reflect these edits; the blob ids on the
   "index" lines are those of the original commit.
 - The rpm install tasks still use ignore_errors, so a genuinely failed
   install is not reported; left unchanged here.

 roles/slurm/handlers/main.yml       |   8 +++
 roles/slurm/tasks/main.yml          |  50 +++++++++++++
 roles/slurm/templates/munge_key.j2  |   1 +
 roles/slurm/templates/slurm.conf.j2 | 107 ++++++++++++++++++++++++++++
 topplay.yml                         |  19 +++++
 5 files changed, 185 insertions(+)
 create mode 100644 roles/slurm/handlers/main.yml
 create mode 100644 roles/slurm/tasks/main.yml
 create mode 100644 roles/slurm/templates/munge_key.j2
 create mode 100644 roles/slurm/templates/slurm.conf.j2

diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml
new file mode 100644
index 00000000..f2c4f288
--- /dev/null
+++ b/roles/slurm/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+  - name: restart munge
+    service: name=munge state=restarted
+    sudo: true
+
+  - name: restart slurm
+    service: name=slurm state=restarted
+    sudo: true
diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml
new file mode 100644
index 00000000..0d6513c6
--- /dev/null
+++ b/roles/slurm/tasks/main.yml
@@ -0,0 +1,50 @@
+---
+- name: copy rpms
+  copy: src=/tmp/rpmbuild dest=/tmp/
+
+- name: install munge rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/munge*{{ munge_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: install perl
+  yum: name={{ item }} state=latest
+  with_items:
+    - perl
+    - perl-DBI
+  sudo: true
+
+- name: create slurm group
+  group: name=slurm
+  sudo: true
+
+- name: create slurm user
+  user: name=slurm group=slurm
+  sudo: true
+
+- name: install slurm rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/slurm*{{ slurm_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: load munge key
+  include_vars: passwords.yml
+
+# munged rejects a key that others can read or that its own user does not own.
+- name: install munge key
+  template: src=munge_key.j2 dest=/etc/munge/munge.key owner=munge group=munge mode=0400
+  sudo: true
+  notify: restart munge
+
+- name: start munge
+  service: name=munge state=started
+  sudo: true
+
+- name: install slurm.conf
+  template: src=slurm.conf.j2 dest=/etc/slurm/slurm.conf
+  sudo: true
+  notify: restart slurm
+
+- name: start slurm
+  service: name=slurm state=started
+  sudo: true
diff --git a/roles/slurm/templates/munge_key.j2 b/roles/slurm/templates/munge_key.j2
new file mode 100644
index 00000000..83d3483e
--- /dev/null
+++ b/roles/slurm/templates/munge_key.j2
@@ -0,0 +1 @@
+{{ mungekey }}
diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2
new file mode 100644
index 00000000..b5475d92
--- /dev/null
+++ b/roles/slurm/templates/slurm.conf.j2
@@ -0,0 +1,107 @@
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=CIAB
+ControlMachine={{ slurmctrl }}
+#ControlAddr=
+#BackupController=
+#BackupAddr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+CacheGroups=0
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SchedulerPort=
+#SchedulerRootFilter=
+SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+#JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageHost=
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+# COMPUTE NODES
+# Each node is defined once, even when several queues share a host group.
+{% set defined_nodes = [] %}
+{% for queue in slurmqueues %}
+{% for node in groups[queue.group] %}
+{% if node not in defined_nodes %}
+NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
+{% if defined_nodes.append(node) %}{% endif %}
+{% endif %}
+{% endfor %}
+{% endfor %}
+{% for queue in slurmqueues %}
+PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
+{% endfor %}
diff --git a/topplay.yml b/topplay.yml
index 37280b0a..e264a871 100644
--- a/topplay.yml
+++ b/topplay.yml
@@ -21,3 +21,22 @@
     openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
   roles:
     - { role: OpenVPN-Client }
+
+
+- hosts: 'SubmitHost'
+  roles:
+    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
+
+- hosts: '*'
+  vars:
+    slurm_version: 14.11.0
+    munge_version: 0.5.11
+    slurmqueues:
+      - {name: DEFAULT, group: ComputeNode}
+      - {name: batch, group: ComputeNode}
+      - {name: gpu, group: ComputeNode}
+    slurmctrl: "{{ groups['SubmitHost'][0] }}"
+  roles:
+    - { role: slurm }
+    - { role: test_user }
+
-- 
GitLab