diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f2c4f28801f466b420288f619cd9683e7d642a33
--- /dev/null
+++ b/roles/slurm/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+ - name: restart munge
+   service: name=munge state=restarted
+   sudo: true
+
+ - name: restart slurm
+   service: name=slurm state=restarted
+   sudo: true
diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0d6513c6af6d957e2370d298be52c5c7d1af1648
--- /dev/null
+++ b/roles/slurm/tasks/main.yml
@@ -0,0 +1,49 @@
+---
+- name: copy rpms
+  copy: src=/tmp/rpmbuild dest=/tmp/
+
+- name: install munge rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/munge*{{ munge_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: install perl
+  yum: name={{ item }} state=latest
+  with_items:
+    - perl
+    - perl-DBI
+  sudo: true
+
+- name: create slurm group
+  group: name=slurm
+  sudo: true
+
+- name: create slurm user
+  user: name=slurm group=slurm
+  sudo: true
+
+- name: install slurm rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/slurm*{{ slurm_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: load munge key
+  include_vars: passwords.yml
+
+- name: install munge key
+  template: src=munge_key.j2 dest=/etc/munge/munge.key
+  sudo: true
+  notify: restart munge
+
+- name: start munge
+  service: name=munge state=started
+  sudo: true
+
+- name: install slurm.conf
+  template: src=slurm.conf.j2 dest=/etc/slurm/slurm.conf
+  sudo: true
+  notify: restart slurm
+
+- name: start slurm
+  service: name=slurm state=started
+  sudo: true
diff --git a/roles/slurm/templates/munge_key.j2 b/roles/slurm/templates/munge_key.j2
new file mode 100644
index 0000000000000000000000000000000000000000..83d3483ee198fffce76dd82dee5cbe1fb8c0ab8f
--- /dev/null
+++ b/roles/slurm/templates/munge_key.j2
@@ -0,0 +1 @@
+{{ mungekey }}
diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2
new file mode 100644
index 0000000000000000000000000000000000000000..b5475d92d5575ff4a13f7441816856608d42cfbe
--- /dev/null
+++ b/roles/slurm/templates/slurm.conf.j2
@@ -0,0 +1,102 @@
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=CIAB
+ControlMachine={{ slurmctrl }}
+#ControlAddr=
+#BackupController=
+#BackupAddr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+CacheGroups=0
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SchedulerPort=
+#SchedulerRootFilter=
+SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+#JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageHost=
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+# COMPUTE NODES
+{% for queue in slurmqueues %}
+{% for node in groups[queue.group] %}
+NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
+{% endfor %}
+{% endfor %}
+{% for queue in slurmqueues %}
+PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
+{% endfor %}
diff --git a/topplay.yml b/topplay.yml
index 37280b0a7b43fa70a41e65e0c5e7c1b4dfc74e8a..e264a87144986e5afc44d5c4c213c1ad3f4720b8 100644
--- a/topplay.yml
+++ b/topplay.yml
@@ -21,3 +21,22 @@
     openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
   roles:
     - { role: OpenVPN-Client }
+
+
+- hosts: 'SubmitHost'
+  roles:
+    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
+
+- hosts: '*'
+  vars:
+    slurm_version: 14.11.0
+    munge_version: 0.5.11
+    slurmqueues:
+      - {name: DEFAULT, group: ComputeNode}
+      - {name: batch, group: ComputeNode}
+      - {name: gpu, group: ComputeNode}
+    slurmctrl: "{{ groups['SubmitHost'][0] }}"
+  roles:
+    - { role: slurm }
+    - { role: test_user }
+
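
Note: the "load munge key" task includes a vars file, passwords.yml, that is not part of this diff; templates/munge_key.j2 renders its mungekey variable into /etc/munge/munge.key on every host. A minimal sketch of what roles/slurm/vars/passwords.yml could look like under that assumption (the path follows include_vars' default lookup in the role's vars directory, and the value is a placeholder; a real deployment would use a long random secret, ideally vault-encrypted):

    ---
    # Placeholder shared secret rendered into /etc/munge/munge.key by
    # templates/munge_key.j2. Any sufficiently long random string works,
    # since munge only requires identical key bytes on all nodes.
    mungekey: "REPLACE-WITH-LONG-RANDOM-STRING"

Given an inventory that defines the SubmitHost and ComputeNode groups, the plays then apply with the usual invocation, e.g. ansible-playbook -i <inventory> topplay.yml.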