diff --git a/roles/slurm-build/tasks/main.yml b/roles/slurm-build/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..add696e828fee1eee49d2f241442a7aa44d42724
--- /dev/null
+++ b/roles/slurm-build/tasks/main.yml
@@ -0,0 +1,55 @@
+---
+- name: install deps
+  yum: name={{ item }} state=installed
+  sudo: true
+  with_items:
+    - gcc
+    - rpm-build
+    - wget
+    - openssl-devel
+    - readline-devel
+    - pam-devel
+    - perl-ExtUtils-MakeMaker
+    - bzip2-devel
+
+- name: get munge
+  shell: wget https://munge.googlecode.com/files/munge-{{ munge_version }}.tar.bz2
+  args:
+    chdir: /tmp
+    creates: /tmp/munge-{{ munge_version }}.tar.bz2
+
+- name: make munge rpms
+  shell: rpmbuild -ta --clean munge-{{ munge_version }}.tar.bz2
+  sudo: true
+  args:
+    chdir: /tmp
+    creates: /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm
+
+- name: get slurm
+  shell: wget http://www.schedmd.com/download/latest/slurm-{{ slurm_version }}.tar.bz2
+  args:
+    chdir: /tmp
+    creates: /tmp/slurm-{{ slurm_version }}.tar.bz2
+
+- name: install munge deps
+  shell: rpm -i /root/rpmbuild/RPMS/x86_64/munge-libs-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-devel-{{ munge_version }}-1.el6.x86_64.rpm
+  sudo: true
+  ignore_errors: true
+
+- name: make slurm rpms
+  shell: rpmbuild -ta --clean slurm-{{ slurm_version }}.tar.bz2
+  sudo: true
+  args:
+    chdir: /tmp
+    creates: /root/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm
+
+- name: copy rpms
+  shell: cp -r /root/rpmbuild /tmp
+  sudo: true
+  args:
+    creates: /tmp/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm
+
+- name: retrieve rpms
+  shell: scp -r {{ hostvars[ansible_hostname]['ansible_user_id'] }}@{{ ansible_ssh_host }}:/tmp/rpmbuild/ /tmp
+  delegate_to: 127.0.0.1
+
diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f2c4f28801f466b420288f619cd9683e7d642a33
--- /dev/null
+++ b/roles/slurm/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+- name: restart munge
+  service: name=munge state=restarted
+  sudo: true
+
+- name: restart slurm
+  service: name=slurm state=restarted
+  sudo: true
diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0d6513c6af6d957e2370d298be52c5c7d1af1648
--- /dev/null
+++ b/roles/slurm/tasks/main.yml
@@ -0,0 +1,49 @@
+---
+- name: copy rpms
+  copy: src=/tmp/rpmbuild dest=/tmp/
+
+- name: install munge rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/munge*{{ munge_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: install perl
+  yum: name={{ item }} state=latest
+  with_items:
+    - perl
+    - perl-DBI
+  sudo: true
+
+- name: create slurm group
+  group: name=slurm
+  sudo: true
+
+- name: create slurm user
+  user: name=slurm group=slurm
+  sudo: true
+
+- name: install slurm rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/slurm*{{ slurm_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: load munge key
+  include_vars: passwords.yml
+
+- name: install munge key
+  template: src=munge_key.j2 dest=/etc/munge/munge.key
+  sudo: true
+  notify: restart munge
+
+- name: start munge
+  service: name=munge state=started
+  sudo: true
+
+- name: install slurm.conf
+  template: src=slurm.conf.j2 dest=/etc/slurm/slurm.conf
+  sudo: true
+  notify: restart slurm
+
+- name: start slurm
+  service: name=slurm state=started
+  sudo: true
diff --git a/roles/slurm/templates/munge_key.j2 b/roles/slurm/templates/munge_key.j2
new file mode 100644
index 0000000000000000000000000000000000000000..83d3483ee198fffce76dd82dee5cbe1fb8c0ab8f
--- /dev/null
+++ b/roles/slurm/templates/munge_key.j2
@@ -0,0 +1 @@
+{{ mungekey }}
diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2
new file mode 100644
index 0000000000000000000000000000000000000000..1c5649539bf2c8e0f02c31394ae820be1d2a004a
--- /dev/null
+++ b/roles/slurm/templates/slurm.conf.j2
@@ -0,0 +1,108 @@
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=CIAB
+ControlMachine={{ slurmctrl }}
+#ControlAddr=
+#BackupController=
+#BackupAddr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+CacheGroups=0
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SchedulerPort=
+#SchedulerRootFilter=
+SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+#JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageHost=
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+MpiParams=ports=12000-12999
+# COMPUTE NODES
+{% set nodelist = [] %}
+{% for queue in slurmqueues %}
+{% for node in groups[queue.group] %}
+{% if nodelist.append(node) %}
+{% endif %}
+{% endfor %}
+{% endfor %}
+{% for node in nodelist|unique %}
+NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
+{% endfor %}
+{% for queue in slurmqueues %}
+PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
+{% endfor %}
diff --git a/roles/slurm/vars/main.yml b/roles/slurm/vars/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..755aace05664cee4c402f0e6b70e6d36ab2afd75
--- /dev/null
+++ b/roles/slurm/vars/main.yml
@@ -0,0 +1,2 @@
+---
+slurm_use_vpn: True
diff --git a/topplay.yml b/topplay.yml
index 37280b0a7b43fa70a41e65e0c5e7c1b4dfc74e8a..992c5e974336eb5d50a4b499e160283064bc68be 100644
--- a/topplay.yml
+++ b/topplay.yml
@@ -4,6 +4,35 @@
   roles:
     - etcHosts
 
+#- hosts: 'x509_ca'
+#  vars:
+#  roles:
+#    - { role: easy-rsa-CA }
+
+#- hosts: 'OpenVPN-Server'
+#  vars:
+#    x509_ca_server: "{{ groups['x509_ca'][0] }}"
+#  roles:
+#    - { role: OpenVPN-Server }
+
+#- hosts: 'OpenVPN-Client'
+#  vars:
+#    x509_ca_server: "{{ groups['x509_ca'][0] }}"
+#    openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
+#  roles:
+#    - { role: OpenVPN-Client }
+#
+#
+#- hosts: 'SubmitHost'
+#  roles:
+#    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
+
+- hosts: ComputeNode
+  vars:
+    openmpi_version: 1.8.3
+    build_node: aciab-comp-0
+  roles:
+
 - hosts: 'x509_ca'
   vars:
   roles:
@@ -21,3 +50,22 @@
     openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
   roles:
     - { role: OpenVPN-Client }
+
+- hosts: 'SubmitHost'
+  roles:
+    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
+
+- hosts: '*'
+  vars:
+    slurm_version: 14.11.0
+    munge_version: 0.5.11
+    slurmqueues:
+      - {name: DEFAULT, group: ComputeNode}
+      - {name: batch, group: ComputeNode}
+      - {name: gpu, group: ComputeNode}
+    slurmctrl: "{{ groups['SubmitHost'][0] }}"
+  roles:
+    - { role: slurm }
+    - { role: test_user }
+    - { role: openmpi-build }
+
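
Note on the sudo keyword: the roles above use the Ansible 1.x task keyword sudo: true throughout. Later Ansible releases replace sudo/sudo_user with become/become_user, so running these playbooks under a newer Ansible would require the newer spelling. A minimal sketch of the first slurm-build task in that style (illustration only, not part of this change; assumes an Ansible version whose yum module accepts a list for name):

    - name: install deps
      yum:
        name:
          - gcc
          - rpm-build
          - wget
          - openssl-devel
          - readline-devel
          - pam-devel
          - perl-ExtUtils-MakeMaker
          - bzip2-devel
        state: present
      become: true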
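
The "load munge key" task in roles/slurm/tasks/main.yml reads a passwords.yml that is not part of this diff; munge_key.j2 then writes its mungekey variable verbatim to /etc/munge/munge.key on every node. A sketch of what that vars file could contain (the file name comes from the task above; the generation command and the use of ansible-vault are suggestions, not part of this change):

    ---
    # passwords.yml - keep out of version control, or encrypt with ansible-vault.
    # MUNGE only needs high-entropy key material; one way to produce a printable value:
    #   dd if=/dev/urandom bs=1 count=1024 | base64 -w 0
    mungekey: "<generated key material>"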
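
For reference, with the slurmqueues and slurmctrl values defined in topplay.yml above and a hypothetical ComputeNode group containing two 4-core hosts, aciab-comp-0 and aciab-comp-1 (the second host name and the core counts are assumptions, not part of this change), the node and partition loops at the end of slurm.conf.j2 would render roughly as:

    NodeName=aciab-comp-0 Procs=4 State=UNKNOWN
    NodeName=aciab-comp-1 Procs=4 State=UNKNOWN
    PartitionName=DEFAULT Nodes=aciab-comp-0,aciab-comp-1
    PartitionName=batch Nodes=aciab-comp-0,aciab-comp-1
    PartitionName=gpu Nodes=aciab-comp-0,aciab-comp-1

Because the template reads hostvars[node]['ansible_processor_cores'], facts must have been gathered on every compute node before the slurm role renders the file.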