diff --git a/roles/easy-rsa-certificate/tasks/buildCert.yml b/roles/easy-rsa-certificate/tasks/buildCert.yml
index 265efd31645ca13347bd9b2d1122286ea16762bf..ee22f077b354dbfd53a82106e1babdeb32e852e8 100644
--- a/roles/easy-rsa-certificate/tasks/buildCert.yml
+++ b/roles/easy-rsa-certificate/tasks/buildCert.yml
@@ -51,12 +51,14 @@
   when: needcert
 
 - name: "Copy CSR to CA"
+  remote_user: "{{ hostvars[x509_ca_server]['ansible_ssh_user'] }}"
   delegate_to: "{{ x509_ca_server }}"
   copy: "src=/tmp/{{ x509_common_name }}.csr dest=/etc/easy-rsa/2.0/keys/{{ x509_common_name }}.csr force=yes"
   when: needcert
   sudo: true
 
 - name: "Sign Certificate"
+  remote_user: "{{ hostvars[x509_ca_server]['ansible_ssh_user'] }}"
   delegate_to: "{{ x509_ca_server }}"
   shell: ". ./vars; export EASY_RSA=\"${EASY_RSA:-.}\" ;\"$EASY_RSA\"/pkitool --sign {{ x509_sign_args }} {{ x509_common_name }}"
   args:
@@ -65,12 +67,14 @@
   sudo: true
 
 - name: "Copy the Certificate to ansible host"
+  remote_user: "{{ hostvars[x509_ca_server]['ansible_ssh_user'] }}"
   delegate_to: "{{ x509_ca_server }}"
   fetch: "src=/etc/easy-rsa/2.0/keys/{{ x509_common_name }}.crt dest=/tmp/ fail_on_missing=yes validate_md5=yes flat=yes"
   sudo: true
   when: needcert
 
 - name: "Copy the CA Certificate to the ansible host"
+  remote_user: "{{ hostvars[x509_ca_server]['ansible_ssh_user'] }}"
   delegate_to: "{{ x509_ca_server }}"
   fetch: "src=/etc/easy-rsa/2.0/keys/ca.crt dest=/tmp/ca.crt fail_on_missing=yes validate_md5=yes flat=yes"
   sudo: true
diff --git a/roles/slurm-build/tasks/main.yml b/roles/slurm-build/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..add696e828fee1eee49d2f241442a7aa44d42724
--- /dev/null
+++ b/roles/slurm-build/tasks/main.yml
@@ -0,0 +1,55 @@
+---
+- name: install deps
+  yum: name={{ item }} state=installed
+  sudo: true
+  with_items:
+    - gcc
+    - rpm-build
+    - wget
+    - openssl-devel
+    - readline-devel
+    - pam-devel
+    - perl-ExtUtils-MakeMaker
+    - bzip2-devel
+
+- name: get munge
+  shell: wget https://munge.googlecode.com/files/munge-{{ munge_version }}.tar.bz2
+  args:
+    chdir: /tmp
+    creates: /tmp/munge-{{ munge_version }}.tar.bz2
+
+- name: make munge rpms
+  shell: rpmbuild -ta --clean munge-{{ munge_version }}.tar.bz2
+  sudo: true
+  args:
+    chdir: /tmp
+    creates: /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm
+
+- name: get slurm
+  shell: wget http://www.schedmd.com/download/latest/slurm-{{ slurm_version }}.tar.bz2
+  args:
+    chdir: /tmp
+    creates: /tmp/slurm-{{ slurm_version }}.tar.bz2
+
+- name: install munge deps
+  shell: rpm -i /root/rpmbuild/RPMS/x86_64/munge-libs-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-devel-{{ munge_version }}-1.el6.x86_64.rpm
+  sudo: true
+  ignore_errors: true
+
+- name: make slurm rpms
+  shell: rpmbuild -ta --clean slurm-{{ slurm_version }}.tar.bz2
+  sudo: true
+  args:
+    chdir: /tmp
+    creates: /root/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm
+
+- name: copy rpms
+  shell: cp -r /root/rpmbuild /tmp
+  sudo: true
+  args:
+    creates: /tmp/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm
+
+- name: retrieve rpms
+  shell: scp -r {{ hostvars[ansible_hostname]['ansible_user_id'] }}@{{ ansible_ssh_host }}:/tmp/rpmbuild/ /tmp
+  delegate_to: 127.0.0.1
+
diff --git a/roles/slurm/handlers/main.yml b/roles/slurm/handlers/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f2c4f28801f466b420288f619cd9683e7d642a33
--- /dev/null
+++ b/roles/slurm/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+ - name: restart munge
+   service: name=munge state=restarted
+   sudo: true
+
+ - name: restart slurm
+   service: name=slurm state=restarted
+   sudo: true
diff --git a/roles/slurm/tasks/main.yml b/roles/slurm/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a3a5ca1950ac30106ab54692280e2a7d7d37d82b
--- /dev/null
+++ b/roles/slurm/tasks/main.yml
@@ -0,0 +1,56 @@
+---
+- name: copy rpms
+  copy: src=/tmp/rpmbuild dest=/tmp/
+
+- name: install munge rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/munge*{{ munge_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: install perl
+  yum: name={{ item }} state=latest
+  with_items:
+    - perl
+    - perl-DBI
+  sudo: true
+
+- name: create slurm group
+  group: name=slurm
+  sudo: true
+
+- name: create slurm user
+  user: name=slurm group=slurm
+  sudo: true
+
+- name: install slurm rpms
+  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/slurm*{{ slurm_version }}*rpm"
+  sudo: true
+  ignore_errors: true
+
+- name: load munge key
+  include_vars: passwords.yml
+
+- name: install munge key
+  template: src=munge_key.j2 dest=/etc/munge/munge.key
+  sudo: true
+  notify: restart munge
+
+- name: start munge
+  service: name=munge state=started
+  sudo: true
+
+- name: install slurm.conf
+  template: src=slurm.conf.j2 dest=/etc/slurm/slurm.conf
+  sudo: true
+  notify: restart slurm
+  when: slurm_use_vpn==False
+
+- name: install slurm.conf
+  template: src=slurm-vpn.conf.j2 dest=/etc/slurm/slurm.conf
+  sudo: true
+  notify: restart slurm
+  when: slurm_use_vpn==True
+
+- name: start slurm
+  service: name=slurm state=started
+  sudo: true
diff --git a/roles/slurm/templates/munge_key.j2 b/roles/slurm/templates/munge_key.j2
new file mode 100644
index 0000000000000000000000000000000000000000..83d3483ee198fffce76dd82dee5cbe1fb8c0ab8f
--- /dev/null
+++ b/roles/slurm/templates/munge_key.j2
@@ -0,0 +1 @@
+{{ mungekey }}
diff --git a/roles/slurm/templates/slurm-vpn.conf.j2 b/roles/slurm/templates/slurm-vpn.conf.j2
new file mode 100644
index 0000000000000000000000000000000000000000..9e4f5867638ae20ebc24a26e018f07c5c4710b59
--- /dev/null
+++ b/roles/slurm/templates/slurm-vpn.conf.j2
@@ -0,0 +1,109 @@
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=CIAB
+ControlMachine={{ slurmctrl }}
+ControlAddr={{ slurmctrl }}-vpn
+#BackupController=
+#BackupAddr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+CacheGroups=0
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SchedulerPort=
+#SchedulerRootFilter=
+SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+#JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageHost=
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+MpiParams=ports=12000-12999
+# COMPUTE NODES
+{% set nodelist = [] %}
+{% for queue in slurmqueues %}
+{% for node in groups[queue.group] %}
+{% if nodelist.append(node) %}
+{% endif %}
+{% endfor %}
+{% endfor %}
+{% for node in nodelist|unique %}
+NodeName={{ node }} NodeAddr={{ node }}-vpn Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
+{% endfor %}
+
+{% for queue in slurmqueues %}
+PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
+{% endfor %}
diff --git a/roles/slurm/templates/slurm.conf.j2 b/roles/slurm/templates/slurm.conf.j2
new file mode 100644
index 0000000000000000000000000000000000000000..1afeab1961db6731b9a25052e8d7c844e3a16d45
--- /dev/null
+++ b/roles/slurm/templates/slurm.conf.j2
@@ -0,0 +1,109 @@
+#
+# Example slurm.conf file. Please run configurator.html
+# (in doc/html) to build a configuration file customized
+# for your environment.
+#
+#
+# slurm.conf file generated by configurator.html.
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=CIAB
+ControlMachine={{ slurmctrl }}
+#ControlAddr=
+#BackupController=
+#BackupAddr=
+#
+SlurmUser=slurm
+#SlurmdUser=root
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+#JobCredentialPrivateKey=
+#JobCredentialPublicCertificate=
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+#PluginDir=
+CacheGroups=0
+#FirstJobId=
+ReturnToService=0
+#MaxJobCount=
+#PlugStackConfig=
+#PropagatePrioProcess=
+#PropagateResourceLimits=
+#PropagateResourceLimitsExcept=
+#Prolog=
+#Epilog=
+#SrunProlog=
+#SrunEpilog=
+#TaskProlog=
+#TaskEpilog=
+#TaskPlugin=
+#TrackWCKey=no
+#TreeWidth=50
+#TmpFS=
+#UsePAM=
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+#SchedulerAuth=
+#SchedulerPort=
+#SchedulerRootFilter=
+SelectType=select/linear
+FastSchedule=1
+#PriorityType=priority/multifactor
+#PriorityDecayHalfLife=14-0
+#PriorityUsageResetPeriod=14-0
+#PriorityWeightFairshare=100000
+#PriorityWeightAge=1000
+#PriorityWeightPartition=10000
+#PriorityWeightJobSize=1000
+#PriorityMaxAge=1-0
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+#
+# ACCOUNTING
+#JobAcctGatherType=jobacct_gather/linux
+#JobAcctGatherFrequency=30
+#
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageHost=
+#AccountingStorageLoc=
+#AccountingStoragePass=
+#AccountingStorageUser=
+#
+MpiParams=ports=12000-12999
+# COMPUTE NODES
+{% set nodelist = [] %}
+{% for queue in slurmqueues %}
+{% for node in groups[queue.group] %}
+{% if nodelist.append(node) %}
+{% endif %}
+{% endfor %}
+{% endfor %}
+{% for node in nodelist|unique %}
+NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
+{% endfor %}
+
+{% for queue in slurmqueues %}
+PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
+{% endfor %}
diff --git a/scripts/makehosts.py b/scripts/makehosts.py
index b01508bb1b8e5d272a06167111caf5bc9bf5db49..1bf5c654f8bf84352d6c9d8d2e76a21b59cf54b6 100755
--- a/scripts/makehosts.py
+++ b/scripts/makehosts.py
@@ -25,3 +25,10 @@ for h in hosts.keys():
   for name in hosts[h]:
     string=string+" %s"%name
   print string
+
+for h in hosts.keys():
+  if d['hostvars'].has_key(h):
+    if d['hostvars'][h].has_key('ansible_tun0'):
+      string="%s"%(d['hostvars'][h]['ansible_tun0']['ipv4']['address'])
+      string=string+" %s-vpn"%h
+      print string
diff --git a/topplay.yml b/topplay.yml
index 819679874fdb5f457ae966983fa68f18b1332c64..11af31049274a07293bd06de2462086fca5ced25 100644
--- a/topplay.yml
+++ b/topplay.yml
@@ -1,39 +1,87 @@
 ---
+# First we need hostname -f to return the correct domain
+
 - hosts: '*'
   roles:
-  - etcHosts
+  - {role: etcHosts, domain: 'test.massive.org.au' }
+
+# Create an x509 CA. Certificates are used for the VPN, the ldap server and the web server (karaage)
 
-- hosts: 'x509_ca'
+- hosts: ManagmentNodes[0]
   vars:
   roles:
   - { role: easy-rsa-CA }
 
-- hosts: 'OpenVPN-Server'
+- hosts: 'ManagmentNodes'
   vars:
-    x509_ca_server: "{{ groups['x509_ca'][0] }}"
+    x509_ca_server: "{{ groups['ManagmentNodes'][0] }}"
+    ldapDomain: "dc=test,dc=massive,dc=org,dc=au"
   roles:
   - { role: OpenVPN-Server }
-
-- hosts: 'OpenVPN-Client'
-  vars:
-    x509_ca_server: "{{ groups['x509_ca'][0] }}"
-    openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
-  roles:
-  - { role: OpenVPN-Client }
+  - { role: ldapserver }
 
 - hosts: 'karaage'
   vars:
-    x509_ca_server: "{{ groups['x509_ca'][0] }}"
+    x509_ca_server: "{{ groups['ManagmentNodes'][0] }}"
     smtp_smarthost: "does_not_exist.massive.org.au"
     ldapDomain: "dc=test,dc=massive,dc=org,dc=au"
-    ldapURL: "ldaps://{{ hostvars[groups['ldapserver'][0]]['ansible_fqdn'] }}"
+    ldapURL: "ldaps://{{ hostvars[groups['ManagmentNodes'][0]]['ansible_fqdn'] }}"
   roles:
   - { role: karaage2.7 }
 
-- hosts: 'ldapserver'
+# The next two plays apply the VPN and recalculate /etc/hosts. Comment them out if you are on a secure network
+
+- hosts: ComputeNodes
   vars:
-    x509_ca_server: "{{ groups['x509_ca'][0] }}"
-    ldapDomain: "dc=test,dc=massive,dc=org,dc=au"
+    x509_ca_server: "{{ groups['ManagmentNodes'][0] }}"
+    openvpn_servers: "{{ groups['ManagmentNodes'] }}"
   roles:
-  - { role: ldapserver }
+  - { role: OpenVPN-Client }
+
+- hosts: '*'
+  roles:
+  - {role: etcHosts, domain: 'test.massive.org.au' }
+
+# This play builds slurm and munge packages on the first compute node and copies them back to localhost. Skip it if you
+# have some other way of obtaining copies of this software
+
+- hosts: ComputeNodes[0]
+  vars:
+    slurm_version: 14.11.0
+    munge_version: 0.5.11
+  roles:
+    - { role: slurm-build }
+
+# The slurm and openmpi roles assume copies of the software sit in /tmp under a very specific directory structure
+# (created by the build plays above). Modify the roles if you have a different way of installing the software
+
+- hosts: '*'
+  vars:
+    slurm_version: 14.11.0
+    munge_version: 0.5.11
+    slurmqueues:
+      - {name: batch, group: ComputeNodes}
+    slurmctrl: "{{ groups['ManagmentNodes'][0] }}"
+  roles:
+    - { role: slurm, slurm_use_vpn: true} # change to false if you are on a secure network
+    - { role: test_user }
+
+# This play builds openmpi on the first compute node and copies it back to localhost. Skip it if you have some other
+# way of obtaining a copy of this software
+
+- hosts: ComputeNodes[0]
+  vars:
+    openmpi_version: 1.8.3
+  roles:
+    - { role: openmpi-build }
+
+# The slurm and openmpi roles assume copies of the software sit in /tmp under a very specific directory structure
+# (created by the build plays above). Modify the roles if you have a different way of installing the software
+
+- hosts: ComputeNodes|ManagmentNodes|LoginNodes
+  vars:
+    openmpi_version: 1.8.3
+  roles:
+    - { role: openmpi }
+
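
For reference, the updated topplay.yml expects inventory groups named ManagmentNodes, ComputeNodes, LoginNodes and karaage. A minimal sketch of a matching inventory is below; the INI layout and the hostnames are illustrative assumptions, not part of this change:

# Hypothetical inventory sketch - group names match topplay.yml, hostnames are placeholders
[ManagmentNodes]
mgmt0.test.massive.org.au

[LoginNodes]
login0.test.massive.org.au

[ComputeNodes]
compute0.test.massive.org.au
compute1.test.massive.org.au

[karaage]
mgmt0.test.massive.org.au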
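
To illustrate the nodelist loop at the bottom of slurm.conf.j2 and slurm-vpn.conf.j2: with the single queue defined in topplay.yml ({name: batch, group: ComputeNodes}) and the two hypothetical 4-core compute nodes from the sketch above, the rendered compute-node section would look roughly like this (the VPN variant additionally sets NodeAddr to the node name with a -vpn suffix):

# COMPUTE NODES (rendered example - node names and core counts are assumptions)
NodeName=compute0.test.massive.org.au Procs=4 State=UNKNOWN
NodeName=compute1.test.massive.org.au Procs=4 State=UNKNOWN

PartitionName=batch Nodes=compute0.test.massive.org.au,compute1.test.massive.org.au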