Commit 3d165e68 authored by Jupiter Hu

Merge pull request #14 from l1ll1/slurm

a new slurm role. Slurm communicates on the default interface currently,...
parents e4c36d0b 90fa9a49
---
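# slurm-build role tasks: build munge and slurm RPMs on a build host
# (exact file path not shown in this view; assumed to be roles/slurm-build/tasks/main.yml)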
- name: install deps
  yum: name={{ item }} state=installed
  sudo: true
  with_items:
    - gcc
    - rpm-build
    - wget
    - openssl-devel
    - readline-devel
    - pam-devel
    - perl-ExtUtils-MakeMaker
    - bzip2-devel

- name: get munge
  shell: wget https://munge.googlecode.com/files/munge-{{ munge_version }}.tar.bz2
  args:
    chdir: /tmp
    creates: /tmp/munge-{{ munge_version }}.tar.bz2

- name: make munge rpms
  shell: rpmbuild -ta --clean munge-{{ munge_version }}.tar.bz2
  sudo: true
  args:
    chdir: /tmp
    creates: /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm

- name: get slurm
  shell: wget http://www.schedmd.com/download/latest/slurm-{{ slurm_version }}.tar.bz2
  args:
    chdir: /tmp
    creates: /tmp/slurm-{{ slurm_version }}.tar.bz2

- name: install munge deps
  shell: rpm -i /root/rpmbuild/RPMS/x86_64/munge-libs-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-{{ munge_version }}-1.el6.x86_64.rpm /root/rpmbuild/RPMS/x86_64/munge-devel-{{ munge_version }}-1.el6.x86_64.rpm
  sudo: true
  ignore_errors: true

- name: make slurm rpms
  shell: rpmbuild -ta --clean slurm-{{ slurm_version }}.tar.bz2
  sudo: true
  args:
    chdir: /tmp
    creates: /root/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm

- name: copy rpms
  shell: cp -r /root/rpmbuild /tmp
  sudo: true
  args:
    creates: /tmp/rpmbuild/RPMS/x86_64/slurm-{{ slurm_version }}-1.el6.x86_64.rpm

- name: retrieve rpms
  shell: scp -r {{ hostvars[ansible_hostname]['ansible_user_id'] }}@{{ ansible_ssh_host }}:/tmp/rpmbuild/ /tmp
  delegate_to: 127.0.0.1
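# Note: the last two tasks above stage the built RPMs under /tmp/rpmbuild on the build
# host and then pull the tree back to the Ansible control machine (delegate_to: 127.0.0.1),
# where the slurm role's "copy rpms" task picks it up for distribution to the other nodes.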
---
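# slurm role handlers, triggered by the "notify: restart munge" / "notify: restart slurm"
# tasks below (exact file path not shown; assumed to be roles/slurm/handlers/main.yml)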
- name: restart munge
  service: name=munge state=restarted
  sudo: true

- name: restart slurm
  service: name=slurm state=restarted
  sudo: true
---
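# slurm role tasks: install the pre-built munge and slurm RPMs and configure each node
# (exact file path not shown; assumed to be roles/slurm/tasks/main.yml)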
- name: copy rpms
  copy: src=/tmp/rpmbuild dest=/tmp/

- name: install munge rpms
  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/munge*{{ munge_version }}*rpm"
  sudo: true
  ignore_errors: true

- name: install perl
  yum: name={{ item }} state=latest
  with_items:
    - perl
    - perl-DBI
  sudo: true

- name: create slurm group
  group: name=slurm
  sudo: true

- name: create slurm user
  user: name=slurm group=slurm
  sudo: true

- name: install slurm rpms
  shell: "rpm --install /tmp/rpmbuild/RPMS/x86_64/slurm*{{ slurm_version }}*rpm"
  sudo: true
  ignore_errors: true

- name: load munge key
  include_vars: passwords.yml

- name: install munge key
  template: src=munge_key.j2 dest=/etc/munge/munge.key
  sudo: true
  notify: restart munge

- name: start munge
  service: name=munge state=started
  sudo: true

- name: install slurm.conf
  template: src=slurm.conf.j2 dest=/etc/slurm/slurm.conf
  sudo: true
  notify: restart slurm

- name: start slurm
  service: name=slurm state=started
  sudo: true
{{ mungekey }}
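(Note: the munge_key.j2 template above consists solely of {{ mungekey }}; the slurm role
fills it from the variable loaded by "include_vars: passwords.yml". That vars file is not
part of this commit; a minimal sketch, assuming it only needs to supply the key, would be
a single line such as mungekey: "some-long-random-site-specific-secret".)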
#
# Example slurm.conf file. Please run configurator.html
# (in doc/html) to build a configuration file customized
# for your environment.
#
#
# slurm.conf file generated by configurator.html.
#
# See the slurm.conf man page for more information.
#
ClusterName=CIAB
ControlMachine={{ slurmctrl }}
#ControlAddr=
#BackupController=
#BackupAddr=
#
SlurmUser=slurm
#SlurmdUser=root
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
StateSaveLocation=/tmp
SlurmdSpoolDir=/tmp/slurmd
SwitchType=switch/none
MpiDefault=none
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmdPidFile=/var/run/slurmd.pid
ProctrackType=proctrack/pgid
#PluginDir=
CacheGroups=0
#FirstJobId=
ReturnToService=0
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
#TaskPlugin=
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
#
# TIMERS
SlurmctldTimeout=300
SlurmdTimeout=300
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0
#
# SCHEDULING
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
SelectType=select/linear
FastSchedule=1
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0
#
# LOGGING
SlurmctldDebug=3
#SlurmctldLogFile=
SlurmdDebug=3
#SlurmdLogFile=
JobCompType=jobcomp/none
#JobCompLoc=
#
# ACCOUNTING
#JobAcctGatherType=jobacct_gather/linux
#JobAcctGatherFrequency=30
#
#AccountingStorageType=accounting_storage/slurmdbd
#AccountingStorageHost=
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
#
MpiParams=ports=12000-12999
# COMPUTE NODES
{% set nodelist = [] %}
{% for queue in slurmqueues %}
{% for node in groups[queue.group] %}
{% if nodelist.append(node) %}
{% endif %}
{% endfor %}
{% endfor %}
{% for node in nodelist|unique %}
NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_cores'] }} State=UNKNOWN
{% endfor %}
{% for queue in slurmqueues %}
PartitionName={{ queue.name }} Nodes={{ groups[queue.group]|join(',') }}
{% endfor %}
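#
# Illustration only (hypothetical inventory, not part of this commit): if group
# ComputeNode contained the hosts aciab-comp-0 and aciab-comp-1, each reporting
# 2 processor cores, the loops above would render something like:
#   NodeName=aciab-comp-0 Procs=2 State=UNKNOWN
#   NodeName=aciab-comp-1 Procs=2 State=UNKNOWN
#   PartitionName=DEFAULT Nodes=aciab-comp-0,aciab-comp-1
#   PartitionName=batch Nodes=aciab-comp-0,aciab-comp-1
#   PartitionName=gpu Nodes=aciab-comp-0,aciab-comp-1
# (queue names taken from the slurmqueues variable defined in the playbook below)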
---
slurm_use_vpn: True
@@ -4,6 +4,36 @@
  roles:
    - etcHosts
<<<<<<< HEAD
#- hosts: 'x509_ca'
#  vars:
#  roles:
#    - { role: easy-rsa-CA }
#
#- hosts: 'OpenVPN-Server'
#  vars:
#    x509_ca_server: "{{ groups['x509_ca'][0] }}"
#  roles:
#    - { role: OpenVPN-Server }
#
#- hosts: 'OpenVPN-Client'
#  vars:
#    x509_ca_server: "{{ groups['x509_ca'][0] }}"
#    openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
#  roles:
#    - { role: OpenVPN-Client }
#
#
#- hosts: 'SubmitHost'
#  roles:
#    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
- hosts: ComputeNode
  vars:
    openmpi_version: 1.8.3
    build_node: aciab-comp-0
  roles:
- hosts: 'x509_ca'
  vars:
  roles:
@@ -21,3 +51,22 @@
    openvpn_servers: "{{ groups['OpenVPN-Server'] }}"
  roles:
    - { role: OpenVPN-Client }
- hosts: 'SubmitHost'
  roles:
    - { role: slurm-build, slurm_version: 14.11.0, munge_version: 0.5.11 }
- hosts: '*'
  vars:
    slurm_version: 14.11.0
    munge_version: 0.5.11
    slurmqueues:
      - {name: DEFAULT, group: ComputeNode}
      - {name: batch, group: ComputeNode}
      - {name: gpu, group: ComputeNode}
    slurmctrl: "{{ groups['SubmitHost'][0] }}"
  roles:
    - { role: slurm }
    - { role: test_user }
    - { role: openmpi-build }