Commit 1fe96c79 authored by Andreas Hamacher

Merge branch 'slurm20cicd' into 'master'

Slurm20cicd

See merge request !459
parents ea6cfadc ecb38a38
---
- import_playbook: plays/make_files.yml
tags: [make_files]
- import_playbook: plays/allnodes.yml
tags: [allnodes]
- import_playbook: plays/init_slurmconf.yml # this requires management nodes
tags: [init_slurm]
- import_playbook: plays/nfssqlnodes.yml
tags: [nfssql]
- import_playbook: plays/mockldap.yml
tags: [mockldap]
- import_playbook: plays/mgmtnodes.yml
tags: [mgmtnodesplaybook]
- import_playbook: plays/computenodes.yml
tags: [computenodesplaybook]
- import_playbook: plays/loginnodes.yml
tags: [loginnodesplaybook]
@@ -3,6 +3,7 @@
#
- hosts: 'ManagementNodes'
gather_facts: True
vars_files:
- vars/passwords.yml
- vars/names.yml
@@ -12,7 +13,10 @@
- vars/vars.yml
- vars/vars_centos78.yml
tasks:
# - { name: set hostgroup, set_fact: hostgroup='ManagementNodes' }
- { name: unmount vdb if absent, mount: { path: "/mnt", src: "/dev/vdb", state: absent},
when: 'hostvars[inventory_hostname]["ansible_devices"]["vdb"] is not defined', become: true }
- { name: keep mnt present, file: { path: "/mnt", owner: root, group: root, mode: "u=rwx,g=rx,o=rx", state: directory},
when: 'hostvars[inventory_hostname]["ansible_devices"]["vdb"] is not defined', become: true }
- { name: set use shared state, set_fact: usesharedstatedir=True }
tags: [ always ]
@@ -35,7 +39,7 @@
- { role: slurmdb-config, tags: [ slurm, slurmdb-config ] }
- { role: slurm-common, tags: [ slurm, slurm-common ] }
- { role: slurm_config, tags: [ slurm, slurm-config ] }
- { role: slurm-start, start_slurmdbd: True, start_slurmctld: True, tags: [ slurm-start ] }
- { role: slurm-start, start_slurmdbd: True, start_slurmctld: True, slurmd_enabled: False, start_slurmd: False, use_glusterfs: False, EMAIL_DEST: "nobody@nowhere.com", tags: [ slurm-start ] }
- { role: telegraf, telegraf_install_rpm_url: 'http://consistency0/src/telegraf-1.12.6-1.x86_64.rpm', tags: [ monitoring, SiteSpecific ] }
# - { role: provision_slurm, use_active_directory: False, lockpath: "/mnt/home", tags: [ slurm ] }
# - { role: provision_homedir, use_active_directory: False, mntpt: "/mnt/home", tags: [ provisioning ] }
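The slurm-start call above now passes slurmd_enabled: False and start_slurmd: False, so a management node should finish with slurmctld and slurmdbd enabled but without an active slurmd. A minimal post-run check, sketched as an ad-hoc play that is not part of this commit (the group name and the systemd-managed units are taken from the plays above, everything else is an assumption):
# Hypothetical post-run check, not part of this commit.
- hosts: ManagementNodes
  gather_facts: false
  become: true
  tasks:
    # Collect the systemd unit inventory so enablement can be asserted.
    - name: gather service facts
      service_facts:
    # slurmctld must be enabled; slurmd should be absent or left disabled.
    - name: verify daemon enablement on a management node
      assert:
        that:
          - "'slurmctld.service' in ansible_facts.services"
          - "ansible_facts.services['slurmctld.service'].status == 'enabled'"
          - "'slurmd.service' not in ansible_facts.services or ansible_facts.services['slurmd.service'].status != 'enabled'"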
---
computeNfsMounts:
- { name: '/home', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/home", 'opts': 'defaults,nofail', 'fstype':'nfs4' }
- { name: '/usr/local', ipv4: "118.138.235.37", src: "/usr_local", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' }
- { name: '/usr/local', ipv4: "118.138.235.55", src: "/usr_local", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' }
- { name: '/projects', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/projects", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' }
- { name: '/scratch', ipv4: "{{ groups['NFSNodes'][0] }}", src: "/nfsvol/scratch", 'opts': 'defaults,rw,nofail', 'fstype':'nfs4' }
mgmtNfsMounts:
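The role that consumes computeNfsMounts is not included in this diff; the entries above are presumably looped over with Ansible's mount module. A minimal sketch under that assumption, with the task name invented for illustration:
# Hypothetical task, not part of this commit: mount every entry defined in
# computeNfsMounts with the source host, export path and options given above.
- name: mount NFS filesystems on compute nodes
  mount:
    path: "{{ item.name }}"
    src: "{{ item.ipv4 }}:{{ item.src }}"
    fstype: "{{ item.fstype }}"
    opts: "{{ item.opts }}"
    state: mounted
  loop: "{{ computeNfsMounts }}"
  become: true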
@@ -5,6 +5,7 @@ sqlrootPasswd: EXAMPLESQLROOTPASSWORD
sudo_group: systems
default_user_password_clear: EXAMPLEDEFAULTUSERPASSWORDCLEAR
default_user_password: EXAMPLEDEFAULTUSERPASSWORD
ldapManagerDNPassword: EXAMPLELDAPMANAGERDNPASSWORD
ldapManagerPassword: EXAMPLELDAPMANAGERPASSWORD
ldapBindDNPassword: EXAMPLEBINDDNPASSWORD
# the redhat passwords are also example passwords
ldapManagerDNPassword: redhat
ldapManagerPassword: redhat
ldapBindDNPassword: redhat
@@ -3,7 +3,7 @@ desktopNodeList:
- { name : 'DesktopNodes', interface : 'eth0' }
clustername: "cicd"
projectname: "cicd"
slurm_version: 19.05.4
slurm_version: 20.02.6
munge_version: 0.5.13
nhc_version: 1.4.2
munge_dir: /opt/munge-{{ munge_version }}
@@ -38,7 +38,7 @@ slurmsharedstatedir: "/slurmstate"
slurmpiddir: "/opt/slurm/var/run"
slurmaccount_create_user: "/usr/local/sbin/slurmuseraccount.sh"
slurm_provision: "/cinderVolume/local/sbin/slurm_provision.sh"
slurmselecttype: "select/linear"
slurmselecttype: "select/cons_tres"
slurmfastschedule: "1"
slurmschedulertype: "sched/backfill"
restartServerList:
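To confirm the bump from 19.05.4 to 20.02.6 actually reached a node, the reported binary version can be compared against slurm_version. A hedged sketch (slurm_dir is defined elsewhere in this repository; the tasks themselves are not part of this commit):
# Hypothetical verification tasks: ask the installed binaries for their
# release and assert it matches the slurm_version set above.
- name: read the installed Slurm version
  command: "{{ slurm_dir }}/bin/sinfo --version"
  register: sinfo_out
  changed_when: false
- name: assert the expected Slurm release is installed
  assert:
    that: "slurm_version in sinfo_out.stdout"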
@@ -17,6 +17,7 @@ BackupController={{ slurmctrlbackup }}
#BackupController=
#BackupAddr=
#
SlurmctldParameters=enable_configless
SlurmUser=slurm
SlurmdUser=root
SlurmctldPort=6817
@@ -36,7 +37,7 @@ ProctrackType=proctrack/cgroup
CacheGroups=0
#FirstJobId=
ReturnToService=1
#RebootProgram=/sbin/reboot
RebootProgram=/sbin/reboot
#ResumeTimeout=300
#MaxJobCount=
#PlugStackConfig=
@@ -77,10 +78,9 @@ SchedulerType={{ slurmschedulertype }}
#SchedulerPort=
#SchedulerRootFilter=
SelectType={{ slurmselecttype }}
{% if slurmselecttype.find("cons_res") > 0 %}
{% if slurmselecttype.find("cons_tres") > 0 %}
SelectTypeParameters=CR_Core_Memory
{% endif %}
FastSchedule={{ slurmfastschedule }}
PriorityType=priority/multifactor
#PriorityFlags=Ticket_Based
#PriorityCalcPeriod=5
@@ -140,7 +140,7 @@ AccountingStorageEnforce=limits,safe
#AccountingStorageUser=
#
#GRES
#GresTypes=gpu
GresTypes=gpu
#
HealthCheckInterval=300
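The template changes above switch the select plugin to cons_tres, turn on configless mode (in Slurm 20.02 slurmd can fetch slurm.conf from the controller instead of needing a local copy), enable RebootProgram and declare gpu GRES. A hypothetical check that the restarted controller picked these up, not part of this commit:
# Hypothetical check: once slurmctld has restarted with the regenerated
# slurm.conf, confirm the controller reports the new settings.
- name: dump the live controller configuration
  command: "{{ slurm_dir }}/bin/scontrol show config"
  register: live_conf
  changed_when: false
- name: assert cons_tres and configless mode are active
  assert:
    that:
      - "'select/cons_tres' in live_conf.stdout"
      - "'enable_configless' in live_conf.stdout"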
@@ -31,9 +31,9 @@
file:
path: '/usr/local/ucx/'
state: directory
owner: damienl
group: systems
mode: '0755'
owner: root
group: root
mode: u=rw,g=rx,o=rx
become: true
when: not stat_ucx.stat.exists
@@ -52,9 +52,9 @@
src: '/usr/local/ucx/1.8.0'
dest: '/usr/local/ucx/latest'
state: link
owner: damienl
group: systems
mode: '0755'
owner: root
group: root
mode: u=rw,g=rx,o=rx
become: true
when: newucx.changed
@@ -68,7 +68,7 @@
- ansible_os_family == 'RedHat'
- name: configure slurm ubuntu
command: /tmp/slurm-{{ slurm_version }}/configure --prefix={{ slurm_dir }} --with-munge={{ munge_dir }} --enable-pam --with-pmix --with-ucx
command: /tmp/slurm-{{ slurm_version }}/configure --prefix={{ slurm_dir }} --with-munge={{ munge_dir }} --enable-pam --with-pmix --with-ucx=/usr/local/ucx/1.8.0
args:
creates: "{{ slurm_dir }}/bin/srun"
chdir: /tmp/slurm-{{ slurm_version }}
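The Ubuntu configure line now pins UCX to /usr/local/ucx/1.8.0, the tree populated by the tasks above. If that prefix is missing, the build will not link UCX as intended, so a guard in front of the configure step may be worth adding. A sketch, not part of this commit:
# Hypothetical guard tasks: fail fast if the pinned UCX prefix is absent
# before running Slurm's configure with --with-ucx=/usr/local/ucx/1.8.0.
- name: stat the pinned UCX installation
  stat:
    path: /usr/local/ucx/1.8.0
  register: ucx_prefix
- name: abort when UCX 1.8.0 is missing
  fail:
    msg: "/usr/local/ucx/1.8.0 not found; build or install UCX before configuring Slurm"
  when: not ucx_prefix.stat.exists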
@@ -44,9 +44,9 @@
register: slurmd_service_installed
- name: deploy glusterfsheltest
template: dest=/etc/systemd/system/glusterfsheltest.sh src=glusterfsheltest.j2 mode=744
template: dest=/etc/systemd/system/glusterfsheltest.sh src=glusterfsheltest.sh.j2 mode=744
become: true
when: use_systemd is defined and start_slurmctld is defined
when: use_systemd is defined and start_slurmctld is defined and use_glusterfs
register: slurmctld_service_installed
- name: slurmctld.service
@@ -93,15 +93,15 @@
become: true
when: start_slurmdbd is defined and slurmdbd_service_installed.changed
- name: start slurmctld
- name: enable slurmctld
service: name=slurmctld state=stopped enabled={{ start_slurmctld }}
become: true
when: use_systemd is defined and start_slurmctld is defined
when: use_systemd is defined and start_slurmctld is defined and slurmctld_service_installed.changed
- name: restart slurmctld
service: name=slurmctld state=stopped enabled={{ start_slurmctld }}
- name: start slurmctld
service: name=slurmctld state=started
become: true
when: use_systemd is defined and start_slurmctld is defined and slurmctld_service_installed.changed
when: use_systemd is defined and start_slurmctld is defined and start_slurmctld
- name: "count clusters in slurm db"
shell: "{{ slurm_dir }}/bin/sacctmgr show cluster -p | wc -l"
@@ -130,4 +130,4 @@
- name: start slurm
service: name=slurm state=restarted enabled={{ slurmd_enabled }}
become: true
when: use_systemd is not defined and ( start_slurmd is defined or start_slurmctld is defined )
when: use_systemd is not defined and start_slurmd is defined
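With these changes the systemd branch enables slurmctld only when its unit file changed and starts it whenever start_slurmctld is true, while the non-systemd branch keys purely on start_slurmd. The compute-node side of the role is not shown in this diff; a hypothetical invocation mirroring the management-node one earlier in this merge might look like:
# Hypothetical compute-node role call (assumption, not taken from this diff):
# the same slurm-start role with the daemons reversed.
- { role: slurm-start, start_slurmd: True, slurmd_enabled: True, use_glusterfs: False, tags: [ slurm-start ] }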
@@ -6,7 +6,9 @@ ConditionPathExists={{ slurm_dir }}/etc/slurm.conf
[Service]
Type=forking
#EnvironmentFile=/etc/default/slurmctld
{% if use_glusterfs %}
ExecStartPre=/etc/systemd/system/glusterfsheltest.sh
{% endif %}
ExecStart={{ slurm_dir }}/sbin/slurmctld $SLURMCTLD_OPTIONS
PIDFile={{ slurmpiddir }}/slurmctld.pid
[Install]