diff --git a/roles/calculateSlurmConf/templates/slurm.conf.j2 b/roles/calculateSlurmConf/templates/slurm.conf.j2 index 3911638ef4e48018f53b90ed1b103435fab27844..c33b8ee0feed40aa90fb62188ccabc8e9f1b3f3a 100644 --- a/roles/calculateSlurmConf/templates/slurm.conf.j2 +++ b/roles/calculateSlurmConf/templates/slurm.conf.j2 @@ -34,7 +34,6 @@ SlurmdPidFile={{ slurmpiddir }}/slurmd.pid #ProctrackType=proctrack/linuxproc ProctrackType=proctrack/cgroup #PluginDir= -CacheGroups=0 #FirstJobId= ReturnToService=1 RebootProgram=/sbin/reboot diff --git a/roles/lmod/tasks/main.yml b/roles/lmod/tasks/main.yml index ea62d811cdb0b08bb3a2b286ec2251ffda0d8c0e..7f321c04737dd629f15b85841259f5c1ef215e81 100644 --- a/roles/lmod/tasks/main.yml +++ b/roles/lmod/tasks/main.yml @@ -21,7 +21,9 @@ - lua-filesystem - lua-posix become: true - when: ansible_os_family == 'RedHat' + when: + - ansible_os_family == 'RedHat' + - '"DGX" not in ansible_product_name' - name: install lua RHEL7 yum: @@ -54,7 +56,7 @@ - name: Download LMOD get_url: - url=http://{{ reposerverip }}/src/Lmod-{{ lmod_version }}.tar.bz2 + url=https://object-store.rc.nectar.org.au/v1/AUTH_56ccfd36d0ad454a883a98e8489c97b5/hpc-repo/src/Lmod-{{ lmod_version }}.tar.bz2 dest={{ source_dir }}/Lmod-{{ lmod_version }}.tar.bz2 mode=0444 when: ansible_os_family == 'RedHat' and not lmodstat.stat.exists diff --git a/roles/slurm-common/tasks/installMungeFromSource.yml b/roles/slurm-common/tasks/installMungeFromSource.yml index 302665b36e1151f32fcef06e5ef325328a7dc5ad..ef1b621bbb788806ae6f515498bccc96e4e5327d 100644 --- a/roles/slurm-common/tasks/installMungeFromSource.yml +++ b/roles/slurm-common/tasks/installMungeFromSource.yml @@ -70,6 +70,16 @@ creates: "{{ munge_dir }}/bin/munge" when: not munge_binary.stat.exists +- name: Create munge socket directory if it does not exist + file: + path: "{{ munge_dir }}/var/run/munge" + state: directory + owner: munge + group: root + mode: u=rwx + become: true + tags: CHRISTMAS + - name: set use_systemd Redhat set_fact: use_systemd: True diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml index be66b59dffc507b4fd91e81f412390e42c50dead..8d117daa1880d62351c73a0ab9a9a9bfc6c45762 100644 --- a/roles/slurm-common/tasks/main.yml +++ b/roles/slurm-common/tasks/main.yml @@ -24,7 +24,7 @@ path: /mnt/nvme become: true - - name: Set /mnt/nvme as spankprivatetmpdir if present + - name: Set /mnt/nvme as spankprivatetmpmount if present file: src: /mnt/nvme dest: "{{ spankprivatetmpmount }}" @@ -33,9 +33,9 @@ mode: u=rwx,g=rx,o=rx state: link become: true - when: spankprivatetmpdir is defined and hostvars[inventory_hostname]['ansible_devices']['nvme0n1'] is defined + when: spankprivatetmpmount is defined and hostvars[inventory_hostname]['ansible_devices']['nvme0n1'] is defined -- name: Link /raid as spankprivatetmpdir if present +- name: Link /raid as spankprivatetmpmount if present file: src: /raid dest: "{{ spankprivatetmpmount }}" @@ -44,9 +44,9 @@ mode: u=rwx,g=rx,o=rx state: link become: true - when: spankprivatetmpdir is defined and raiddir.stat.isdir is defined and raiddir.stat.isdir == True + when: spankprivatetmpmount is defined and raiddir.stat.isdir is defined and raiddir.stat.isdir == True -- name: create spankprivatetmpdir as directory if there is not a fast drive present +- name: create spankprivatetmpmount as directory if there is not a fast drive present file: path: "{{ spankprivatetmpmount }}" owner: root @@ -54,7 +54,7 @@ mode: u=rwx,g=rx,o=rx state: directory become: true - when: spankprivatetmpdir is defined and hostvars[inventory_hostname]['ansible_devices']['nvme0n1'] is not defined and raiddir.stat.isdir is not defined + when: spankprivatetmpmount is defined and hostvars[inventory_hostname]['ansible_devices']['nvme0n1'] is not defined and raiddir.stat.isdir is not defined - name: create munge group group: name=munge system=yes gid=498 diff --git a/roles/slurm-common/templates/slurm-vpn.conf.j2 b/roles/slurm-common/templates/slurm-vpn.conf.j2 index 671840e07831bac8de9b440c3c9e90ca65ff3fd3..c3eca3f83a1513b2e5c9adfccff8bf0109689a07 100644 --- a/roles/slurm-common/templates/slurm-vpn.conf.j2 +++ b/roles/slurm-common/templates/slurm-vpn.conf.j2 @@ -29,7 +29,6 @@ SlurmctldPidFile=/var/run/slurmctld.pid SlurmdPidFile=/var/run/slurmd.pid ProctrackType=proctrack/pgid #PluginDir= -CacheGroups=0 #FirstJobId= ReturnToService=0 #MaxJobCount= diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml index 541ae97b80c740734a1969876246a3501d56185e..1cf792ac95fd0a270511db9a0dd1135d1f0f3eb4 100644 --- a/roles/slurm-start/tasks/main.yml +++ b/roles/slurm-start/tasks/main.yml @@ -147,7 +147,7 @@ port: 6817 delay: 5 timeout: 300 - + when: start_slurmctld is defined and start_slurmctld - name: start slurmctld on secondary service: name=slurmctld state=started diff --git a/roles/slurm_config/tasks/main.yml b/roles/slurm_config/tasks/main.yml index 3c59c61413b2b5396563cd821797d1b23bf153bd..f08efe59b72b72ca435fdb8b44aac32ea68e40eb 100644 --- a/roles/slurm_config/tasks/main.yml +++ b/roles/slurm_config/tasks/main.yml @@ -34,7 +34,7 @@ ansible.builtin.copy: src: files/job_submit.lua dest: "{{ slurm_dir }}/etc/job_submit.lua" - mode: 755 + mode: u+rwx,g+rx,o+rx become: true become_user: root when: local_lua_file is defined and local_lua_file.stat.exists==True diff --git a/roles/upgrade/tasks/main.yml b/roles/upgrade/tasks/main.yml index 2025927e316bfa6158f1cca78b24ef2bfa9facaa..1c2c7206afe2509234c79a6af6c197c39b3abc61 100644 --- a/roles/upgrade/tasks/main.yml +++ b/roles/upgrade/tasks/main.yml @@ -15,13 +15,6 @@ become: true when: services["lustre-client.service"] is defined - - name: ensure the lustre module is absent - modprobe: - name: lustre - state: absent - become: true - when: services["lustre-client.service"] is defined - - name: count lustre mounts shell: cmd: mount -t lustre | wc -l