From 6e4420db4d7008cb9c245435e0444d9520cc8cb7 Mon Sep 17 00:00:00 2001
From: Damien Leong <damien@dyn-49-127-52-89.its.monash.edu.au>
Date: Thu, 6 May 2021 12:19:19 +1000
Subject: [PATCH] Remove probe for GRES/CPU on gpu nodes, and remove gres.conf & slurm.conf, going configless

---
Review notes (free-form text; not part of the commit message):

* roles/slurm-common/tasks/main.yml: drops the slurm_gres_check task, the
  nvidia-probe.py task, the slurm_gres_list fact, the gres.conf template
  task and both "install slurm.conf" tasks. The "get cpu count" task is
  only a context line here and is NOT removed, even though the subject
  mentions CPU.
* roles/slurm-start/templates/slurmd.service.j2: comments out the
  ConditionPathExists check on slurm.conf and starts slurmd with
  --conf-server {{ slurmctrl }}:6817.
* Review adjustment: $SLURMD_OPTIONS is kept at the end of the new
  ExecStart line so that additional slurmd flags can still be supplied
  through the unit's environment. The post-image blob hash on the
  "index" line of that file predates this adjustment.
* NOTE(review): the indentation of the YAML lines was reconstructed as
  2 spaces under each task; confirm against the target file before
  applying.

 roles/slurm-common/tasks/main.yml             | 29 -------------------
 roles/slurm-start/templates/slurmd.service.j2 |  5 ++--
 2 files changed, 3 insertions(+), 31 deletions(-)

diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml
index 5b378354..f9832893 100644
--- a/roles/slurm-common/tasks/main.yml
+++ b/roles/slurm-common/tasks/main.yml
@@ -118,32 +118,12 @@
 
 - include: createSlurmDirectories.yml
 
-- name: check slurm generic resource
-  shell: "{{ slurm_gres_check }}"
-  register: slurm_generic_resource
-  ignore_errors: true
-  when: slurm_gres_check is defined
-  check_mode: no
-  changed_when: False
-
-- name: Gres - Test for Nvidia devices
-  script: scripts/nvidia-probe.py
-  register: probeOutput
-  check_mode: no
-  changed_when: False
-
 - name: get cpu count
   shell: 'lscpu | grep "On-line CPU" | cut -f 2 -d ":" | sed "s/\ *//g"'
   register: cpucount
   check_mode: no
   changed_when: False
 
-- name: "set nvidiaprobe slurm_gres_list"
-  set_fact: "slurm_gres_list={{ probeOutput.stdout }}"
-
-- name: template gres.conf file
-  template: src="gres.conf.j2" dest={{ slurm_dir }}/etc/gres.conf mode=644
-  become: true
 
 - name: make slurm prolog dir
   file: path=/opt/slurm/etc state=directory mode=755
@@ -158,15 +138,6 @@
   template: src=slurm.epilog.j2 dest=/opt/slurm/etc/slurm.epilog mode=755
   become: true
 
-- name: install slurm.conf
-  copy: src=files/slurm.conf dest={{ slurm_dir }}/etc/slurm.conf
-  become: true
-  when: slurm_use_vpn==False
-
-- name: install slurm.conf
-  template: src=slurm-vpn.conf.j2 dest={{ slurm_dir }}/etc/slurm.conf
-  become: true
-  when: slurm_use_vpn==True
 
 - name: setup envirnment variables
   template: src=slurm_setup.sh.j2 dest=/etc/profile.d/slurm_setup.sh
diff --git a/roles/slurm-start/templates/slurmd.service.j2 b/roles/slurm-start/templates/slurmd.service.j2
index 60d051db..36c56120 100644
--- a/roles/slurm-start/templates/slurmd.service.j2
+++ b/roles/slurm-start/templates/slurmd.service.j2
@@ -1,14 +1,15 @@
 [Unit]
 Description=Slurm node daemon
 After=network.target
-ConditionPathExists={{ slurm_dir }}/etc/slurm.conf
+# After Slurm-20.02 (configless), this is not needed
+# ConditionPathExists={{ slurm_dir }}/etc/slurm.conf
 
 [Service]
 Type=forking
 KillMode=process
 LimitMEMLOCK=infinity
 #EnvironmentFile=/etc/default/slurmd
-ExecStart={{ slurm_dir }}/sbin/slurmd $SLURMD_OPTIONS
+ExecStart={{ slurm_dir }}/sbin/slurmd --conf-server {{ slurmctrl }}:6817 $SLURMD_OPTIONS
 PIDFile={{ slurmpiddir }}/slurmd.pid
 
 [Install]
-- 
GitLab