diff --git a/roles/slurm-common/files/scripts/nvidia-probe.py b/roles/slurm-common/files/scripts/nvidia-probe.py index 4b3e93e1934b9c6b648381c99b565a52240d4660..ba23982e7388c98f89f4f16f09eef38620bc1563 100755 --- a/roles/slurm-common/files/scripts/nvidia-probe.py +++ b/roles/slurm-common/files/scripts/nvidia-probe.py @@ -35,7 +35,7 @@ try: if not line : break #print "Line is ",line - pe=re.compile('GPU\s*(\d*).*Tesla\s*(\S*)') + pe=re.compile('GPU\s+(\d*):\s+\S+\s+(\S*)') m=pe.search(line) if not m: #print "No match found" diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml index 2e4146507057db8d0a285ed3c00558ba4ae45fd7..d99cb2ff52283205eb6ccd627505c573cf9180de 100644 --- a/roles/slurm-common/tasks/main.yml +++ b/roles/slurm-common/tasks/main.yml @@ -90,6 +90,11 @@ register: probeOutput check_mode: no +- name: get cpu count + shell: 'lscpu | grep "On-line CPU" | cut -f 2 -d ":" | sed "s/\ *//g"' + register: cpucount + check_mode: no + - name: "set nvidiaprobe slurm_gres_list" set_fact: "slurm_gres_list={{ probeOutput.stdout }}" diff --git a/roles/slurm-common/templates/gres.conf.j2 b/roles/slurm-common/templates/gres.conf.j2 index 24001d0dc4c874f63a23e55bee68fcd47ca4c2eb..9d13ec66f8aaf2e1dedc8d813ebd6b5ba19356a1 100644 --- a/roles/slurm-common/templates/gres.conf.j2 +++ b/roles/slurm-common/templates/gres.conf.j2 @@ -1,5 +1,5 @@ #slurm gres file for {{ ansible_hostname }} #No Of Devices={{ slurm_gres_list | length }} {% for gr in slurm_gres_list %} -Name={{ gr.name }} Type={{ gr.type }} File={{ gr.file }} CPUs=0-23 +Name={{ gr.name }} Type={{ gr.type }} File={{ gr.file }} CPUs={{ cpucount.stdout }} {% endfor %}