From f2198631a157aec0667258d4da271fdb15beadc4 Mon Sep 17 00:00:00 2001
From: Simon Michnowicz <simon.michnowicz@monash.edu>
Date: Tue, 1 Aug 2017 14:59:21 +1000
Subject: [PATCH] change the way a script is called to determine number of GPUs

Unfortunately this new way also produced weird Python errors, which could only be fixed by running as root (which is probably a problem with the original way)
---
 roles/slurm-common/tasks/main.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/roles/slurm-common/tasks/main.yml b/roles/slurm-common/tasks/main.yml
index d99cb2ff..1f3c7e56 100644
--- a/roles/slurm-common/tasks/main.yml
+++ b/roles/slurm-common/tasks/main.yml
@@ -85,10 +85,15 @@
   when: slurm_gres_check is defined
   check_mode: no
 
+#as we change home directory earlier gather_facts causes the 'script' role to fail. so we do the probe this way
+- name: Copy Gres - Test script to /tmp
+  copy: src="scripts/nvidia-probe.py" dest="/tmp" mode="u=rwx,g=rx,o=rx"
+
 - name: Gres - Test for Nvidia devices
-  script: scripts/nvidia-probe.py
+  shell: /tmp/nvidia-probe.py
   register: probeOutput
   check_mode: no
+  sudo: true
 
 - name: get cpu count
   shell: 'lscpu | grep "On-line CPU" | cut -f 2 -d ":" | sed "s/\ *//g"'
@@ -98,6 +103,9 @@
 - name: "set nvidiaprobe slurm_gres_list"
   set_fact: "slurm_gres_list={{ probeOutput.stdout }}"
 
+- name: simon debug of slurm_gres_list
+  debug: msg="List of slurm_gres_list is {{ slurm_gres_list }}"
+
 - name: template gres.conf file
   template: src="gres.conf.j2" dest={{ slurm_dir }}/etc/gres.conf mode=644
   sudo: true
-- 
GitLab