From c029cfbb35a3a581ca634ce14b14f48faaeff085 Mon Sep 17 00:00:00 2001
From: Chris Hines <chris.hines@monash.edu>
Date: Tue, 6 Sep 2016 11:53:10 +1000
Subject: [PATCH] add better conditionals to installing mellanox and nvidia drivers

Former-commit-id: dfb9559a4a96eb34ba6a2fab45bf0dd1ecb18b5c
---
Review notes (this region is ignored by git am):
  * The "get driver version" task runs through the shell module. Its
    command line is a pipeline (ofed_info | head | cut); the command
    module does not hand "|" to a shell, so head and cut would not run
    and the registered output would not be the bare version string
    that the "version mismatch" condition compares against.
  * Only that one added line differs from the recorded commit, so the
    post-image blob id on that file's index line predates the change.
  * Line breaks and indentation were restored from a whitespace-collapsed
    copy; hunk line counts were re-checked, context whitespace should be
    confirmed against the target tree.

 roles/gpu/tasks/main.yml                     | 13 +++++---
 roles/mellanox_drivers/tasks/main.yml        | 32 ++++++++++++++++++--
 roles/mellanox_drivers/vars/mellanoxVars.yml |  2 +-
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index 3e5683bb..8991ee25 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -51,16 +51,21 @@
   become: true
   become_user: root
 
+- name: get kernel version
+  shell: uname -r
+  register: kernel_version
+
+
 - name: check nvidia driver
-  stat: path="/usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}"
-  register: opencl
+  stat: path="/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko"
+  register: nvidia_driver
   ignore_errors: true
 
 - name: get nvidia driver
   get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
   become: true
   become_user: root
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists
 
 #- name: Copy boot file
 #  template: src=grub.conf.j2 dest=/boot/grub/grub.conf
@@ -77,7 +82,7 @@
 - name: build nvidia driver
   shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
   sudo: true
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists
 
 - name: set the GOM
   shell: nvidia-smi --gom=0
diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
index 1a9b9f32..4588f6a8 100644
--- a/roles/mellanox_drivers/tasks/main.yml
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -14,12 +14,38 @@
   ignore_errors: true
 
 - name: default dont install
-  set_fact: install_now=false reboot_now=False
+  set_fact:
+    install_now: false
+    reboot_now: false
 
-- name: set install
-  set_fact: install_now=True reboot_now=True
+- name: get driver version
+  shell: 'ofed_info | head -n 1 | cut -f 1 -d " "'
+  register: driver_version
+  ignore_errors: true
+
+- name: get desired driver version
+  shell: 'echo {{ MELLANOX_DRIVER_SRC }} | cut -f 1,2,3 -d "-"'
+  register: desired_driver_version
+
+
+- name: set install due to drivers not installed
+  set_fact:
+    install_now: true
+    reboot_now: true
   when: drivers_installed | failed
 
+- name: debug
+  debug: var=driver_version
+
+- name: debug
+  debug: var=desired_driver_version
+
+- name: set install due to version mismatch
+  set_fact:
+    install_now: true
+    reboot_now: true
+  when: driver_version | failed or not desired_driver_version.stdout in driver_version.stdout
+
 - name: copy driver source
   unarchive: copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
   sudo: true
diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml
index 2c4c1a20..8ab87f8b 100644
--- a/roles/mellanox_drivers/vars/mellanoxVars.yml
+++ b/roles/mellanox_drivers/vars/mellanoxVars.yml
@@ -2,4 +2,4 @@
 #note. do not add '.tgz' to driver src. done in playbook
 #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
 #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
- MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.3-1.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+ MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.3-1.0.4.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
-- 
GitLab