From c029cfbb35a3a581ca634ce14b14f48faaeff085 Mon Sep 17 00:00:00 2001
From: Chris Hines <chris.hines@monash.edu>
Date: Tue, 6 Sep 2016 11:53:10 +1000
Subject: [PATCH] add better conditionals to installing mellanox and nvidia
 drivers

Former-commit-id: dfb9559a4a96eb34ba6a2fab45bf0dd1ecb18b5c
---
 roles/gpu/tasks/main.yml                     | 13 +++++---
 roles/mellanox_drivers/tasks/main.yml        | 32 ++++++++++++++++++--
 roles/mellanox_drivers/vars/mellanoxVars.yml |  2 +-
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index 3e5683bb..8991ee25 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -51,16 +51,21 @@
   become: true 
   become_user: root
 
+- name: get kernel version  # captures the output of `uname -r` for use by later tasks
+  shell: uname -r
+  register: kernel_version  # kernel_version.stdout is interpolated into the nvidia.ko path checked below
+
+
 - name: check nvidia driver
-  stat: path="/usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}"
-  register: opencl
+  stat: path="/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko"
+  register: nvidia_driver  # stat result for nvidia.ko under the kernel reported by uname -r; gates download and build
   ignore_errors: true
 
 - name: get nvidia driver 
   get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
   become: true
   become_user: root
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists  # download only if nvidia.ko was not found by the stat check
 
 #- name: Copy boot file
 #  template: src=grub.conf.j2 dest=/boot/grub/grub.conf 
@@ -77,7 +82,7 @@
 - name: build nvidia driver 
   shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
   sudo: true
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists  # run the installer only if nvidia.ko was not found by the stat check
 
 - name: set the GOM
   shell: nvidia-smi --gom=0
diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
index 1a9b9f32..4588f6a8 100644
--- a/roles/mellanox_drivers/tasks/main.yml
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -14,12 +14,38 @@
   ignore_errors: true
 
 - name: default dont install
-  set_fact: install_now=false reboot_now=False
+  set_fact:  # defaults; the "set install ..." tasks below switch both facts to true when needed
+    install_now: false 
+    reboot_now: false
 
-- name: set install
-  set_fact: install_now=True reboot_now=True
+- name: get driver version  # first space-delimited field of the first line of ofed_info output
+  shell: 'ofed_info | head -n 1 | cut -f 1 -d " "'  # needs shell: the command module would pass "|", head and cut to ofed_info as arguments
+  register: driver_version  # compared against desired_driver_version.stdout by the version-mismatch task
+  ignore_errors: true  # a failure here must not abort the play; the mismatch task tests driver_version | failed
+
+- name: get desired driver version  # derives the target version string from the archive name
+  shell: 'echo {{ MELLANOX_DRIVER_SRC }} | cut -f 1,2,3 -d "-"'  # keeps the first three "-"-separated fields of MELLANOX_DRIVER_SRC
+  register: desired_driver_version  # .stdout is looked up inside driver_version.stdout further down
+
+
+- name: set install due to drivers not installed  # drivers_installed is registered above this hunk (not visible here)
+  set_fact: 
+    install_now: true 
+    reboot_now: true
   when: drivers_installed | failed
 
+- name: debug  # prints the registered result of the ofed_info task
+  debug: var=driver_version
+
+- name: debug  # prints the registered result of the desired-version task
+  debug: var=desired_driver_version
+
+- name: set install due to version mismatch  # also fires when the installed version could not be read
+  set_fact: 
+    install_now: true 
+    reboot_now: true
+  when: driver_version | failed or not desired_driver_version.stdout in driver_version.stdout  # groups as: (lookup failed) or not (desired in installed)
+
 - name: copy driver source
   unarchive: copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp 
   sudo: true
diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml
index 2c4c1a20..8ab87f8b 100644
--- a/roles/mellanox_drivers/vars/mellanoxVars.yml
+++ b/roles/mellanox_drivers/vars/mellanoxVars.yml
@@ -2,4 +2,4 @@
  #note. do not add '.tgz' to driver src. done in playbook
  #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
  #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
- MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.3-1.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+ MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.3-1.0.4.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"  # RedHat archive moves to 3.3-1.0.4.0; Debian entry unchanged
-- 
GitLab