Detect installed NVIDIA / Mellanox drivers more reliably.

roles/gpu: look for the nvidia.ko kernel module built for the running
kernel (taken from `uname -r`) instead of a versioned libnvidia-opencl
library, and gate the download/build tasks on that check.

roles/mellanox_drivers: besides the existing "drivers not installed"
trigger, compare the first field of the first line of `ofed_info` with
the first three dash-separated fields of MELLANOX_DRIVER_SRC and request
an install + reboot when they differ. The `ofed_info` pipeline is run
through the `shell` module because the `command` module does not
interpret `|`.

roles/mellanox_drivers/vars: RedHat driver source moves from
MLNX_OFED_LINUX-3.3-1.0.0.0 to MLNX_OFED_LINUX-3.3-1.0.4.0.

NOTE(review): line breaks and indentation were reconstructed from the
hunk line counts; the `index` hashes are carried over unchanged.

diff --git a/roles/gpu/tasks/main.yml b/roles/gpu/tasks/main.yml
index 3e5683bba7955facf50c7b29b27742f821e93d3e..8991ee2548217c5b76513b7a8841ab7559c26739 100644
--- a/roles/gpu/tasks/main.yml
+++ b/roles/gpu/tasks/main.yml
@@ -51,16 +51,21 @@
   become: true
   become_user: root
 
+- name: get kernel version
+  command: uname -r
+  register: kernel_version
+
+
 - name: check nvidia driver
-  stat: path="/usr/lib64/libnvidia-opencl.so.{{ nvidia_version }}"
-  register: opencl
+  stat: path="/lib/modules/{{ kernel_version.stdout }}/kernel/drivers/video/nvidia.ko"
+  register: nvidia_driver
   ignore_errors: true
 
 - name: get nvidia driver
   get_url: url=http://consistency0/src/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run dest=/tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run
   become: true
   become_user: root
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists
 
 #- name: Copy boot file
 #  template: src=grub.conf.j2 dest=/boot/grub/grub.conf
@@ -77,7 +82,7 @@
 - name: build nvidia driver
   shell: chmod 755 /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run; /tmp/NVIDIA-Linux-x86_64-{{ nvidia_version }}.run --silent
   sudo: true
-  when: not opencl.stat.exists
+  when: not nvidia_driver.stat.exists
 
 - name: set the GOM
   shell: nvidia-smi --gom=0
diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
index 1a9b9f32accd7d60e990ff7cebf2f53c5dcef29c..4588f6a846a30105dbc2cc4269ef24f1fb181e11 100644
--- a/roles/mellanox_drivers/tasks/main.yml
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -14,12 +14,38 @@
   ignore_errors: true
 
 - name: default dont install
-  set_fact: install_now=false reboot_now=False
+  set_fact:
+    install_now: false
+    reboot_now: false
 
-- name: set install
-  set_fact: install_now=True reboot_now=True
+- name: get driver version
+  shell: 'ofed_info | head -n 1 | cut -f 1 -d " "'
+  register: driver_version
+  ignore_errors: true
+
+- name: get desired driver version
+  shell: 'echo {{ MELLANOX_DRIVER_SRC }} | cut -f 1,2,3 -d "-"'
+  register: desired_driver_version
+
+
+- name: set install due to drivers not installed
+  set_fact:
+    install_now: true
+    reboot_now: true
   when: drivers_installed | failed
 
+- name: debug installed driver version
+  debug: var=driver_version
+
+- name: debug desired driver version
+  debug: var=desired_driver_version
+
+- name: set install due to version mismatch
+  set_fact:
+    install_now: true
+    reboot_now: true
+  when: (driver_version | failed) or (desired_driver_version.stdout not in driver_version.stdout)
+
 - name: copy driver source
   unarchive: copy=no src="http://consistency0/src/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
   sudo: true
diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml
index 2c4c1a20fd78708f4878bfdb25d4cffc4a5940ef..8ab87f8be36fefa337f381f51c8a7e1f0257889e 100644
--- a/roles/mellanox_drivers/vars/mellanoxVars.yml
+++ b/roles/mellanox_drivers/vars/mellanoxVars.yml
@@ -2,4 +2,4 @@
 #note. do not add '.tgz' to driver src. done in playbook
 #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
 #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
- MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.3-1.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+ MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.3-1.0.4.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"