From da079f08bba0ec3296c43943776939fe3b940c3c Mon Sep 17 00:00:00 2001
From: tngu0050 <trung.nguyen1@monash.edu>
Date: Thu, 16 Apr 2020 22:01:31 +1000
Subject: [PATCH] Update failover option for DGX

---
 roles/mellanox_drivers/tasks/main.yml | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
index c084b9da..4a023a18 100644
--- a/roles/mellanox_drivers/tasks/main.yml
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -114,7 +114,6 @@
   - mlnx-ofa_kernel-modules
   when: install_now
 
-
 - name: install drivers
   shell: ./mlnxofedinstall -q --add-kernel-support --force --skip-repo
   args:
@@ -129,6 +128,18 @@
   become_user: root
   register: service_file
 
+- name: Set failover mode on DGX baremetal nodes
+  lineinfile:  # ensures the modprobe option line below exists in the file
+    path: /etc/modprobe.d/ko2iblnd.conf
+    state: present
+    line: "options ko2iblnd dev_failover=1"
+    insertafter: EOF
+  when:  # list entries are ANDed: every condition must hold for the task to run
+    - '"DGX" in ansible_product_name'
+    - '"RedHat" in ansible_distribution'
+  become: true  # task-level keyword: must align with "when", not nest inside its list
+  become_user: root
+
 - name: Reload systemd
   shell: systemctl daemon-reload
   become: true
-- 
GitLab