From 2f7e8df7b77b2020a0b06bcf31b75c88986eaf33 Mon Sep 17 00:00:00 2001
From: "Trung Nguyen (Monash University)" <trung.nguyen1@monash.edu>
Date: Wed, 17 Jul 2019 11:35:17 +1000
Subject: [PATCH] Update mellanox and slurm roles

Former-commit-id: cc7c328d7af89a27f30ab40401985f0f6e22d159
---
 roles/mellanox_drivers/tasks/main.yml               | 8 ++++----
 roles/mellanox_drivers/vars/mellanoxVars.yml        | 7 ++-----
 roles/slurm-common/defaults/main.yml                | 7 +++----
 roles/slurm-common/tasks/createSlurmDirectories.yml | 4 ++++
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/roles/mellanox_drivers/tasks/main.yml b/roles/mellanox_drivers/tasks/main.yml
index c6c1f4a3..756c3b1a 100644
--- a/roles/mellanox_drivers/tasks/main.yml
+++ b/roles/mellanox_drivers/tasks/main.yml
@@ -53,7 +53,7 @@
   set_fact: 
     install_now: true 
     reboot_now: true
-  when: driver_version is failed or not desired_driver_version.stdout in driver_version.stdout
+  when: driver_version is failed or desired_driver_version.stdout not in driver_version.stdout
 
 - name: debug - print out value of install_now
   debug: var=install_now
@@ -65,7 +65,7 @@
   when: install_now
 
 - name: install drivers
-  shell: ./mlnxofedinstall -q --add-kernel-support --force
+  shell: ./mlnxofedinstall -q --add-kernel-support --force --skip-repo
   args:
     #more changes
     chdir: "/tmp/{{ MELLANOX_DRIVER_SRC }}"
@@ -87,9 +87,9 @@
   register: reload_service
 
 - name: enable roce_mode setting
-  service: name=roce_mode state=started enabled=True
+  service: name=roce_mode state=started enabled=true
   become: true
-
+  ignore_errors: true
 #
 # A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
 #
diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml
index 98f1c359..6aa643d5 100644
--- a/roles/mellanox_drivers/vars/mellanoxVars.yml
+++ b/roles/mellanox_drivers/vars/mellanoxVars.yml
@@ -1,7 +1,4 @@
 ---
  #note. do not add '.tgz' to driver src. done in playbook
- #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
- #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
- #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-3.4-1.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
-# MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-4.2-1.2.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
-MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-4.4-1.0.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-4.4-1.0.0.0-rhel7.4-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
+MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat'  %}MLNX_OFED_LINUX-4.5-1.0.1.0-rhel7.6-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
diff --git a/roles/slurm-common/defaults/main.yml b/roles/slurm-common/defaults/main.yml
index 362d9cf4..283c0627 100644
--- a/roles/slurm-common/defaults/main.yml
+++ b/roles/slurm-common/defaults/main.yml
@@ -1,9 +1,8 @@
 ---
 slurm_use_vpn: False 
-slurmctlddebug: {level: 9, log: '/var/log/slurm/slurmctld.log'}
-slurmddebug: {level: 9, log: '/var/log/slurm/slurmd.log'}
-slurmschedlog: {level: 9, log: '/var/log/slurm/slurmsched.log'}
-slurmdbdlog: {level: 9, log: '/var/log/slurm/slurmdbd.log'}
+slurmddebug: {level: 5, log: '/var/log/slurm/slurmd.log'}
+slurmctlddebug: {level: 5, log: '/mnt/slurm-logs/slurmctld.log'}
+slurmdbdlog: {level: 5, log: '/mnt/slurm-logs/slurmdbd.log'}
 slurmfairshare: {def: false, val: 10000}
 slurmdatadir: "/var/spool/slurm"
 slurmselecttype: "select/linear"
diff --git a/roles/slurm-common/tasks/createSlurmDirectories.yml b/roles/slurm-common/tasks/createSlurmDirectories.yml
index f4847d42..295aeadf 100644
--- a/roles/slurm-common/tasks/createSlurmDirectories.yml
+++ b/roles/slurm-common/tasks/createSlurmDirectories.yml
@@ -1,4 +1,8 @@
 ---
+- name: make sure slurmctld and slurmdbd log dir exists
+  file: dest=/mnt/slurm-logs state=directory owner=root group=root mode=0755
+  become: true
+
 - name: make sure slurm conf dir exists
   file: dest={{ slurm_dir }}/etc state=directory
   sudo: true
-- 
GitLab