Commit 2fa0b3ef authored by Chris Hines
Browse files

Merge branch 'master' into 'master'

Master

Mellanox driver reboot did not work. Converted "command:" to "shell:".
Need new drivers for new kernel version

See merge request !6
parents e1d8dc2b 0a676a01
---
# Lustre client RPM file names (client and client-modules packages).
# NOTE(review): each file name appears to embed the kernel build it targets
# (3.10.0_229.14.1 vs 3.10.0_327.4.4) - confirm against the running kernel.
lustre_pkgs:
- lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
- lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
# NOTE(review): the two active entries above are repeated, commented out,
# directly below as "old" - confirm whether they should still be active.
# old rpms for older kernel
#- lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
#- lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
# simon: commented out
#- lustre-client-modules-2.7.0-3.10.0_229.20.1.el7.x86_64.x86_64.rpm
#- lustre-client-2.7.0-3.10.0_229.20.1.el7.x86_64.x86_64.rpm
# shahahh mods
- lustre-client-modules-2.7.65-3.10.0_327.4.4.el7.x86_64_gab38c3a.x86_64.rpm
- lustre-client-2.7.65-3.10.0_327.4.4.el7.x86_64_gab38c3a.x86_64.rpm
---
# Load the variables defined in mellanoxVars.yml.
- include_vars: mellanoxVars.yml
# Install the listed packages with yum, running under sudo.
# NOTE(review): presumably prerequisites of the Mellanox driver installer - confirm.
- name: yum install dependencies
yum: name=perl,pciutils,gtk2,atk,cairo,gcc-gfortran,libxml2-python,tcsh,libnl,lsof,tcl,tk
sudo: true
......@@ -105,9 +104,9 @@
# A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
#
# Reboot the host ("shutdown -r now" after a 5 second sleep) when the OS family
# is RedHat and `drivers_installed` is in the failed state. Errors from this
# task are ignored.
# NOTE(review): this task lists both `command:` and `shell:` and two `poll:`
# values (0 and 1). They look like the before/after lines of a diff shown
# together - keep only the intended one of each pair. The command line relies
# on `;`, which needs a shell to be interpreted. TODO confirm.
- name: restart machine
command: "sleep 5; sudo shutdown -r now"
shell: "sleep 5; sudo shutdown -r now"
async: 2
poll: 0
poll: 1
ignore_errors: true
sudo: true
when: ansible_os_family=="RedHat" and drivers_installed|failed
......
---
# Note: do not add '.tgz' to the driver source name; that is done in the playbook.
#
# MELLANOX_DRIVER_SRC: name of the Mellanox OFED driver bundle, selected by
# ansible_os_family. Renders as an empty string for any family other than
# RedHat or Debian.
# Previous RHEL 7.1 bundle, kept for reference:
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
# MELLANOX_DEVICE_NAME: device name selected by ansible_os_family
# (ens6 on RedHat, eth1 on Debian; empty for any other family).
MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}"
......@@ -58,6 +58,9 @@ CompleteWait=10
#UsePAM=
#
# TIMERS
SlurmctldTimeout=3000 #added due to network failures causing jobs to be killed
#SlurmctldTimeout=300
#SlurmdTimeout=300
#InactiveLimit=0
......@@ -150,5 +153,5 @@ NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} RealMe
{% endfor %}
{% for queue in slurmqueues %}
PartitionName={{ queue.name }} {% if queue.default %}Default=yes{% endif %} Nodes={{ groups[queue.group]|join(',') }} DefaultTime=24:00:00 State=UP
PartitionName={{ queue.name }} {% if queue.default %}Default=yes{% endif %} Nodes={{ groups[queue.group]|join(',') }} DefaultTime=72:00:00 State=UP
{% endfor %}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment