Skip to content
Snippets Groups Projects
Commit 2fa0b3ef authored by Chris Hines's avatar Chris Hines
Browse files

Merge branch 'master' into 'master'

Master

Mellanox driver reboot did not work. Converted "command:" to "shell:".
Need new drivers for new kernel version

See merge request !6
parents e1d8dc2b 0a676a01
No related branches found
No related tags found
No related merge requests found
---
# Lustre client RPM file names. Each file name appears to encode the Lustre
# version followed by the kernel version it was built for.
# NOTE(review): the first two active entries are repeated verbatim in the
# commented-out "old RPMs" section below - confirm whether they should still
# be active alongside the 2.7.65 / 3.10.0_327.4.4 entries.
lustre_pkgs:
- lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
- lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
# old RPMs for older kernel
#- lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
#- lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
# simon comment out
#- lustre-client-modules-2.7.0-3.10.0_229.20.1.el7.x86_64.x86_64.rpm
#- lustre-client-2.7.0-3.10.0_229.20.1.el7.x86_64.x86_64.rpm
# shahahh mods
- lustre-client-modules-2.7.65-3.10.0_327.4.4.el7.x86_64_gab38c3a.x86_64.rpm
- lustre-client-2.7.65-3.10.0_327.4.4.el7.x86_64_gab38c3a.x86_64.rpm
---
# Load variables from mellanoxVars.yml.
- include_vars: mellanoxVars.yml

# Install the listed packages with yum, with privilege escalation.
# NOTE(review): presumably these are prerequisites of the Mellanox driver
# installer - confirm against the driver documentation.
- name: yum install dependencies
  yum: name=perl,pciutils,gtk2,atk,cairo,gcc-gfortran,libxml2-python,tcsh,libnl,lsof,tcl,tk
  sudo: true
......@@ -105,9 +104,9 @@
# A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
#
# Reboot the host. Runs only when ansible_os_family is "RedHat" and the
# `drivers_installed` result (registered outside this excerpt) is failed.
- name: restart machine
  # `shell`, not `command`: the ';' between the two commands is only
  # interpreted when the line is run through a shell. A task may carry only
  # one action key, so the superseded `command:` line is dropped.
  shell: "sleep 5; sudo shutdown -r now"
  async: 2
  # `poll` was given twice (0, then 1); the later value is kept, which is also
  # what a last-wins YAML loader resolves the duplicate to.
  poll: 1
  # Errors from this task are ignored rather than failing the play.
  ignore_errors: true
  sudo: true
  when: ansible_os_family=="RedHat" and drivers_installed|failed
......
---
# Note: do not add '.tgz' to the driver source name; that is done in the playbook.
# MELLANOX_DRIVER_SRC was defined twice (rhel7.1, then rhel7.2). Duplicate
# mapping keys are invalid YAML and most loaders silently keep the last one, so
# only the later rhel7.2 definition stays active. The earlier value is kept
# below as a comment for reference.
#MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
# Device name selected per OS family: ens6 on RedHat, eth1 on Debian.
MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}"
......@@ -58,6 +58,9 @@ CompleteWait=10
#UsePAM=
#
# TIMERS
SlurmctldTimeout=3000 #added due to network failures causing jobs to be killed
#SlurmctldTimeout=300
#SlurmdTimeout=300
#InactiveLimit=0
......@@ -150,5 +153,5 @@ NodeName={{ node }} Procs={{ hostvars[node]['ansible_processor_vcpus'] }} RealMe
{% endfor %}
# One PartitionName line per entry in slurmqueues. Default=yes is emitted only
# for queues flagged as default; Nodes is the comma-joined inventory group
# named by queue.group. The loop previously emitted two lines per queue that
# differed only in DefaultTime (24:00:00 and 72:00:00); the later value is kept.
{% for queue in slurmqueues %}
PartitionName={{ queue.name }} {% if queue.default %}Default=yes{% endif %} Nodes={{ groups[queue.group]|join(',') }} DefaultTime=72:00:00 State=UP
{% endfor %}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment