Skip to content
Snippets Groups Projects
Commit 3b82f500 authored by Chris Hines's avatar Chris Hines
Browse files
parents db9fcff7 60d4566a
No related branches found
No related tags found
No related merge requests found
Showing
with 321 additions and 18 deletions
---
lustre_pkgs:
- lustre-client-modules-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
- lustre-client-2.7.0-3.10.0_229.14.1.el7.x86_64.x86_64.rpm
---
lustre_pkgs:
- linux-patch-lustre_2.7.62-1_all.deb
- lustre-client-modules-3.13.0-58-generic_2.7.62-1_amd64.deb
- lustre-utils_2.7.62-1_amd64.deb
#!/bin/sh
# A CRUDE Script to install Mellanox OFED drivers
# Philip.Chan@monash.edu
#
# Steps:
#   1. Refuse to run unless the expected kernel is booted.
#   2. Install prerequisite packages with yum.
#   3. Unpack the MLNX_OFED bundle and run its installer.
#   4. Run ./set_ifcfg.pl to generate an ifcfg file, install it as
#      ifcfg-ens6 (unless one already exists) and bring ens6 up.
#
# TODO: check if MLNX_OFED is already installed!
# TODO: check kernel...

REQUIRED_KERN="3.10.0-229.14.1.el7.x86_64"
OFED_DIR="MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext"
IFCFG="/etc/sysconfig/network-scripts/ifcfg-ens6"

KERN=$(uname -r)
if [ "$KERN" != "$REQUIRED_KERN" ]
then
    echo "Oops! Did you forget to reboot?"
    echo "Kernel version has to be $REQUIRED_KERN"
    exit 1
fi

# The yum exit status is not checked; installation continues regardless.
sudo yum install -y pciutils gcc-gfortran libxml2-python tcsh libnl lsof tcl tk perl
sudo yum install -y gtk2 atk cairo

# Stop as soon as unpacking or the driver install fails: carrying on would
# run the remaining steps from the wrong directory and configure an
# interface for drivers that are not there.
tar xzvf "$OFED_DIR.tgz" || exit 1
cd "$OFED_DIR" || exit 1
sudo ./mlnxofedinstall -q || exit 1
cd .. || exit 1

# set_ifcfg.pl is given the path it should write the interface config to.
# Remove any stale copy first so the -f test below only sees a fresh file.
tmpfile="/tmp/ifcfg.pc"
rm -f "$tmpfile"
./set_ifcfg.pl "$tmpfile"

if [ -f "$tmpfile" ]
then
    echo "Attempting to install ifcfg-ens6"
    if [ -f "$IFCFG" ]
    then
        # Never overwrite an existing configuration; show its IP settings.
        echo "$IFCFG already exists!"
        grep IP "$IFCFG"
        echo "bailing!"
    else
        sudo cp -ip "$tmpfile" "$IFCFG"
        sudo chown root:root "$IFCFG"
        cd /etc/sysconfig/network-scripts || exit 1
        sudo ./ifup ens6
        # Quick connectivity check over the new interface.
        ping -c 1 172.16.228.1
    fi
fi
exit 0
#!/usr/bin/perl
#
# Assumes Mellanox NIC is named as ens6
# Philip.Chan@monash.edu
#
# Usage:
# ./set_ifcfg.pl [<tmpfilename>]
# To be used within the mlnx_install.sh
#
use strict;
use warnings;

# Output path for the generated ifcfg file: the first command-line argument,
# or "tmp.ifcfg" (relative to the current directory) when none is given.
my $outfile = shift @ARGV;
$outfile = "tmp.ifcfg" if (! defined $outfile);

# get_index($hostname)
#
# Maps a hostname to the value used as the last octet of the node's address.
#   - "hc<N>" with N < 32  ->  33 + N
#   - "hs<N>" with N < 32  ->   1 + N
# The patterns are unanchored, so they match anywhere in the string (a
# trailing newline or a domain suffix does not matter). "hc" is tried first;
# if its number is out of range the "hs" pattern is still tried.
#
# Returns 0 when the hostname matches neither pattern or the number is out
# of range; callers treat 0 as "unable to parse".
sub get_index
{
    my ($hn) = @_;
    my $maxhosts = 32;

    if ($hn =~ /hc(\d+)/) {
        return 33 + $1 if ($1 < $maxhosts);
    }
    if ($hn =~ /hs(\d+)/) {
        return 1 + $1 if ($1 < $maxhosts);
    }
    return 0;
}
# Ask the system for this node's hostname. Backticks keep the trailing
# newline and yield undef if the command cannot be started, so normalise the
# value before it is used in messages.
my $hostname = `/bin/hostname`;
$hostname = '' if (! defined $hostname);
chomp $hostname;

# get_index returns 0 for hostnames it does not recognise.
my $x = get_index($hostname);
die "Unable to parse hostname $hostname" if ($x eq '0');

my $ip = "172.16.229.$x";
print "Assigning $ip to $hostname\n";

# Write the interface configuration. A lexical handle with three-argument
# open keeps the file name from being interpreted as part of the open mode.
open my $out, '>', $outfile or die "Failed to create output file $outfile!";
print {$out} "DEVICE=ens6\n",
             "ONBOOT=yes\n",
             "NM_CONTROLLED=no\n",
             "BOOTPROTO=none\n",
             "IPADDR=$ip\n",
             "PREFIX=22\n",
             "MTU=9000\n";
# Buffered write errors only surface at close, so check it.
close $out or die "Failed to write output file $outfile: $!";
exit 0;
---
# Install the Mellanox OFED drivers and configure the Mellanox interface.
#
# Flow: install prerequisites -> probe for existing drivers -> (only when the
# probe failed) update the kernel, reboot, unpack and run the installer,
# write the interface configuration, reboot again -> bring the interface up.
# MELLANOX_DRIVER_SRC and MELLANOX_DEVICE_NAME are presumably supplied by
# mellanoxVars.yml - confirm against that file.
- include_vars: mellanoxVars.yml

- name: yum install dependencies
  yum: name=perl,pciutils,gtk2,atk,cairo,gcc-gfortran,libxml2-python,tcsh,libnl,lsof,tcl,tk
  sudo: true
  ignore_errors: true
  when: ansible_os_family == "RedHat"

# ibv_devinfo failing is taken to mean "drivers not installed"; every
# install step below is gated on drivers_installed|failed.
- name: test for existing installation of drivers
  command: ibv_devinfo
  sudo: true
  register: drivers_installed
  ignore_errors: true

- name: yum update to upgrade kernel
  shell: "yum update -y"
  sudo: true
  ignore_errors: true
  when: ansible_os_family == "RedHat" and drivers_installed|failed

#
# A REBOOT IS NEEDED AFTER a KERNEL UPDATE
#
# Fire-and-forget (async with poll 0) so the dropped SSH connection does not
# fail the task.
- name: restart machine
  shell: sleep 5; sudo shutdown -r now "Ansible updates triggered"
  async: 2
  poll: 0
  ignore_errors: true
  sudo: true
  when: ansible_os_family == "RedHat" and drivers_installed|failed

# The wait is issued twice, from the control machine, each after a 10 second
# delay.
- name: waiting for server to come back
  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
  sudo: false

- name: waiting for server to come back number 2
  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
  sudo: false

# The ".tgz" extension is appended here, not in the variable.
- name: copy driver source
  #make this a variable
  unarchive: copy=yes src="files/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
  sudo: true
  #when: drivers_installed|failed and ansible_os_family=="RedHat" and ansible_distribution_major_version == "7"
  when: drivers_installed|failed

- name: install drivers
  shell: ./mlnxofedinstall -q
  args:
    #more changes
    chdir: "/tmp/{{ MELLANOX_DRIVER_SRC }}"
  sudo: true
  when: drivers_installed|failed

#
# get IP address before reboot
#
# Runs on the control machine; the script's stdout is used as the address.
- name: get IP address
  local_action: command ./scripts/map_ib_ip.pl {{ inventory_hostname }}
  register: ip_address
  sudo: false
  #when: drivers_installed|failed

- name: template IP address
  template: dest=/etc/sysconfig/network-scripts/ifcfg-ens6 src=ifcfg-ens6.j2 owner=root group=root
  sudo: true
  when: ansible_os_family=="RedHat" and ansible_distribution_major_version == "7" and drivers_installed|failed

#ubuntu equivalent of previous command
# Three lineinfile tasks build the stanza in order; each insertafter anchors
# on the line added by the task before it.
- name: Ubuntu network interfaces - line 1
  lineinfile:
  args:
    dest: /etc/network/interfaces
    line: auto {{ MELLANOX_DEVICE_NAME }}
    state: present
  sudo: true
  when: ansible_os_family=="Debian" and drivers_installed|failed

- name: Ubuntu network interfaces - line 2
  lineinfile:
  args:
    dest: /etc/network/interfaces
    line: iface {{ MELLANOX_DEVICE_NAME }} inet static
    state: present
    insertafter: "auto {{ MELLANOX_DEVICE_NAME }}"
  sudo: true
  when: ansible_os_family=="Debian" and drivers_installed|failed

- name: Ubuntu network interfaces - line 3
  lineinfile:
  args:
    dest: /etc/network/interfaces
    line: address {{ ip_address.stdout }}
    state: present
    insertafter: "iface {{ MELLANOX_DEVICE_NAME }} inet static"
  sudo: true
  when: ansible_os_family=="Debian" and drivers_installed|failed

#
# A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
#
# shell (not command) is required because the line relies on ";", which the
# command module does not interpret. The OS family reported for CentOS hosts
# is "RedHat", matching the conditions used by the earlier tasks.
- name: restart machine
  shell: "sleep 5; sudo shutdown -r now"
  async: 2
  poll: 0
  ignore_errors: true
  sudo: true
  when: ansible_os_family == "RedHat" and drivers_installed|failed

# NOTE(review): unlike the other install steps this is not gated on
# drivers_installed|failed, so Debian hosts reboot on every run - confirm
# whether that is intended.
- name: restart machine for Ubuntu -cos it is 'special'
  shell: "sleep 5; sudo shutdown -r now"
  async: 2
  poll: 1
  ignore_errors: true
  sudo: true
  when: ansible_os_family=="Debian"

- name: waiting for server to come back
  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
  sudo: false

# sudo: false added for consistency with the other local wait_for tasks.
- name: waiting for server to come back 2
  local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
  sudo: false

- name: bring up interface
  #variable=eth0 or ens6
  command: ifup {{ MELLANOX_DEVICE_NAME }}
  sudo: true
  when: ansible_distribution_major_version == "7"
DEVICE=ens6
ONBOOT=yes
NM_CONTROLLED=no
BOOTPROTO=none
IPADDR={{ ip_address.stdout }}
PREFIX=22
MTU=9000
---
# Note: do not append '.tgz' to the driver source name here; the playbook adds the extension itself.
MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}"
......@@ -16,6 +16,7 @@
line: /usr/local/Modules/modulefiles
ignore_errors: true
sudo: true
when: ansible_os_family == 'RedHat'
# for some reason ubuntu uses lowercase modules
- name: add /usr/local/Modules to the module file path
......
......@@ -19,6 +19,7 @@
sudo: true
register: edit1
- name: edit passwd file
lineinfile:
args:
......@@ -29,6 +30,8 @@
sudo: true
register: edit2
- name: edit passwd file
lineinfile:
args:
......@@ -39,3 +42,14 @@
sudo: true
register: edit3
# ubuntu:x:1000:1000:Ubuntu:/home/ubuntu:/bin/bash
- name: edit passwd file for ubuntu 14
lineinfile:
args:
dest: /etc/passwd
regexp: "{{ ansible_ssh_user }}:x:1000:1000:Ubuntu:/home/{{ ansible_ssh_user }}:.*"
line: "{{ ansible_ssh_user }}:x:1000:1000:Ubuntu:/local_home/{{ ansible_ssh_user }}:/bin/bash"
backrefs: yes
sudo: true
register: edit4
......@@ -33,3 +33,9 @@
- name: "Giving priviliges to user"
mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL state=present
when: mysql_user_host is defined
- name: "Giving priviliges to user"
mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL state=present
with_items: mysql_user_hosts_group
when: mysql_user_hosts_group is defined
---
-
name: "Run rpcbind service"
service: "name=rpcbind state=started"
service: "name=rpcbind state=started enabled=yes"
......@@ -5,16 +5,16 @@
with_items: exportList
- name: "Starting rpcbind"
service: "name=rpcbind state=started"
service: "name=rpcbind state=started enabled=true"
sudo: true
when: ansible_os_family == "RedHat"
- name: "Start the Server"
service: "name=nfs state=started"
service: "name=nfs state=started enabled=true"
sudo: true
when: ansible_os_family == "RedHat"
- name: "Start the Server"
service: "name=nfs-kernel-server state=started"
service: "name=nfs-kernel-server state=started enabled=true"
sudo: true
when: ansible_os_family == "Debian"
- name: "Templating /etc/ssh/known_hosts"
template: src=known_hosts.j2 dest=/etc/ssh/known_hosts owner=root group=root mode=600
template: src=known_hosts.j2 dest=/etc/ssh/ssh_known_hosts owner=root group=root mode=644
sudo: true
register: sshknowhost
register: sshknownhost
- name: ecrypt the hosts file
shell: ssh-keygen -H -f /etc/ssh/known_hosts
- name: encrypt the hosts file
shell: ssh-keygen -H -f /etc/ssh/ssh_known_hosts
sudo: true
when: sshknownhost.changed
- name: set read permissions
file: path=/etc/ssh/ssh_known_hosts owner=root group=root mode=644 state=file
sudo: true
# Remove the ssh_known_hosts.old file, but only on runs where the
# known-hosts template actually changed.
- name: delete ssh_known_hosts.old
  file: path=/etc/ssh/ssh_known_hosts.old state=absent
  sudo: true
  # The templating task registers its result as "sshknownhost"; the previous
  # spelling "sshknowhost" referred to a variable that is never registered.
  when: sshknownhost.changed
......@@ -2,14 +2,22 @@
{% for node in groups['all'] %}
{% for interface in hostvars[node]['ansible_interfaces'] %}
{% if interface != "lo" %}
{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'rsa': hostvars[node]['ansible_ssh_host_key_rsa_public']} %}
{% if hostvars[node]['ansible_ssh_host_key_rsa_public'] %}
{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ssh-rsa', 'key': hostvars[node]['ansible_ssh_host_key_rsa_public']} %}
{% if nodelist.append(host) %}
{% endif %}
{% endif %}
{% if hostvars[node]['ansible_ssh_host_key_ecdsa_public'] %}
#{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ssh-ecdsa', 'key': hostvars[node]['ansible_ssh_host_key_ecdsa_public']} %}
{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ecdsa-sha2-nistp256', 'key': hostvars[node]['ansible_ssh_host_key_ecdsa_public']} %}
{% if nodelist.append(host) %}
{% endif %}
{% endif %}
{% endif %}
{% endfor %}
{% endfor %}
{% for host in nodelist|unique %}
{{ host.ip }} ssh-rsa {{ host.rsa }}
{{ host.name }} ssh-rsa {{ host.rsa }}
{% for host in nodelist %}
{{ host.ip }} {{ host.keytype }} {{ host.key }}
{{ host.name }} {{ host.keytype }} {{ host.key }}
{% endfor %}
......@@ -140,4 +140,4 @@
-
name: "Starting shibboleth"
sudo: true
service: name=shibd state=started
service: name=shibd state=started enabled=yes
......@@ -128,6 +128,14 @@
sudo: true
when: slurm_gres_list is defined
- name: install slurm prolog
template: src=slurm.prolog.j2 dest={{ slurm_dir }}/bin/slurm.prolog
sudo: true
- name: install slurm epilog
template: src=slurm.epilog.j2 dest={{ slurm_dir }}/bin/slurm.epilog
sudo: true
- name: install slurm.conf
copy: src=files/slurm.conf dest={{ slurm_dir }}/etc/slurm.conf
sudo: true
......@@ -141,7 +149,6 @@
- name: setup envirnment variables
template: src=slurm_setup.sh.j2 dest=/etc/profile.d/slurm_setup.sh
sudo: true
when: ansible_os_family == 'RedHat'
- name: setup plugin
template: src=job_submit.lua.j2 dest={{ slurm_dir }}/etc/job_submit.lua
......
#!/bin/sh
# specific files to be deleted to clean up after a Strudel session
#
# Removes leftovers owned by ${SLURM_JOB_USER} under /tmp: pulse* entries,
# .esd-* entries, X lock files and X sockets in /tmp/.X11-unix.

# Without a user name the find commands below cannot be built correctly,
# so there is nothing safe to do.
[ -n "${SLURM_JOB_USER}" ] || exit 0

# -prune stops find from descending into a directory that is about to be
# removed; -exec ... {} + passes names to rm directly, so names containing
# whitespace are handled correctly (unlike a plain "| xargs").
find /tmp -user "${SLURM_JOB_USER}" -name "pulse*" -prune -exec rm -rf {} +
find /tmp -user "${SLURM_JOB_USER}" -name ".esd-*" -prune -exec rm -rf {} +
find /tmp -user "${SLURM_JOB_USER}" -name ".X*-lock" -prune -exec rm -rf {} +
find /tmp/.X11-unix -user "${SLURM_JOB_USER}" -name "X*" -prune -exec rm -rf {} +

# Cleanup is best-effort: always report success.
# NOTE(review): presumably Slurm reacts to a non-zero epilog status - confirm.
exit 0
#!/bin/sh
# No-op placeholder: performs no work and exits with status 0.
exit 0
......@@ -36,7 +36,7 @@
when: use_systemd is defined
- name: start munge
service: name=munge state=started
service: name=munge state=started enabled=yes
sudo: true
- name: start slurmdbd
......@@ -44,6 +44,12 @@
sudo: true
when: start_slurmdbd is defined
- name: "create cluster in slurm db"
shell: "{{slurm_dir}}/bin/sacctmgr -i create cluster {{ clustername }}"
sudo: true
ignore_errors: true
- name: start slurmctl
service: name=slurmctld state=started enabled=yes
sudo: true
......
---
system_packages:
- "@desktop"
- "@Desktop"
- tigervnc-server
- "@xfce"
- libXcomposite
......@@ -12,4 +12,5 @@
- graphite2
- xterm
- libpng
- "@MATE Desktop"
- "@mate-desktop-environment"
#- "@MATE Desktop"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment