Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hpc-team/HPCasCode
  • chines/ansible_cluster_in_a_box
2 results
Show changes
Showing
with 818 additions and 283 deletions
---
# Values derived from the caller-supplied mailchimp_* variables.
# mailchimp_user and mailchimp_key have no fallback and must be provided.
user: '{{ mailchimp_user }}'
key: '{{ mailchimp_key }}'
# Each of the following falls back to the literal shown when its
# mailchimp_* variable is not set.
list: '{{ mailchimp_list | default("list") }}'
datetag: '{{ mailchimp_datetag | default("last_login") }}'
numtag: '{{ mailchimp_numtag | default("days_since_active") }}'
---
- name: set cinder
set_fact: cinder=True
when: '"ansible_host_volumes" in hostvars[inventory_hostname]'
- name: set ephemeral
set_fact: cinder=False
when: '"ansible_host_volumes" not in hostvars[inventory_hostname]'
- name: format volumes
filesystem: fstype={{ item.fstype }} dev={{ hostvars[inventory_hostname]['ansible_host_volumes'][item.name]['dev'] }}
with_items: "{{ volumes }}"
become: true
become_user: root
when: cinder
- name: format volumes
mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ hostvars[inventory_hostname]['ansible_host_volumes'][item.name]['dev'] }} state=mounted
with_items: "{{ volumes }}"
become: true
become_user: root
when: cinder
- name: format volumes
filesystem: fstype={{ item.fstype }} dev={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }}
with_items: volumes
sudo: true
filesystem: fstype={{ item.fstype }} dev={{ item.name }}
with_items: "{{ volumes }}"
become: true
when: not cinder
- name: format volumes
mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }} state=mounted
with_items: volumes
sudo: true
mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ item.name }} state=mounted
with_items: "{{ volumes }}"
become: true
when: not cinder
- name: symlink volumes
file: force=yes state=link src="{{ item.mntpt }}" path="{{ item.linkto }}"
when: item.linkto is defined
with_items: volumes
sudo: true
with_items: "{{ volumes }}"
become: true
#- name: Format File Systems
# filesystem: fstype={{ item.fstype }} dev={{ item.dev }} opts={{ item.opts }}
# with_items: mkFileSystems
# sudo: true
# become: true
# when: mkFileSystems is defined
#
#- name: Mount device
#- name: Mount device
# mount: name={{ item.name }} src={{ item.dev }} fstype={{ item.fstype }} opts={{ item.opts }} state=mounted
# with_items: mountFileSystems
# sudo: true
# become: true
# when: mountFileSystems is defined
#
#
---
# Entry point: validate the required `rocemode` parameter (the string "1" or
# "2"), then include the matching task file rocev1.yml / rocev2.yml.
- name: rocemode is 1 or 2
  assert:
    that:
      - rocemode is defined
      - rocemode == "1" or rocemode == "2"
    fail_msg: "expecting parameter rocemode with values 1 or 2"
    # Report the file that is actually included below (rocev<N>.yml).
    success_msg: "continuing with rocev{{ rocemode }}.yml"
- include_tasks: "rocev{{ rocemode }}.yml"
\ No newline at end of file
---
# Tasks for RoCE mode 1: discover the mlx5 device name(s), install the
# roce_mode systemd unit, enable it, and optionally start it.
- name: stat if etc systemd system roce_mode.service exists
  stat:
    path: /etc/systemd/system/roce_mode.service
  register: statrocemode

# todo handle multiple devices found
# Read-only probe: it must run even in check mode (later templates read
# qibdevicenames) and must never be reported as a change.
- name: query ibstat for devicename
  shell: /usr/sbin/ibstat | grep mlx5 | awk '{print $2}' | sed -r "s#'##g"
  register: qibdevicenames
  check_mode: no
  changed_when: false
  #when: not statrocemode.stat.exists

- name: print qibdevicenames
  debug:
    var: qibdevicenames

- name: set ibdevicename
  set_fact:
    ibdevicename: "{{ qibdevicenames.stdout }}"

- name: template roce_mode.service file
  template:
    src: roce_mode.service.j2
    dest: /etc/systemd/system/roce_mode.service
    owner: root
    group: root
    mode: "0644"
  become: true
  become_user: root
  register: service_file

# systemd only needs to re-read its units when the unit file was rewritten.
- name: Reload systemd
  shell: systemctl daemon-reload
  become: true
  become_user: root
  when: service_file.changed
  register: reload_service

- name: enable roce_mode setting
  service:
    name: roce_mode
    enabled: true
  become: true

# Started by default; set start_roce_service to a false value to only enable
# the unit. `| bool` makes string values such as "false" (e.g. from
# --extra-vars) behave as booleans.
- name: enable roce_mode setting
  service:
    name: roce_mode
    state: started
    enabled: true
  become: true
  when: start_roce_service is undefined or (start_roce_service | bool)
---
# Tasks for RoCE mode 2: discover the mlx5 device name(s), install the
# pfc-ecn.sh helper and the roce_mode systemd unit, then (re)start the unit.
- name: stat if etc systemd system roce_mode.service exists
  register: statrocemode
  stat:
    path: /etc/systemd/system/roce_mode.service

# todo handle multiple devices found
- name: query ibstat for devicename
  register: qibdevicenames
  # Read-only probe: always executed, never reported as changed.
  check_mode: false
  changed_when: false
  shell: /usr/sbin/ibstat | grep mlx5 | awk '{print $2}' | sed -r "s#'##g"
  #when: not statrocemode.stat.exists

- name: print qibdevicenames
  debug:
    var: qibdevicenames

- name: template pfc-ecn script
  register: pfcecn
  become: true
  template:
    src: pfc-ecn.sh.j2
    dest: /usr/sbin/pfc-ecn.sh
    owner: root
    group: root
    mode: u+rwx,g-wx,o-rwx

- name: template roce_mode.service file
  register: service_file
  become: true
  become_user: root
  template:
    src: roce_mode.service.j2
    dest: /etc/systemd/system/roce_mode.service
    owner: root
    group: root
    mode: "0644"

- name: Reload systemd
  register: reload_service
  when: service_file.changed
  become: true
  become_user: root
  shell: systemctl daemon-reload

# Restart only when either rendered file changed ...
- name: enable roce_mode setting
  when: service_file.changed or pfcecn.changed
  become: true
  service:
    name: roce_mode
    state: restarted
    enabled: true

# ... and in every case make sure the unit is enabled and running.
- name: enable roce_mode setting
  become: true
  service:
    name: roce_mode
    state: started
    enabled: true
#!/bin/bash
# Jinja2-templated script that applies QoS settings for RoCE traffic.
# Hosts whose ansible_product_name does not contain "OpenStack Nova" get the
# interface-level settings (trust mode, PFC, pause frames) plus the per-device
# ToS settings; hosts that do contain it only get the per-device ToS settings,
# using the device names collected in qibdevicenames at render time.
# TCP ECN is enabled in both cases.

# Get the list of Mellanox interfaces (p1p1, p1p2, etc...)
declare -a mellanox_interface_array
for f in /sys/class/net/*; do
    int=$(basename $f)
    driver=$(readlink $f/device/driver/module)
    if [ $driver ]; then
        driver=$(basename $driver)
        operstate=$(cat $f/operstate)
    fi
    # Only interfaces bound to an mlx* driver that are currently up are kept.
    if [[ "$driver" == *"mlx"* ]] && [[ "$operstate" == "up" ]]; then
        mellanox_interface_array+=($int)
    fi
done
# Get the list of Mellanox devices (mlx5_0, mlx5_1, etc...)
declare -a mellanox_device_array
for int in "${mellanox_interface_array[@]}"; do
    dev=`\`which ibdev2netdev\` | grep $int | awk '{print $1}'`
    mellanox_device_array+=($dev)
done
# When a bond0 exists, also pick up the device(s) ibdev2netdev maps to a bond.
if [ -f /proc/net/bonding/bond0 ]; then
    dev=`\`which ibdev2netdev\` | grep bond | awk '{print $1}'`
    mellanox_device_array+=($dev)
fi
# Set DSCP (L3) as trust mode for the NIC
# Syntax: mlnx_qos -i <interface> --trust dscp
# Example: /bin/mlnx_qos -i p5p1 --trust dscp
{% if "OpenStack Nova" not in ansible_product_name %}
for int in "${mellanox_interface_array[@]}"; do
    printf "`which mlnx_qos` -i $int --trust dscp \n"
    `which mlnx_qos` -i $int --trust dscp
done
{% endif %}
# Set ToS to 106 (DSCP 26) for ALL RoCE traffic
# Syntax: echo 106 > /sys/class/infiniband/<mlx-device>/tc/1/traffic_class
# Example: echo 106 > /sys/class/infiniband/mlx5_0/tc/1/traffic_class
{% if "OpenStack Nova" not in ansible_product_name %}
for dev in "${mellanox_device_array[@]}"; do
    printf "echo 106 > /sys/class/infiniband/$dev/tc/1/traffic_class \n"
    echo 106 > /sys/class/infiniband/$dev/tc/1/traffic_class
done
# Set the RDMA-CM ToS to 106 (DSCP 26)
# Syntax: cma_roce_tos -d <mlx-device> -t 106
# Example: /sbin/cma_roce_tos -d mlx5_0 -t 106
for dev in "${mellanox_device_array[@]}"; do
    printf "`which cma_roce_tos` -d $dev -t 106 \n"
    `which cma_roce_tos` -d $dev -t 106
done
# Enable PFC on RoCE priority - Activate PFC on priority 3
# Syntax: mlnx_qos -i <interface> --pfc 0,0,0,1,0,0,0,0
# Example: mlnx_qos -i p5p1 --pfc 0,0,0,1,0,0,0,0
for int in "${mellanox_interface_array[@]}"; do
    printf "`which mlnx_qos` -i $int --pfc 0,0,0,1,0,0,0,0 \n"
    `which mlnx_qos` -i $int --pfc 0,0,0,1,0,0,0,0
done
# Turn off rx/tx pause frames on p1p1 and p1p2; failures are ignored.
/sbin/ethtool -A p1p1 rx off tx off || /bin/true
/sbin/ethtool -A p1p2 rx off tx off || /bin/true
{% endif %}
{% if "OpenStack Nova" in ansible_product_name %}
{% for device in qibdevicenames.stdout_lines %}
printf "echo 106 > /sys/class/infiniband/{{ device }}/tc/1/traffic_class \n"
echo 106 > /sys/class/infiniband/{{ device }}/tc/1/traffic_class
printf "`which cma_roce_tos` -d {{ device }} -t 106 \n"
`which cma_roce_tos` -d {{ device }} -t 106
{% endfor %}
{% endif %}
# Enable ECN for TCP traffic
/sbin/sysctl -w net.ipv4.tcp_ecn=1
# Jinja2 template for the roce_mode systemd unit. It renders one
# cma_roce_mode call per device listed in qibdevicenames.stdout_lines.
[Unit]
Description=Force the RoCE mode{{ rocemode }}
After=network.target network-online.target openibd.service
Wants=network-online.target
[Service]
# oneshot + RemainAfterExit: the commands run once and the unit is then
# reported as active.
Type=oneshot
RemainAfterExit=yes
# Only rendered when the lustre_server variable is defined.
{% if lustre_server is defined %}
ExecStart=/usr/local/bin/flush_neigh_devs.sh
{% endif %}
# Mode 2 only: wait 30 seconds before any ExecStart command runs.
{% if rocemode == "2" %}
ExecStartPre=/bin/sleep 30
{% endif %}
{% for device in qibdevicenames.stdout_lines %}
ExecStart=/usr/sbin/cma_roce_mode -d {{ device }} -p 1 -m {{ rocemode }}
{% endfor %}
# Mode 2 only: finish by running the pfc-ecn.sh script.
{% if rocemode == "2" %}
ExecStart=/usr/sbin/pfc-ecn.sh
{% endif %}
[Install]
WantedBy=multi-user.target
WantedBy=final.target
---
# Whether to pass the kernel-module build options to the installer; the task
# file compares this value against True.
buildKMOD: false
# Name of the MLNX_OFED bundle for the detected OS family (RedHat or Debian).
# note: do not add '.tgz' to driver src. done in playbook
# download page is https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed
MELLANOX_DRIVER_SRC: '{% if ansible_os_family == "RedHat" %}MLNX_OFED_LINUX-4.9-4.0.8.0-rhel7.9-ext{% elif ansible_os_family == "Debian" %}MLNX_OFED_LINUX-4.9-4.0.8.0-ubuntu20.04-x86_64{% endif %}'
\ No newline at end of file
#!/bin/sh
# A CRUDE Script to install Mellanox OFED drivers
# Philip.Chan@monash.edu
#
# Steps: check the running kernel version, install prerequisites with yum,
# unpack and run the MLNX_OFED installer, then generate an ifcfg file with
# ./set_ifcfg.pl and install it as ifcfg-ens6 unless that file already exists.
#
# TODO: check if MLNX_OFED is already installed!
# TODO: check kernel...
KERN=$(uname -r)
if [ "$KERN" != "3.10.0-229.14.1.el7.x86_64" ]
then
    echo "Oops! Did you forget to reboot?"
    echo "Kernel version has to be 3.10.0-229.14.1.el7.x86_64"
    exit 1
fi
sudo yum install -y pciutils gcc-gfortran libxml2-python tcsh libnl lsof tcl tk perl
sudo yum install -y gtk2 atk cairo
# Stop here if the bundle cannot be unpacked or entered; otherwise the
# installer (and the later "cd ..") would run from the wrong directory.
tar xzvf MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext.tgz || {
    echo "Failed to unpack MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext.tgz"
    exit 1
}
cd MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext || {
    echo "Failed to enter MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext"
    exit 1
}
sudo ./mlnxofedinstall -q
cd ..
tmpfile="/tmp/ifcfg.pc"
rm -f "$tmpfile"
./set_ifcfg.pl "$tmpfile"
# set_ifcfg.pl only leaves a file behind when it could derive an address
# from the hostname.
if [ -f "$tmpfile" ]
then
    echo "Attempting to install ifcfg-ens6"
    if [ -f /etc/sysconfig/network-scripts/ifcfg-ens6 ]
    then
        # Never overwrite an existing interface configuration.
        echo "/etc/sysconfig/network-scripts/ifcfg-ens6 already exists!"
        grep IP /etc/sysconfig/network-scripts/ifcfg-ens6
        echo "bailing!"
    else
        sudo cp -ip "$tmpfile" /etc/sysconfig/network-scripts/ifcfg-ens6
        sudo chown root:root /etc/sysconfig/network-scripts/ifcfg-ens6
        # ./ifup is resolved relative to this directory, so the cd must succeed.
        cd /etc/sysconfig/network-scripts || exit 1
        sudo ./ifup ens6
        ping -c 1 172.16.228.1
    fi
fi
exit 0
#!/usr/bin/perl
#
# Assumes Mellanox NIC is named as ens6
# Philip.Chan@monash.edu
#
# Usage:
# ./set_ifcfg.pl [<tmpfilename>]
# To be used within the mlnx_install.sh
#
# Derives a 172.16.229.x address from the local hostname and writes an
# ifcfg file for ens6 to <tmpfilename> (default: tmp.ifcfg).
#
use strict;
use warnings;

my $outfile = shift @ARGV;
$outfile = "tmp.ifcfg" if (! defined $outfile);

# get_index($hostname): map a hostname to the last octet of its address.
#   hc<N> -> 33 + N and hs<N> -> 1 + N, both only for N < 32.
# Returns 0 when the hostname matches neither pattern or N is out of range.
sub get_index
{
    my $hn = shift;
    my $maxhosts = 32;
    if ($hn =~ /hc(\d+)/) {
        return 33 + $1 if ($1 < $maxhosts);
    }
    if ($hn =~ /hs(\d+)/) {
        return 1 + $1 if ($1 < $maxhosts);
    }
    return 0;
}

my $hostname = `/bin/hostname`;
$hostname = '' if (! defined $hostname);
# Strip the trailing newline so messages are not broken across lines.
chomp $hostname;

my $x = get_index($hostname);
die "Unable to parse hostname $hostname\n" if ($x == 0);

my $ip = "172.16.229.$x";
print "Assigning $ip to $hostname\n";

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode, and the OS error is reported on failure.
open(my $out, '>', $outfile)
    or die "Failed to create output file $outfile: $!\n";
print $out "DEVICE=ens6\n";
print $out "ONBOOT=yes\n";
print $out "NM_CONTROLLED=no\n";
print $out "BOOTPROTO=none\n";
print $out "IPADDR=$ip\n";
print $out "PREFIX=22\n";
print $out "MTU=9000\n";
close($out) or die "Failed to write output file $outfile: $!\n";
exit 0;
---
- include_vars: mellanoxVars.yml
# Fails the play on RedHat-family hosts when lspci lists no Mellanox device
# (grep exits non-zero when nothing matches).
- name: "Force this role to fail if no Mellanox hardware is present"
  # Exclude the role via tags, e.g. ansible-playbook --skip-tags mlx
  shell: "/usr/sbin/lspci | grep Mellanox"
  # `check_mode: yes` would force check mode, in which the shell module does
  # not execute the command, so the probe could never fail. `no` makes it
  # always run, matching the other read-only probes in this role.
  check_mode: no
  changed_when: false
  when: ansible_os_family == "RedHat"
- name: yum install dependencies
yum: name=perl,pciutils,gtk2,atk,cairo,gcc-gfortran,libxml2-python,tcsh,libnl,lsof,tcl,tk
sudo: true
ignore_errors: true
# Fails the play on Debian-family hosts when lspci lists no Mellanox device
# (grep exits non-zero when nothing matches).
- name: "Force this role to fail if no Mellanox hardware is present"
  # Exclude the role via tags, e.g. ansible-playbook --skip-tags mlx
  shell: "/usr/bin/lspci | grep Mellanox"
  # `check_mode: yes` would force check mode, in which the shell module does
  # not execute the command, so the probe could never fail. `no` makes it
  # always run, matching the other read-only probes in this role.
  check_mode: no
  changed_when: false
  when: ansible_os_family == "Debian"
- name: install dependencies centos 7
package:
name:
- atk
- cairo
- createrepo
- gcc-gfortran
- gtk2
- libxml2-python
- lsof
- pciutils
- perl
- python-devel
- rpm-build
- tcl
- tcsh
- libnl
- tk
# kernel devel removed for because we might need to pin the version
become: true
become_user: root
when: ansible_os_family == "RedHat"
# Build prerequisites for the driver installer on Ubuntu 18.
- name: install dependencies ubuntu18
  package:
    name:
      - autoconf
      - automake
      - autotools-dev
      - bison
      - build-essential
      - bzip2
      - chrpath
      - debhelper
      - dh-autoreconf
      - dpatch
      - ethtool
      - flex
      - gcc
      - gfortran
      - graphviz
      - libgfortran4
      - libltdl-dev
      - libnl-route-3-200
      - m4
      - make
      - pkg-config
      - quilt
      - swig
      - tcl
      - tk
    state: present
  become: true
  become_user: root
  # ansible_distribution_major_version is a string fact; comparing it with the
  # integer 18 never matches, so the task was always skipped.
  when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "18"
# Build prerequisites for the driver installer on Ubuntu 20.
- name: install dependencies ubuntu20
  package:
    name:
      - autoconf
      - automake
      - autotools-dev
      - bison
      - chrpath
      - debhelper
      - dkms
      - dpatch
      - ethtool
      - flex
      - gcc
      - gfortran
      - graphviz
      - libgfortran4
      - libltdl-dev
      - libnl-3-dev
      - libnl-route-3-dev
      - m4
      - make
      - pkg-config
      - quilt
      - swig
      - tcl
      # "tkchrpath" was two package names fused together: tk here, chrpath above.
      - tk
    state: present
  become: true
  become_user: root
  # ansible_distribution_major_version is a string fact; comparing it with the
  # integer 20 never matches, so the task was always skipped.
  when: ansible_distribution == "Ubuntu" and ansible_distribution_major_version == "20"
- name: test for existing installation of drivers
command: ibv_devinfo
sudo: true
stat:
path: /usr/bin/ibv_devinfo
become: true
register: drivers_installed
ignore_errors: true
- name: yum update to upgrade kernel
shell: "yum update -y"
sudo: true
ignore_errors: true
when: ansible_os_family == "RedHat" and drivers_installed|failed
#
# A REBOOT IS NEEDED AFTER a KERNEL UPDATE
#
- name: restart machine
shell: sleep 5; sudo shutdown -r now "Ansible updates triggered"
async: 2
poll: 0
ignore_errors: true
sudo: true
when: ansible_os_family == "RedHat" and drivers_installed|failed
- name: waiting for server to come back
local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
sudo: false
- name: waiting for server to come back number 2
local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
sudo: false
- name: copy driver source
#make this a variable
unarchive: copy=yes src="files/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
sudo: true
#when: drivers_installed|failed and ansible_os_family=="RedHat" and ansible_distribution_major_version == "7"
when: drivers_installed|failed
- name: install drivers
shell: ./mlnxofedinstall -q
- name: debug - print out installed driver
debug: var=drivers_installed
- name: default dont install
set_fact:
install_now: false
reboot_now: false
- name: get driver version
shell: 'ofed_info -l | head -n 1 | cut -f 1 -d " "'
register: driver_version
check_mode: no
changed_when: False
- name: get desired driver version
shell: 'echo {{ MELLANOX_DRIVER_SRC }} | cut -f 1,2,3 -d "-"'
register: desired_driver_version
check_mode: no
changed_when: False
- name: set install due to drivers not installed
set_fact:
install_now: true
reboot_now: true
when: not drivers_installed.stat.exists
- name: debug
debug: var=driver_version
- name: debug
debug: var=desired_driver_version
- name: set install due to version mismatch
set_fact:
install_now: true
reboot_now: true
when: driver_version.failed or not desired_driver_version.stdout in driver_version.stdout
- name: debug - print out value of install_now
debug: var=install_now
# alternatively use #https://content.mellanox.com/ofed/MLNX_OFED-4.9-2.2.4.0/MLNX_OFED_LINUX-4.9-2.2.4.0-rhel7.8-x86_64.tgz
- name: fetch and unarchive driver source
unarchive: copy=no src="https://object-store.rc.nectar.org.au/v1/AUTH_56ccfd36d0ad454a883a98e8489c97b5/hpc-repo/drivers/{{ MELLANOX_DRIVER_SRC }}.tgz" dest=/tmp
become: true
become_user: root
when: install_now and mlx_driver_url is undefined
- name: copy driver source from mellanox
unarchive:
copy: no
src: "{{ mlx_driver_url }}"
dest: '/tmp'
become: true
become_user: root
when: install_now and mlx_driver_url is defined
- name: Populate service facts
service_facts:
#remove old mellanox drivers as they may interfere with an update
- name: stop lustre
service: name=lustre-client state=stopped
become: true
become_user: root
when: install_now and drivers_installed.stat.exists and services["lustre-client.service"] is defined
- name: remove ipa stuff
package:
state: absent
name:
- ipa-common
- ipa-python-compat
- python2-ipalib
become: true
when: install_now
- name: remove unnecessary packages that break stuff
package:
state: absent
name:
- libgpod
- usbmuxd
- libmlx5
- libmlx4
- libibverbs
become: true
when: install_now
- name: remove mellanox rpms
package:
state: absent
name:
- mlnx-ofa_kernel
- mlnx-ofa_kernel-devel
- mlnx-ofa_kernel-modules
- mlnx-ofed-kernel-dkms
- mlnx-ofed-kernel-utils
- ofed-scripts
become: true
when: install_now
- name: install drivers Debian
set_fact:
mlnxofedinstall_args: "-q --skip-repo --without-fw-update --hpc --dkms"
when: buildKMOD!=True and ansible_os_family == "Debian"
- name: install drivers Redhat VM
set_fact:
mlnxofedinstall_args: "-q --skip-repo --without-fw-update"
when: buildKMOD!=True and ansible_os_family == "RedHat" and "OpenStack Nova" in ansible_product_name
- name: install drivers Redhat with firmware on BM
set_fact:
mlnxofedinstall_args: "-q --skip-repo"
when: buildKMOD!=True and ansible_os_family == "RedHat" and "OpenStack Nova" not in ansible_product_name
- name: build and install drivers Redhat
set_fact:
mlnxofedinstall_args: "./mlnxofedinstall -q --skip-repo --without-fw-update --add-kernel-support --hpc --kmp --all"
when: buildKMOD==True and ansible_os_family == "RedHat"
- name: build and install drivers Debian
set_fact:
mlnxofedinstall_args: "-q --skip-repo --without-fw-update --hpc --dkms"
when: buildKMOD==True and ansible_os_family == "Debian"
- name: run install
shell: "./mlnxofedinstall {{ mlnxofedinstall_args }}"
args:
#more changes
chdir: "/tmp/{{ MELLANOX_DRIVER_SRC }}"
sudo: true
when: drivers_installed|failed
become: true
become_user: root
when: install_now
#
# get IP address before reboot
#
- name: get IP address
local_action: command ./scripts/map_ib_ip.pl {{ inventory_hostname }}
register: ip_address
sudo: false
#when: drivers_installed|failed
- name: template IP address
template: dest=/etc/sysconfig/network-scripts/ifcfg-ens6 src=ifcfg-ens6.j2 owner=root group=root
sudo: true
when: ansible_os_family=="RedHat" and ansible_distribution_major_version == "7" and drivers_installed|failed
#ubuntu equivalent of previous command
- name: Ubuntu network interfaces - line 1
lineinfile:
args:
dest: /etc/network/interfaces
line: auto {{ MELLANOX_DEVICE_NAME }}
state: present
sudo: true
when: ansible_os_family=="Debian" and drivers_installed|failed
- name: Ubuntu network interfaces - line 2
lineinfile:
args:
dest: /etc/network/interfaces
line: iface {{ MELLANOX_DEVICE_NAME }} inet static
state: present
insertafter: "auto {{ MELLANOX_DEVICE_NAME }}"
sudo: true
when: ansible_os_family=="Debian" and drivers_installed|failed
- name: Ubuntu network interfaces - line 3
lineinfile:
args:
dest: /etc/network/interfaces
line: address {{ ip_address.stdout }}
state: present
insertafter: "iface {{ MELLANOX_DEVICE_NAME }} inet static"
sudo: true
when: ansible_os_family=="Debian" and drivers_installed|failed
#
# A REBOOT IS NEEDED AFTER SUCCESSFUL INSTALL
#
- name: restart machine
command: "sleep 5; sudo shutdown -r now"
async: 2
poll: 0
ignore_errors: true
sudo: true
when: ansible_os_family=="Centos" and drivers_installed|failed
- name: restart machine for Ubuntu -cos it is 'special'
shell: "sleep 5; sudo shutdown -r now"
async: 2
poll: 1
ignore_errors: true
sudo: true
when: ansible_os_family=="Debian"
- name: waiting for server to come back
local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
sudo: false
- name: waiting for server to come back 2
local_action: wait_for host={{ ansible_ssh_host }} state=started port=22 delay=10 search_regex=OpenSSH
- name: bring up interface
#variable=eth0 or ens6
command: ifup {{ MELLANOX_DEVICE_NAME }}
sudo: true
when: ansible_distribution_major_version == "7"
reboot:
become: true
when: reboot_now
DEVICE=ens6
ONBOOT=yes
NM_CONTROLLED=no
BOOTPROTO=none
IPADDR={{ ip_address.stdout }}
PREFIX=22
MTU=9000
---
#note. do not add '.tgz' to driver src. done in playbook
MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}"
MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}"
---
- name: install environment-modules
yum: name=environment-modules state=installed
sudo: true
yum: name=environment-modules state=present
become: true
when: ansible_os_family == 'RedHat'
- name: install environment-modules
apt: name=environment-modules state=installed
sudo: true
apt: name=environment-modules state=present
become: true
when: ansible_os_family == 'Debian'
- name: add /usr/local/Modules to the module file path
lineinfile:
lineinfile:
args:
dest: /usr/share/Modules/init/.modulespath
line: /usr/local/Modules/modulefiles
ignore_errors: true
sudo: true
ignore_errors: false
become: true
when: ansible_os_family == 'RedHat'
# for some reason ubuntu uses lowercase modules
- name: add /usr/local/Modules to the module file path
lineinfile:
lineinfile:
args:
dest: /usr/share/modules/init/.modulespath
line: /usr/local/Modules/modulefiles
ignore_errors: true
sudo: true
ignore_errors: false
become: true
when: ansible_os_family == 'Debian'
- name: make /local_home
file: path=/local_home owner=root group=root state=directory
sudo: true
become: true
- name: copy the {{ ansible_ssh_user }} home
shell: cp -ar /home/{{ ansible_ssh_user }} /local_home
- name: stat the local_home path
stat: path=/local_home/{{ ansible_user }}
register: local_home_path
- name: copy the {{ ansible_user }} home
shell: cp -ar /home/{{ ansible_user }} /local_home
ignore_errors: true
sudo: true
become: true
register: home_copied
when: not local_home_path.stat.exists
- name: edit passwd file
lineinfile:
args:
dest: /etc/passwd
regexp: "{{ ansible_ssh_user }}:x:1001:1001::/home/{{ ansible_ssh_user }}:.*"
line: "{{ ansible_ssh_user }}:x:1001:1001::/local_home/{{ ansible_ssh_user }}:/bin/bash"
regexp: '{{ ansible_user }}:x:(.*):(.*):(.*):/home/{{ ansible_user }}:(.*)'
line: '{{ ansible_user }}:x:\1:\2:\3:/local_home/{{ ansible_user }}:\4'
backrefs: yes
sudo: true
register: edit1
- name: edit passwd file
lineinfile:
args:
dest: /etc/passwd
regexp: "{{ ansible_ssh_user }}:x:500:500::/home/{{ ansible_ssh_user }}:.*"
line: "{{ ansible_ssh_user }}:x:500:500::/local_home/{{ ansible_ssh_user }}:/bin/bash"
backrefs: yes
sudo: true
register: edit2
become: true
register: edit
- name: edit passwd file
lineinfile:
args:
dest: /etc/passwd
regexp: "{{ ansible_ssh_user }}:x:1000:1000::/home/{{ ansible_ssh_user }}:.*"
line: "{{ ansible_ssh_user }}:x:1000:1000::/local_home/{{ ansible_ssh_user }}:/bin/bash"
backrefs: yes
sudo: true
register: edit3
# ubuntu:x:1000:1000:Ubuntu:/home/ubuntu:/bin/bash
- name: edit passwd file for ubuntu 14
lineinfile:
args:
dest: /etc/passwd
regexp: "{{ ansible_ssh_user }}:x:1000:1000:Ubuntu:/home/{{ ansible_ssh_user }}:.*"
line: "{{ ansible_ssh_user }}:x:1000:1000:Ubuntu:/local_home/{{ ansible_ssh_user }}:/bin/bash"
backrefs: yes
sudo: true
register: edit4
# MYSQL FOR SLURM
Setting up MySQL for Slurm is problematic, as configuring the database is no longer simple.
Ansible modules such as mysql_user do not work on a brand-new system: it appears that
MySQL/MariaDB must be configured from a root shell, and Ansible breaks at this point.
The preferred approach is as follows.
1) Run this role. It may/may not break when trying to modify the database as the root password is not set.
2) Login to sql machine. sudo as root
3) Run **/usr/bin/mysql_secure_installation** Set the root password to what is found in our Ansible scripts, i.e.
```
ansible-vault edit vars/passwords.yml
#Key to change is "sqlrootPasswd"
```
4) By default the database lives in */var/lib*, which is on a small Unix disk partition, so you need to move it.
Several options exist:
1. Set up a symbolic link, e.g. **/var/lib/mysql -> /vdb/mysql**, OR
1. Modify **/etc/my.cnf.d/mariadb-server.cnf** to point to a new location.
e.g.
```
[mysqld]
datadir=/mnt/vdb1/var_lib_mysql
socket=/mnt/vdb1/var_lib_mysql/mysql.sock
log-error=/var/log/mariadb/mariadb.log
pid-file=/run/mariadb/mariadb.pid
```
Note. The client uses the default socket */var/lib/mysql/mysql.sock* so you need to modify **/etc/my.cnf.d/mysql-clients.cnf** and add the same socket.
To avoid errors like:
```
ERROR 2002 (HY000): Can't connect to local MySQL server through socket '/var/lib/mysql/mysql.sock' (2)
```
You must go:
```
[mysql]
socket=/mnt/vdb1/var_lib_mysql/mysql.sock
```
5) The procedure will be something like this:
1. **systemctl stop mariadb**
1. **mkdir /mnt/vdb1/var_lib_mysql**
1. **chown mysql:mysql /mnt/vdb1/var_lib_mysql**
1. **cp -rp /var/lib/mysql/\* /mnt/vdb1/var_lib_mysql**
1. **systemctl start mariadb**
1. Use **systemctl status mariadb** and view **/var/log/mariadb/mariadb.log** to trace errors
6) None of the above is in the ansible scripts. The *mysql* role is tagged as **never** due to these complications
7) The input parameters to this role have been modified, so users can hardcode the SLURM controllers directly.
Sample usage (for a mysql server)
```
- role: mysql
vars:
mysql_type: "mysql_server"
mysql_root_password: "{{ sqlrootPasswd }}"
mysql_user_name: "slurmdb"
mysql_user_db_name: "slurm_acct_db"
current_slurm_mgmt_nodes_fqdn: " {{ current_slurm_mgmt_nodes_fqdn }} "
current_slurm_mgmt_nodes_short: " {{ current_slurm_mgmt_nodes_short }} "
mysql_user_password: "{{ slurmdb_passwd }}"
tags: [ database, mysql, never ]
```
where
```
- set_fact:
current_slurm_mgmt_nodes_fqdn:
- 'm3-mgmt3.massive.org.au'
- 'm3-mgmt4.massive.org.au'
current_slurm_mgmt_nodes_short:
- 'm3-mgmt3'
- 'm3-mgmt4'
```
---
- name: "Restart MySQL"
- name: "Restart MySQL"
service: name={{ sqlServiceName }} state=restarted
sudo: true
become: true
---
- name: Make sure OS is updated since apt install might fail
apt:
update_cache: yes
become: true
when: ansible_os_family == "Debian"
- name: "Installing MySQL Debian"
apt: name="{{ server_packages }}" update_cache=yes state=present
become: true
when: ansible_os_family == "Debian"
- name: Installing MySQL RedHat
yum: name={{ item }}
with_items: "{{ server_packages }}"
become: true
when: ansible_os_family == "RedHat"
- name: make sure mysql conf directory exists
file: dest=/etc/mysql/conf.d state=directory
become: true
register: mysqldb_confdir_create
- name: "Starting MySQL"
service: name={{ sqlServiceName }} state=started enabled=true
become: true
#- name: "Adding root"
# become: true
# mysql_user: name=root host="{{ item }}" password="{{ mysql_root_password }}" login_user=root login_password="{{ mysql_root_password }}" check_implicit_admin=yes
# with_items:
# - "{{ ansible_hostname }}"
# - 127.0.0.1
# - ::1
# - localhost
- name: Check that the slurm_acct_db_directory exists
stat:
path: /var/lib/mysql/slurm_acct_db/ #defined in /vars/filesystems.yaml
register: slurm_acct_db_directory_result
# this will only work if a completely fresh db gets installed because it gets shipped with a blank root pw
- name: update mysql root password for all root accounts
mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root
when: not slurm_acct_db_directory_result.stat.exists and mysqldb_confdir_create.changed
- name: "Adding user database"
mysql_db: name={{ mysql_user_db_name }} state=present login_user=root login_password={{ mysql_root_password }}
- name: "Giving priviliges to user"
mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
when: mysql_user_host is defined
- name: "Giving priviliges to user with shortname"
mysql_user: name={{ mysql_user_name }} host={{ item }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ mysql_user_hosts_group }}"
when: mysql_user_hosts_group is defined
- name: "Giving priviliges to user with hardcoded hostname"
mysql_user: name={{ mysql_user_name }} host='HARDCODE YOUR HOSTNAME' password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
tags: [never]
- name: "Giving priviliges to user with fdqn works only when run with --limit=ManagementNodes,SQLNodes"
mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ mysql_user_hosts_group }}"
when: mysql_user_hosts_group is defined
- name: "Giving priviliges to user with shortname"
mysql_user: name={{ mysql_user_name }} host={{ item }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ mysql_user_hosts_group }}"
when: mysql_user_hosts_group is defined
# Grants mysql_user_name full privileges (with GRANT) on mysql_user_db_name
# for each "<host>.<domain>" built from mysql_user_hosts_group.
- name: "Giving priviliges to user with item.domain"
  mysql_user:
    name: "{{ mysql_user_name }}"
    host: "{{ item }}.{{ domain }}"
    password: "{{ mysql_user_password }}"
    login_user: root
    login_password: "{{ mysql_root_password }}"
    priv: "{{ mysql_user_db_name }}.*:ALL,GRANT"
    state: present
  with_items: "{{ mysql_user_hosts_group | default([]) }}"
  # Same guard as the other per-host grant tasks in this file: skip rather
  # than fail when no host group was supplied.
  when: mysql_user_hosts_group is defined
- debug:
msg: "{{ hostvars[item].ansible_fqdn }}"
with_items: "{{ mysql_user_hosts_group }}"
tags: [never]
---
- name: CHECKING VARS inside role
debug:
msg: "[DEBUG] current_slurm_mgmt_nodes_fqdn is {{ current_slurm_mgmt_nodes_fqdn }} and current_slurm_mgmt_nodes_short is {{ current_slurm_mgmt_nodes_short }}"
# Installs the packages listed in server_packages in a single transaction.
- name: Installing MySQL RedHat
  yum:
    name: "{{ server_packages }}"
    state: present
  become: true
  # Rocky Linux reports ansible_os_family "RedHat" on Ansible releases that
  # know the distribution; "Rocky" is kept for releases where the family
  # falls back to the distribution name.
  when: ansible_os_family in ["RedHat", "Rocky"]
- name: make sure mysql conf directory exists
file: dest=/etc/mysql/conf.d state=directory
become: true
register: mysqldb_confdir_create
- name: "Starting MySQL"
service: name={{ sqlServiceName }} state=started enabled=true
become: true
- name: Check that the slurm_acct_db_directory exists
stat:
path: /var/lib/mysql/slurm_acct_db/ #defined in /vars/filesystems.yaml
register: slurm_acct_db_directory_result
# this will only work if a completely fresh db gets installed because it gets shipped with a blank root pw
# thsi command has to be done manually after mysql installed as it is impossible due to issues with mariadb only resetting from a root shell
# See README.md for this role
#- name: "update mysql root password {{ mysql_root_password }} for all root accounts"
#mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root login_password={{ mysql_root_password }}
#become: true
#become_user: root
#tags: [never]
#when: not slurm_acct_db_directory_result.stat.exists and mysqldb_confdir_create.changed
#when: not slurm_acct_db_directory_result.stat.exists
- name: "Adding user database {{ mysql_user_db_name }} "
mysql_db: name={{ mysql_user_db_name }} state=present login_user=root login_password={{ mysql_root_password }}
become: true
become_user: root
- name: "Giving priviliges to user {{ mysql_user_name }}"
mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
when: mysql_user_host is defined
- name: "Giving priviliges to user for nodes identifed by hostname ( short)"
mysql_user: name={{ mysql_user_name }} host={{ item }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ current_slurm_mgmt_nodes_short }}"
when: current_slurm_mgmt_nodes_short is defined
- name: "[Example only] Giving priviliges to user with hardcoded hostname"
mysql_user: name={{ mysql_user_name }} host='HARDCODE YOUR HOSTNAME' password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
tags: [never]
- name: "Giving priviliges to user for nodes identifed by hostname ( fqdn)"
mysql_user: name={{ mysql_user_name }} host={{ item }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ current_slurm_mgmt_nodes_fqdn }}"
when: current_slurm_mgmt_nodes_fqdn is defined
#COMMENT. I think this is redundant but left in as it does no harm
- name: "Giving priviliges to user with item.domain"
mysql_user:
name: "{{ mysql_user_name }}"
host: "{{ item }}.{{ domain }}"
password: "{{ mysql_user_password }}"
login_user: root
login_password: "{{ mysql_root_password }}"
priv: "{{ mysql_user_db_name }}.*:ALL,GRANT"
state: present
with_items: "{{ current_slurm_mgmt_nodes_short }}"
- debug:
msg: "{{ item }}"
with_items: "{{ current_slurm_mgmt_nodes_fqdn }}"
tags: [never]
---
- name: Make sure OS is updated since apt install might fail
apt:
update_cache: yes
become: true
- name: "Installing MySQL for Ubuntu"
apt: name="{{ server_packages }}" update_cache=yes state=present
become: true
# Disables the bind-address setting in the MariaDB server config by turning
# the line into a comment.
- name: Comment out bind address so it doesn't bind to 127.0.0.1
  replace:
    path: /etc/mysql/mariadb.conf.d/50-server.cnf
    # Match only an active (uncommented) bind-address line. The previous
    # pattern '(.*bind.*)' also matched lines that were already commented,
    # so every run prepended one more '#'. [ \t]* is used instead of \s* so
    # the match cannot start on a preceding blank line.
    regexp: '^([ \t]*bind-address.*)$'
    replace: '#\1'
  become: true
- name: make sure mysql conf directory exists
file: dest=/etc/mysql/conf.d state=directory
become: true
register: mysqldb_confdir_create
- name: "Starting MySQL"
service: name={{ sqlServiceName }} state=started enabled=true
become: true
- name: Check that the slurm_acct_db_directory exists
stat:
path: /var/lib/mysql/slurm_acct_db/ #defined in /vars/filesystems.yaml
register: slurm_acct_db_directory_result
# this will only work if a completely fresh db gets installed because it gets shipped with a blank root pw
- name: update mysql root password for all root accounts
mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root check_implicit_admin=yes
become: true
register: mysql_setrootpw
ignore_errors: yes
- name: update mysql root password for all root accounts
mysql_user: name=root host=localhost password={{ mysql_root_password }} login_user=root check_implicit_admin=yes login_password={{ mysql_root_password }}
become: true
when: mysql_setrootpw.failed
- name: "Adding user database"
mysql_db: name={{ mysql_user_db_name }} state=present login_user=root login_password={{ mysql_root_password }}
become: true
become_user: root
- name: "Giving priviliges to user"
mysql_user: name={{ mysql_user_name }} host={{ mysql_user_host }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
when: mysql_user_host is defined
become: true
become_user: root
- name: "Giving priviliges to user"
mysql_user: name={{ mysql_user_name }} host={{ hostvars[item].ansible_fqdn }} password={{ mysql_user_password }} login_user=root login_password={{ mysql_root_password }} priv={{ mysql_user_db_name }}.*:ALL,GRANT state=present
with_items: "{{ mysql_user_hosts_group }}"
when: mysql_user_hosts_group is defined
become: true
become_user: root