diff --git a/roles/calculateEtcHosts/files/makehosts.py b/roles/calculateEtcHosts/files/makehosts.py index 179809dc737fa1d4e593ba2dfeba77d125cc99d5..b56e2817b93eda2b56fc701175265892bba86115 100755 --- a/roles/calculateEtcHosts/files/makehosts.py +++ b/roles/calculateEtcHosts/files/makehosts.py @@ -17,6 +17,7 @@ for group in d['groups'].keys(): i=0 for h in d['groups'][group]: name = d['hostvars'][h]['ansible_hostname'] + name = h if not domain: hosts[h] = [name] else: diff --git a/roles/config_repos_upstream/files/glusterfs-epel.repo b/roles/config_repos_upstream/files/glusterfs-epel.repo new file mode 100644 index 0000000000000000000000000000000000000000..015ad0bb9c3b8539d6e87fe999c69605b52910e5 --- /dev/null +++ b/roles/config_repos_upstream/files/glusterfs-epel.repo @@ -0,0 +1,22 @@ +# Place this file in your /etc/yum.repos.d/ directory + +[glusterfs-epel] +name=GlusterFS is a clustered file-system capable of scaling to several petabytes. +baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/$basearch/ +enabled=1 +skip_if_unavailable=1 +gpgcheck=0 + +[glusterfs-noarch-epel] +name=GlusterFS is a clustered file-system capable of scaling to several petabytes. +baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/noarch +enabled=1 +skip_if_unavailable=1 +gpgcheck=0 + +[glusterfs-source-epel] +name=GlusterFS is a clustered file-system capable of scaling to several petabytes. - Source +baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/SRPMS +enabled=0 +skip_if_unavailable=1 +gpgcheck=0 diff --git a/roles/config_repos_upstream/files/monashhpc_base.repo b/roles/config_repos_upstream/files/monashhpc_base.repo new file mode 100644 index 0000000000000000000000000000000000000000..8f0d9aeee8c78a5a4d56b826e8ab100833d88bd0 --- /dev/null +++ b/roles/config_repos_upstream/files/monashhpc_base.repo @@ -0,0 +1,25 @@ +# Place this file in your /etc/yum.repos.d/ directory + +[monashhpc_base] +name=MonashHPC base repository mirrored to control the update process +baseurl=https://consistency0/centos/$releasever/os/$basearch/ +enabled=1 +sslverify=false + +[monashhpc_udpates] +name=MonashHPC base repository mirrored to control the update process +baseurl=https://consistency0/centos/$releasever/updates/$basearch/ +enabled=1 +sslverify=false + +[monashhpc_extras] +name=MonashHPC base repository mirrored to control the update process +baseurl=https://consistency0/centos/$releasever/extras/$basearch/ +enabled=1 +sslverify=false + +[monashhpc_centosplus] +name=MonashHPC base repository mirrored to control the update process +baseurl=https://consistency0/centos/$releasever/centosplus/$basearch/ +enabled=1 +sslverify=false diff --git a/roles/config_repos_upstream/files/monashhpc_others.repo b/roles/config_repos_upstream/files/monashhpc_others.repo new file mode 100644 index 0000000000000000000000000000000000000000..e78702bf53f5fe0a1284c0474aac75bba615aabd --- /dev/null +++ b/roles/config_repos_upstream/files/monashhpc_others.repo @@ -0,0 +1,8 @@ +# Place this file in your /etc/yum.repos.d/ directory + +[monashhpc_otherstuff] +name=MonashHPC base repository mirrored to control the update process +baseurl=https://consistency0/centos/hpcsystems/$releasever/$basearch/ +enabled=1 +sslverify=false +gpgcheck=0 diff --git a/roles/config_repos_upstream/tasks/main.yml b/roles/config_repos_upstream/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..2cd2254249275933b4416bde711758fc76e8611e --- /dev/null +++ b/roles/config_repos_upstream/tasks/main.yml @@ -0,0 +1,38 @@ +--- +- name: add gluster repo + copy: src=glusterfs-epel.repo dest=/etc/yum.repos.d/glusterfs-epel.repo + sudo: true + when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" + +- name: enable epel + yum: name=epel-release state='latest' + sudo: true + when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" + + +- name: Enable epel + command: yum-config-manager --enable epel + sudo: true + when: ansible_distribution == "CentOS" and ansible_distribution_major_version == "7" + +# Use mate DE on systems that have moved to gnome3, since there is no gpu acceleration by default on NeCTAR openstack +# Trusty (Ubuntu 14.04 LTS) needs repos added. Wheezy (Debian Stable) gets mate from backports, Utopic (Ubuntu 14.10) Jessie (Debian testing) and Sid (Debian unstable) get it by default +- name: add repos apt + shell: "add-apt-repository -y ppa:ubuntu-mate-dev/ppa" + sudo: true + when: ansible_distribution_release == 'trusty' + +- name: add repos apt + shell: "add-apt-repository -y ppa:ubuntu-mate-dev/trusty-mate" + sudo: true + when: ansible_distribution_release == 'trusty' + +- name: add repos apt + shell: "add-apt-repository -y ppa:gluster/glusterfs-3.7" + sudo: true + when: ansible_distribution == 'Ubuntu' + +- name: apt-get update + apt: update_cache=True + sudo: true + when: ansible_os_family=="Debian" diff --git a/roles/etcHosts/tasks/main.yml b/roles/etcHosts/tasks/main.yml index 52ca69082d2e13d8e4c954432efb9f1fb7367ce9..cb1d84a09812405cdb84cebabc03a802522e871b 100644 --- a/roles/etcHosts/tasks/main.yml +++ b/roles/etcHosts/tasks/main.yml @@ -3,7 +3,7 @@ sudo: true - name: set hostname by sysctl - shell: sysctl kernel.hostname="{{ ansible_hostname }}" + shell: sysctl kernel.hostname="{{ inventory_hostname }}" sudo: true - name: set domainname by sysctl @@ -11,7 +11,7 @@ sudo: true - name: set /etc/sysconfig/network on CentOS 6 - lineinfile: dest=/etc/sysconfig/network line='HOSTNAME={{ ansible_hostname }}' regexp='^HOSTNAME' + lineinfile: dest=/etc/sysconfig/network line='HOSTNAME={{ inventory_hostname }}' regexp='^HOSTNAME' sudo: true when: ansible_distribution == "CentOS" @@ -29,4 +29,3 @@ template: src=hostname dest=/etc/hostname become: true become_user: root - when: ansible_distribution == "CentOS" diff --git a/roles/etcHosts/templates/hostname b/roles/etcHosts/templates/hostname index c3e4c0866425f36098edb843bfe08ea171b36fe8..1fad51f6ea226be709a0bc3c5b958eabc6dcc145 100644 --- a/roles/etcHosts/templates/hostname +++ b/roles/etcHosts/templates/hostname @@ -1 +1 @@ -{{ ansible_hostname }} +{{ inventory_hostname }} diff --git a/roles/lustre-client/files/ibdev2netdev.sh b/roles/lustre-client/files/ibdev2netdev.sh new file mode 100755 index 0000000000000000000000000000000000000000..4fb4708aa0614ae5689f76d4ea68648d92fa0aa4 --- /dev/null +++ b/roles/lustre-client/files/ibdev2netdev.sh @@ -0,0 +1,222 @@ +#! /bin/bash +# +#modified by simon to print device name +# + + +usage() +{ + echo "$(basename $0) <options>" + echo "-h, --help print help message" + echo "-v, --verbose print more info" +} + +function find_pdev() +{ + pdevlist=$(ls /sys/bus/pci/devices) + + for pdev in $pdevlist; do + if [ -d /sys/bus/pci/devices/$pdev/infiniband ]; then + ibd=$(ls /sys/bus/pci/devices/$pdev/infiniband/) + if [ "x$ibd" == "x$1" ]; then + echo -n $pdev + fi + fi + done +} + +case $1 in + "-h" | "--help") + usage + exit 0 + ;; +esac + +if (( $# > 1 )); then + usage + exit -1 +fi + +if (( $# == 1 )) && [ "$1" != "-v" ]; then + usage + exit -1 +fi + +ibdevs=$(ls /sys/class/infiniband/) + +devs= +for netpath in /sys/class/net/* +do + if (grep 0x15b3 ${netpath}/device/vendor > /dev/null 2>&1); then + devs="$devs ${netpath##*/}" + fi +done + +if [ "x$devs" == "x" ]; then + # no relevant devices - quit immediately + exit +fi + +for d in $devs; do + if [ -f /sys/class/net/$d/dev_id ]; then + oldstyle=n + break + fi +done + +if [ "x$oldstyle" == "xn" ]; then + for d in $ibdevs; do + ibrsc=$(cat /sys/class/infiniband/$d/device/resource) + eths=$(ls /sys/class/net/) + for eth in $eths; do + filepath_resource=/sys/class/net/$eth/device/resource + + if [ -f $filepath_resource ]; then + ethrsc=$(cat $filepath_resource) + if [ "x$ethrsc" == "x$ibrsc" ]; then + filepath_devid=/sys/class/net/$eth/dev_id + filepath_devport=/sys/class/net/$eth/dev_port + if [ -f $filepath_devid ]; then + port1=0 + if [ -f $filepath_devport ]; then + port1=$(cat $filepath_devport) + port1=$(printf "%d" $port1) + fi + + port=$(cat $filepath_devid) + port=$(printf "%d" $port) + if [ $port1 -gt $port ]; then + port=$port1 + fi + + port=$(( port + 1 )) + + filepath_carrier=/sys/class/net/$eth/carrier + + if [ -f $filepath_carrier ]; then + link_state=$(cat $filepath_carrier 2> /dev/null) + if (( link_state == 1 )); then + link_state="Up" + else + link_state="Down" + fi + else + link_state="NA" + fi + + x=$(find_pdev $d) + if [ "$1" == "-v" ]; then + filepath_portstate=/sys/class/infiniband/$d/ports/$port/state + filepath_deviceid=/sys/class/infiniband/$d/device/device + filepath_fwver=/sys/class/infiniband/$d/fw_ver + filepath_vpd=/sys/class/infiniband/$d/device/vpd + + # read port state + if [ -f $filepath_portstate ]; then + ibstate=$(printf "%-6s" $(cat $filepath_portstate | gawk '{print $2}')) + else + ibstate="NA" + fi + + # read device + if [ -f $filepath_deviceid ]; then + devid=$(printf "MT%d" $(cat $filepath_deviceid)) + else + devid="NA" + fi + + # read FW version + if [ -f $filepath_fwver ]; then + fwver=$(cat $filepath_fwver) + else + fwver="NA" + fi + + # read device description and part ID from the VPD + if [ -f $filepath_vpd ]; then + tmp=$IFS + IFS=":" + vpd_content=`cat $filepath_vpd` + devdesc=$(printf "%-15s" $(echo $vpd_content | strings | head -1)) + partid=$(printf "%-11s" $(echo $vpd_content | strings | head -4 | tail -1 | gawk '{print $1}')) + IFS=$tmp + else + devdesc="" + partid="NA" + fi + #echo "$x $d ($devid - $partid) $devdesc fw $fwver port $port ($ibstate) ==> $eth ($link_state)" + echo -n "$eth" + else + echo -n "$eth" + #echo "$d port $port ==> $eth ($link_state)" + fi + fi + fi + fi + done + done +else +########################## +### old style +########################## + +function print_line() +{ + echo -n "$eth" +# echo "$1 port $2 <===> $3" + +} + +function find_guid() +{ + ibdevs=$(ls /sys/class/infiniband/) + for ibdev in $ibdevs; do + ports=$(ls /sys/class/infiniband/$ibdev/ports/) + for port in $ports; do + gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids) + for gid in $gids; do + pguid=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g') + if [ x$pguid == x$1 ]; then + print_line $ibdev $port $2 + fi + done + done + done +} + +function find_mac() +{ + ibdevs=$(ls /sys/class/infiniband/) + for ibdev in $ibdevs; do + ports=$(ls /sys/class/infiniband/$ibdev/ports/) + for port in $ports; do + gids=$(ls /sys/class/infiniband/$ibdev/ports/$port/gids) + for gid in $gids; do + first=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21-22) + first=$(( first ^ 2 )) + first=$(printf "%02x" $first) + second=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g' | cut -b 3-6) + third=$(cat /sys/class/infiniband/$ibdev/ports/$port/gids/$gid | cut -b 21- | sed -e 's/://g' | cut -b 11-) + pmac=$first$second$third + if [ x$pmac == x$1 ]; then + print_line $ibdev $port $2 + fi + done + done + done +} + +ifcs=$(ifconfig -a | egrep '^eth|^ib' | gawk '{print $1}') + +for ifc in $ifcs; do + len=$(cat /sys/class/net/$ifc/addr_len) + if (( len == 20 )); then + guid=$(cat /sys/class/net/$ifc/address | cut -b 37- | sed -e 's/://g') + find_guid $guid $ifc + elif (( len == 6)); then + mac=$(cat /sys/class/net/$ifc/address | sed -e 's/://g') + find_mac $mac $ifc + fi +done +fi + diff --git a/roles/lustre-client/templates/lustre.conf.j2 b/roles/lustre-client/templates/lustre.conf.j2 new file mode 100644 index 0000000000000000000000000000000000000000..023cf1c7fdfa8885cdbd2c64083f845b586a9925 --- /dev/null +++ b/roles/lustre-client/templates/lustre.conf.j2 @@ -0,0 +1 @@ +options lnet networks=tcp0({{ MELLANOX_DEVICE_NAME.stdout }}) diff --git a/roles/make_filesystems/tasks/main.yml b/roles/make_filesystems/tasks/main.yml index 34b0c6ed5ac859fbed246f88cce09b8eb3263d81..95afe48ac28c932cc71e05966f72f03b5044e476 100644 --- a/roles/make_filesystems/tasks/main.yml +++ b/roles/make_filesystems/tasks/main.yml @@ -1,11 +1,11 @@ --- - name: format volumes - filesystem: fstype={{ item.fstype }} dev={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }} + filesystem: fstype={{ item.fstype }} dev={{ hostvars[inventory_hostname]['ansible_host_volumes'][item.name]['dev'] }} with_items: volumes sudo: true - name: format volumes - mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ hostvars[ansible_hostname]['ansible_host_volumes'][item.name]['dev'] }} state=mounted + mount: name={{ item.mntpt }} fstype={{ item.fstype }} src={{ hostvars[inventory_hostname]['ansible_host_volumes'][item.name]['dev'] }} state=mounted with_items: volumes sudo: true diff --git a/roles/mellanox_drivers/templates/ifcfg-ens6.j2 b/roles/mellanox_drivers/templates/ifcfg-ens6.j2 index 1066d624333b0fb6b43e45bb6127a4e31989873d..ecd3c4f5ba04a41f82dfd1c396843274bf30e706 100644 --- a/roles/mellanox_drivers/templates/ifcfg-ens6.j2 +++ b/roles/mellanox_drivers/templates/ifcfg-ens6.j2 @@ -1,4 +1,4 @@ -DEVICE=ens6 +DEVICE={{ MELLANOX_DEVICE_NAME.stdout }} ONBOOT=yes NM_CONTROLLED=no BOOTPROTO=none diff --git a/roles/mellanox_drivers/vars/mellanoxVars.yml b/roles/mellanox_drivers/vars/mellanoxVars.yml index 77f83630d62fb6c7317a61386adce9fca60775c7..5b53fe8fbcf5c01af76fd93af02f86a5e4e86dd5 100644 --- a/roles/mellanox_drivers/vars/mellanoxVars.yml +++ b/roles/mellanox_drivers/vars/mellanoxVars.yml @@ -1,5 +1,5 @@ --- #note. do not add '.tgz' to driver src. done in playbook #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.1-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" - MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" - MELLANOX_DEVICE_NAME: "{% if ansible_os_family == 'RedHat' %}ens6{% elif ansible_os_family == 'Debian' %}eth1{% endif %}" + #MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.1-1.0.3-rhel7.2-x86_64-ext{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" + MELLANOX_DRIVER_SRC: "{% if ansible_os_family == 'RedHat' %}MLNX_OFED_LINUX-3.2-2.0.0.0-rhel7.2-x86_64{% elif ansible_os_family == 'Debian' %}MLNX_OFED_LINUX-3.1-1.0.3-ubuntu14.04-x86_64{% endif %}" diff --git a/roles/slurm-start/tasks/main.yml b/roles/slurm-start/tasks/main.yml index a857be9a01b37ba0b4d56ccd8079ca731abe71f5..2b163fcafde6ca3bf7d5702510157e8c419f442c 100644 --- a/roles/slurm-start/tasks/main.yml +++ b/roles/slurm-start/tasks/main.yml @@ -5,6 +5,11 @@ when: (ansible_distribution == "CentOS" or ansible_distribution == "RedHat") and ( ansible_distribution_major_version == "7") +- name: set slurmd_enabled (default enabled) + set_fact: + slurmd_enabled: True + when: slurmd_enabled is not defined + - name: install slurmdbd init template: src=slurmdbd.initd.j2 dest=/etc/init.d/slurmdbd mode=755 sudo: true @@ -56,11 +61,11 @@ when: use_systemd is defined and start_slurmctld is defined - name: start slurmd - service: name=slurmd state=started enabled=yes + service: name=slurmd state=started enabled={{ slurmd_enabled }} sudo: true when: use_systemd is defined and start_slurmd is defined - name: start slurm - service: name=slurm state=started enabled=yes + service: name=slurm state=started enabled={{ slurmd_enabled }} sudo: true when: use_systemd is not defined and ( start_slurmd is defined or start_slurmctld is defined )