Skip to content
Snippets Groups Projects
Commit dcbd13f6 authored by Andreas Hamacher's avatar Andreas Hamacher
Browse files

Merge remote-tracking branch 'origin/master' into pipelinefix

parents b3e8a227 a9ab598f
No related branches found
No related tags found
2 merge requests: !376 Pipelinefix, !373 Pipelinefix
This commit is part of merge request !373. Comments created here will be created in the context of that merge request.
......@@ -16,15 +16,18 @@
when: ansible_os_family == 'RedHat'
- name: install lua RHEL7
yum: name={{ item }} state=present update_cache=yes enablerepo="Monash_University_EPEL7_EPEL_7_-_x86_64"
with_items:
- lua
- lua-filesystem
- lua-posix
- tcl
- rsync
- gcc
- lua-devel
yum:
state: present
update_cache: yes
enablerepo: "Monash_University_EPEL7_EPEL_7_-_x86_64"
name:
- lua
- lua-filesystem
- lua-posix
- tcl
- rsync
- gcc
- lua-devel
when:
- '"DGX" in ansible_product_name'
- '"RedHat" in ansible_distribution'
......@@ -61,4 +64,4 @@
args:
creates: "{{ soft_dir }}/lmod/{{ lmod_version }}"
become: true
when: ansible_os_family == 'RedHat'
\ No newline at end of file
when: ansible_os_family == 'RedHat'
---
# make sure firewalld is not installed
- name: make sure firewalld is not installed
yum: name={{ item }} state=absent
yum:
name:
- firewalld
- firewall-config
state: absent
become: true
become_user: root
with_items:
- firewalld
- firewall-config
# make sure iptables is installed
- name: make sure iptables-services is installed
......
- name: yum install cgroup
yum: name={{ item }} state=present
with_items:
- libcgroup
yum: name=libcgroup state=present
become: True
when: ansible_os_family == "RedHat"
......
- name: Install Telegraf from URL [RHEL/CentOS]
yum:
name: "{{ telegraf_install_rpm_url }}"
state: present
state: latest
when: ansible_os_family == "RedHat"
become: true
become_user: root
......@@ -17,7 +17,7 @@
- name: Install Telegraf package
apt:
deb: /tmp/telegraf-ansible-download.deb
state: present
state: latest
when: ansible_os_family == "Debian"
become: true
become_user: root
......
#!/usr/bin/python3
"""Enrich every VM of an ansible inventory with its hypervisor IP.

Inputs (both read from the current working directory):

* ``m3-latest`` -- the hypervisor mapping, coming from
  /projects/pMOSP/hypervisor/m3-latest (for monarch see
  /projects/pMOSP/hypervisor/monarch-vm-hw-mapping-2020).
* ``m3inventory.json`` -- the inventory in json format, created via
  ``m3inventory > m3inventory.json``.

For every mapping row whose VM is present in the inventory, the host
variable ``hypervisor_ip`` is set to the resolved address of
``<hypervisor>-1g.erc.monash.edu``.  VMs missing from the inventory are
reported on stderr and skipped.

Output: a bash script on stdout that echoes the enriched inventory.
"""
import json
import shlex
import socket
import sys

with open('m3-latest', 'r') as mapping_file:
    # The first three lines and the last line are not data rows
    # (presumably the header and closing border of a '|'-separated
    # table -- TODO confirm against the mapping file).
    mapping = mapping_file.readlines()[3:-1]

with open('m3inventory.json') as json_file:
    inv = json.load(json_file)

hostvars = inv['_meta']['hostvars']
for row in mapping:
    fields = row.split('|')
    vm = fields[2].strip()
    hyp = fields[4].strip()
    if vm not in hostvars:
        sys.stderr.write("Not found in inventory: {}\n".format(vm))
        continue
    hostvars[vm]['hypervisor_ip'] = socket.gethostbyname(hyp + '-1g.erc.monash.edu')

# shlex.quote wraps the JSON in single quotes exactly like the previous
# hand-written quoting, but also escapes any single quote inside a host
# variable so the generated script stays valid bash.
print("#!/bin/bash\necho " + shlex.quote(json.dumps(inv, indent=4, sort_keys=True)))
......@@ -101,6 +101,63 @@ def load_data(artifactfile="artifacts.zip", nodeclass="compute_ansible_check.log
return df
def bokeh_plot(df, title):
    """Draw *df* as a series of colour bars (a heat map) and save the figure.

    One unit rectangle is drawn per row, positioned by ``taskid`` (x) and
    ``host`` (y).  The value of ``change`` sets the colour; ``changestr`` is
    only shown in the hover tool tip, together with host, task and taskid.

    Args:
        df: data with columns for task, host, change, changestr and taskid.
            Its index is read as two levels: level 0 supplies the y range
            and level 1 the x range.
        title: title of the figure.

    Returns:
        The bokeh figure, after it has been written out with ``save``.
        ``save`` is called without a filename, so the output file must have
        been configured beforehand (e.g. with ``bokeh.io.output_file``).
    """
    from bokeh.models import LinearColorMapper
    from bokeh.plotting import figure, save

    # this is an abbreviated colormap from a bokeh example;
    # low=0 / high=4 spreads the `change` values over the five colours
    colors = ['#084594', '#2171b5', '#4292c6', "#dfccce", "#550b1d"]
    mapper = LinearColorMapper(palette=colors, low=0, high=4)

    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    dataxrange = list(df.index.get_level_values(1).unique())
    datayrange = list(df.index.get_level_values(0).unique())

    p = figure(title=title,
               x_range=dataxrange, y_range=datayrange,
               x_axis_location="above",
               sizing_mode='stretch_width',
               tools=TOOLS, toolbar_location='below',
               tooltips=[('host', '@host'), ('task', '@task'),
                         ('changed', '@changestr'), ('taskid', '@taskid')])

    p.grid.grid_line_color = None
    p.axis.axis_line_color = None

    # Hide ticks and tick labels on both axes.
    for axis in (p.xaxis, p.yaxis):
        axis.major_tick_line_color = None       # turn off major ticks
        axis.minor_tick_line_color = None       # turn off minor ticks
        axis.major_label_text_color = None      # turn off tick labels leaving space
        axis.major_label_text_font_size = '0pt'  # turn off tick labels

    p.rect(x="taskid", y="host", width=1, height=1,
           source=df,
           fill_color={'field': 'change', 'transform': mapper},
           line_color=None)

    save(p)
    return p
import logging
from slack_logger import SlackHandler, SlackFormatter
slack_hook = os.environ['SLACK_HOOK']
......@@ -117,25 +174,82 @@ sh.setLevel(logging.DEBUG)
logger.addHandler(sh)
df = load_data(nodeclass="compute_ansible_check.log")
if (len(sys.argv)>1 and sys.argv[1]=='outputChangedNodeList'):
import bokeh.io
from datetime import datetime
strBokehfile="output.html"
#datetime.today().strftime('%Y%m%d')+'.html'
bokeh.io.output_file(strBokehfile)
#from bokeh.io import curdoc
from bokeh.models import Div
from bokeh.layouts import layout, column
import logging
import sys
import os
if (len(sys.argv)>1 and 'outputChangedNodeList' in sys.argv):
print(yaml.dump(list(df[df.change == 2].host.unique())))
sys.exit(0)
# Summarise the ansible check results of every node class.
#
# One entry per node class:
#   (ansible check log, plot title, plot file prefix, label in the summary)
node_classes = [
    ("compute_ansible_check.log", "Compute Nodes", "comp_", "Compute"),
    ("login_ansible_check.log", "login Nodes", "login_", "Login"),
    ("mgmt_ansible_check.log", "mgmt_ Nodes", "mgmt_", "Management"),
    ("dgx_ansible_check.log", "dgx_ Nodes", "dgx_", "DGX"),
]

# Plots are only produced when 'bokehplot' is given on the command line.
make_plots = 'bokehplot' in sys.argv[1:]

for logname, plot_title, file_prefix, label in node_classes:
    df = load_data(nodeclass=logname)
    if make_plots:
        bokeh_plot(df, plot_title)
        # bokeh_plot saves to strBokehfile; rename it so the next node
        # class does not overwrite it.  Unlike the former `mv` through
        # os.system, a failed rename now raises instead of being ignored.
        os.replace(strBokehfile, file_prefix + strBokehfile)
        # NOTE: uploading the plot to the 'ansiblechecker' swift container
        # (`swift upload ansiblechecker <file>`) is disabled.
    nodes = len(df.host.unique())
    # change == 2 / change == 4 select the hosts reported below as
    # changed / failed respectively.
    changed = len(df[df.change == 2].host.unique())
    failed = len(df[df.change == 4].host.unique())
    logger.info("{} {} nodes, {} had at least one change {} had at least one failed task".format(nodes, label, changed, failed))

logger.info("this is defined in .gitlab-ci.yml in ansible_check and the trigger is configured in https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/pipeline_schedules ")
# Point at the artifacts of this CI job; CI_JOB_ID must be set in the
# environment, otherwise a KeyError is raised here.
job_artifacts_url = "https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/-/jobs/" + os.environ['CI_JOB_ID'] + "/artifacts/browse"
logger.info(job_artifacts_url)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment