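"""Summarise ansible --check runs across the cluster's node classes.

Reads the json output of ansible check runs for the compute, login, mgmt and
dgx node classes, classifies each task result per host (unchanged, changed,
skipped, failed), optionally renders a bokeh heat map per node class, and
logs a short summary to stdout and Slack.
"""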
import pandas as pd
import zipfile
import json
import os
import sys
import yaml
def task_result(task):
    # 0 is skipped (because of a conditional)
    # 1 is not changed (everything should be 1)
    # 2 is changed (you should run the ansible playbook to fix the cluster)
    # 3 is skipped due to check mode (you should not write a role like this)
    # 4 is failed. You've got something to fix.
    if "skipped" in task and task["skipped"] is True:
        if "skipped_reason" in task and task["skipped_reason"] == "Conditional result was False":
            return 0
    if "msg" in task and task["msg"] == "skipped, running in check mode":
        return 3
    if "failed" in task and task["failed"]:
        return 4
    if "changed" in task:
        if task["changed"]:
            return 2
        else:
            return 1
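# Illustrative examples of the mapping above (minimal result dicts; real ansible
# output carries more keys): {"changed": True} maps to 2,
# {"skipped": True, "skipped_reason": "Conditional result was False"} maps to 0,
# and {"failed": True} maps to 4.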
def change_value(change):
    if change is False:
        return 1
    if change is True:
        return 2
    return 0
def change_str(change):
    if change == 0:
        return "N/A"
    if change == 1:
        return "False"
    if change == 2:
        return "True"
    if change == 3:
        return "Skipped in check mode"
    if change == 4:
        return "Failed"
def get_changes(data):
    # Walk the ansible json output: plays -> tasks -> per-host results
    for play in data['plays']:
        for task in play['tasks']:
            for host, hosttask in task['hosts'].items():
                result = task_result(hosttask)
                yield {'task': task['task']['name'], 'host': host,
                       'change': result, 'changestr': change_str(result),
                       'taskid': task['task']['id']}
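# For reference, get_changes expects the shape of ansible's json output; a
# minimal sketch (field names as used above, real output has more keys):
#
#   {"plays": [{"tasks": [{"task": {"name": "...", "id": "..."},
#                          "hosts": {"node01": {"changed": false}}}]}]}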
def load_data(artifactfile="artifacts.zip", nodeclass="compute_ansible_check.log"):
    #with zipfile.ZipFile(artifactfile,'r') as zipf:
    #    data = json.loads(zipf.read(nodeclass))
    with open(nodeclass) as f:
        data = json.loads(f.read())
    # Create the dataframe from a list of dictionaries
    df = pd.DataFrame(list(get_changes(data)))
    # Extract a mapping from the taskid to the task name
    taskmap = df[['task', 'taskid']].copy().drop_duplicates().set_index('taskid')
    # Reindex the values so that each change event can be referenced by a unique
    # combination of host and taskid
    midx = pd.MultiIndex.from_frame(df[['host', 'taskid']])
    df = df.set_index(midx)
    # Assume that every host executes every task; fill in tasks a host did not execute
    df = df.fillna(0)
    #df = df.unstack('taskid').fillna(0).stack()
    # Rebuild the host, task, changestr and taskid columns from the (possibly larger) index
    hosts = df.index.get_level_values(0)
    df['host'] = hosts
    tasks = list(map(lambda x: taskmap.loc[x]['task'], df.index.get_level_values(1)))
    df['task'] = tasks
    changestr = list(map(lambda x: change_str(x), df['change']))
    df['changestr'] = changestr
    df['taskid'] = df.index.get_level_values(1)
    return df
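# The returned dataframe is indexed by (host, taskid) and keeps host, task,
# change, changestr and taskid as plain columns so bokeh_plot can reference
# them directly. A minimal usage sketch, mirroring the reporting below:
#   df = load_data(nodeclass="compute_ansible_check.log")
#   changed_hosts = df[df.change == 2].host.unique()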
def bokeh_plot(df, title):
    # Render a heat map: one coloured rectangle per (host, task) pair.
    # The dataframe must provide task, host, change, changestr and taskid columns
    # (change sets the colour, changestr is shown in the tooltip).
    from bokeh.models import LinearColorMapper
    from bokeh.plotting import figure, save

    # abbreviated colormap from a bokeh example, one colour per change code 0-4
    colors = ['#084594', '#2171b5', '#4292c6', "#dfccce", "#550b1d"]
    mapper = LinearColorMapper(palette=colors, low=0, high=4)
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    dataxrange = list(df.index.get_level_values(1).unique())
    datayrange = list(df.index.get_level_values(0).unique())
    p = figure(title=title,
               x_range=dataxrange, y_range=datayrange,
               x_axis_location="above",
               sizing_mode='stretch_width',
               tools=TOOLS, toolbar_location='below',
               tooltips=[('host', '@host'), ('task', '@task'),
                         ('changed', '@changestr'), ('taskid', '@taskid')])
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.xaxis.major_tick_line_color = None    # turn off x-axis major ticks
    p.xaxis.minor_tick_line_color = None    # turn off x-axis minor ticks
    p.xaxis.major_label_text_color = None   # hide x-axis tick labels but keep their space
    p.xaxis.major_label_text_font_size = '0pt'
    p.yaxis.major_tick_line_color = None    # turn off y-axis major ticks
    p.yaxis.minor_tick_line_color = None    # turn off y-axis minor ticks
    p.yaxis.major_label_text_color = None   # hide y-axis tick labels but keep their space
    p.yaxis.major_label_text_font_size = '0pt'
    p.rect(x="taskid", y="host", width=1, height=1,
           source=df,
           fill_color={'field': 'change', 'transform': mapper},
           line_color=None)
    save(p)
    return p

import logging
from slack_logger import SlackHandler, SlackFormatter

# Log to stdout and to a Slack channel; the webhook URL comes from the environment
slack_hook = os.environ['SLACK_HOOK']
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
sh = SlackHandler(username='m3-ansible-check', icon_emoji=':robot_face:', url=slack_hook)
sh.setLevel(logging.DEBUG)
logger.addHandler(sh)

import bokeh.io
from datetime import datetime

strBokehfile = "output.html"
#strBokehfile = datetime.today().strftime('%Y%m%d')+'.html'
bokeh.io.output_file(strBokehfile)
# Compute nodes: load the results first so both output modes below can use them
df = load_data(nodeclass="compute_ansible_check.log")
if len(sys.argv) > 1 and 'outputChangedNodeList' in sys.argv:
    print(yaml.dump(list(df[df.change == 2].host.unique())))
    sys.exit(0)
if len(sys.argv) > 1 and 'bokehplot' in sys.argv:
    cmpplot = bokeh_plot(df, "Compute Nodes")
    cmd = "mv " + strBokehfile + " comp_" + strBokehfile
    os.system(cmd)
    #cmd='swift upload ansiblechecker comp_'+strBokehfile
    #os.system(cmd)

nodes = len(df.host.unique())
changed = len(df[df.change == 2].host.unique())
failed = len(df[df.change == 4].host.unique())
logger.info("{} Compute nodes, {} had at least one change, {} had at least one failed task".format(nodes, changed, failed))
df = load_data(nodeclass="login_ansible_check.log")
if (len(sys.argv)>1 and 'bokehplot' in sys.argv):
cmpplot = bokeh_plot(df, "login Nodes")
cmd="mv "+strBokehfile+" login_"+strBokehfile
os.system(cmd)
#cmd='swift upload ansiblechecker login_'+strBokehfile
#os.system(cmd)

Andreas Hamacher
committed
nodes = len(df.host.unique())
changed = len(df[df.change == 2].host.unique())
failed = len(df[df.change == 4].host.unique())
logger.info("{} Login nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed))
df = load_data(nodeclass="mgmt_ansible_check.log")
if (len(sys.argv)>1 and 'bokehplot' in sys.argv):
cmpplot = bokeh_plot(df, "mgmt_ Nodes")
cmd="mv "+strBokehfile+" mgmt_"+strBokehfile
os.system(cmd)
#cmd='swift upload ansiblechecker mgmt_'+strBokehfile
#os.system(cmd)

Andreas Hamacher
committed
nodes = len(df.host.unique())
changed = len(df[df.change == 2].host.unique())
failed = len(df[df.change == 4].host.unique())
logger.info("{} Management nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed))
df = load_data(nodeclass="dgx_ansible_check.log")
if (len(sys.argv)>1 and 'bokehplot' in sys.argv):
cmpplot = bokeh_plot(df, "dgx_ Nodes")
cmd="mv "+strBokehfile+" dgx_"+strBokehfile
os.system(cmd)
#cmd='swift upload ansiblechecker dgx_'+strBokehfile
#os.system(cmd)
nodes = len(df.host.unique())
changed = len(df[df.change == 2].host.unique())
failed = len(df[df.change == 4].host.unique())
logger.info("{} DGX nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed))
logger.info("this is defined in .gitlab-ci.yml in ansible_check and the trigger is configured in https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/pipeline_schedules ")
#logger.info("https://swift.rc.nectar.org.au/v1/AUTH_e86c925319094fb2b8cc1bf2373c69dc/ansiblechecker/"+strBokehfile)
str="https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/-/jobs/"+os.environ['CI_JOB_ID']+"/artifacts/browse"
logger.info(str)