import pandas as pd import zipfile import json import os,sys,yaml def task_result(task): # 0 is skipped (Because of a conditional" # 1 is not changed (everything should be 1) # 2 is changed (you should run ansible playbook to fix the cluster) # 3 is skipped due to check mode (you should not write a role like this) # 4 is failed. You've got somethign to fix. if "skipped" in task and task["skipped"] is True: if "skipped_reason" in task and task["skipped_reason"] == "Conditional result was False": return 0 if "msg" in task and task["msg"] == "skipped, running in check mode": return 3 if "failed" in task and task["failed"]: return 4 if "changed" in task: if task["changed"]: return 2 else: return 1 def change_value(change): if change == False: return 1 if change == True: return 2 return 0 def change_str(change): if change == 0: return "N/A" if change == 1: return "False" if change == 2: return "True" if change == 3: return "Skipped in check mode" if change == 4: return "Failed" def get_changes(data): for play in data['plays']: for task in play['tasks']: for host,hosttask in task['hosts'].items(): #yield {'task':task['task']['name'],'host':host,'change':change_value(hosttask['changed']),'changestr':hosttask['changed'],'taskid':task['task']['id']} yield {'task':task['task']['name'],'host':host,'change':task_result(hosttask),'changestr':change_str(task_result(hosttask)),'taskid':task['task']['id']} #def change_value(change): # if change == False: # return 1 # if change == True: # return 2 # return 0 # #def change_str(change): # if change == 0: # return "N/A" # if change == 1: # return "False" # if change == 2: # return "True" # #def get_changes(data): # for play in data['plays']: # for task in play['tasks']: # for host,hosttask in task['hosts'].items(): # yield {'task':task['task']['name'],'host':host,'change':change_value(hosttask['changed']),'changestr':hosttask['changed'],'taskid':task['task']['id']} def load_data(artifactfile="artifacts.zip", nodeclass="compute_ansible_check.log"): #with zipfile.ZipFile(artifactfile,'r') as zipf: # data = json.loads(zipf.read(nodeclass)) with open(nodeclass) as f: data = json.loads(f.read()) # Create my dataframe from a list of dictionaries df = pd.DataFrame(list(get_changes(data))) # Extract a mapping from the taskid to the task name taskmap = df[['task','taskid']].copy().drop_duplicates().set_index('taskid') # reindex the list of values, so that each change even can be referecned by a unique combination of host and taskid midx = pd.MultiIndex.from_frame((df[['host','taskid']])) df = df.set_index(midx); # Assume that ever host executes every task. Use fillna to fill in task which hosts don't execute #print(df) #print(df.unstack('taskid')) df.fillna(0) #df=df.unstack('taskid').fillna(0).stack() # Since our dataframe is now bigger (has those pesky NaNs filled in with zeros) create a new list of hosts and tasks. hosts = df.index.get_level_values(0) df['host']=hosts tasks = list(map(lambda x: taskmap.loc[x]['task'],df.index.get_level_values(1))) df['task'] = tasks changestr = list(map(lambda x: change_str(x), df['change'])) df['changestr'] = changestr df['taskid'] = df.index.get_level_values(1) return df def bokeh_plot(df,title): # Create a series of colour bars (i.e. a HeatMap) from a list # The list should include columns for task, host, change and changestr # (the value of change sets the colour but the value of changestr is shown in the tool tip) from bokeh.io import output_file, show from bokeh.layouts import column from bokeh.plotting import figure from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar, Div from bokeh.plotting import figure, save from math import pi # this is an abbreviated colormap from a bokeh example colors = ['#084594', '#2171b5', '#4292c6', "#dfccce", "#550b1d"] #colors = [ "#e2e2e2", "#dfccce", "#550b1d"] mapper = LinearColorMapper(palette=colors, low=0, high=4) #colors = [ "#e2e2e2", "#dfccce", "#550b1d"] #mapper = LinearColorMapper(palette=colors, low=0, high=2) TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom" dataxrange = list(df.index.get_level_values(1).unique()) datayrange = list(df.index.get_level_values(0).unique()) p = figure(title=title, x_range=dataxrange, y_range=datayrange, x_axis_location="above", sizing_mode='stretch_width', tools=TOOLS, toolbar_location='below', tooltips=[('host', '@host'), ('task', '@task'), ('changed', '@changestr'),('taskid','@taskid') ]) p.grid.grid_line_color = None p.axis.axis_line_color = None p.xaxis.major_tick_line_color = None # turn off x-axis major ticks p.xaxis.minor_tick_line_color = None # turn off x-axis minor ticks p.xaxis.major_label_text_color = None # turn off x-axis tick labels leaving space p.xaxis.major_label_text_font_size = '0pt' # turn off x-axis tick labels p.yaxis.major_tick_line_color = None # turn off x-axis major ticks p.yaxis.minor_tick_line_color = None # turn off x-axis minor ticks p.yaxis.major_label_text_color = None # turn off y-axis tick labels leaving space p.yaxis.major_label_text_font_size = '0pt' # turn off y-axis tick labels #p.axis.major_tick_line_color = None #p.axis.major_label_text_font_size = "5pt" #p.axis.major_label_standoff = 0 #p.xaxis.major_label_orientation = pi / 3 p.rect(x="taskid", y="host", width=1, height=1, source=df, fill_color={'field': 'change', 'transform': mapper}, line_color=None) save(p) return p import logging from slack_logger import SlackHandler, SlackFormatter slack_hook = os.environ['SLACK_HOOK'] logger = logging.getLogger() logger.setLevel(logging.INFO) handler = logging.StreamHandler(sys.stdout) formatter = logging.Formatter('%(asctime)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) sh = SlackHandler(username='m3-ansible-check', icon_emoji=':robot_face:', url=slack_hook) sh.setLevel(logging.DEBUG) logger.addHandler(sh) import bokeh.io from datetime import datetime strBokehfile="output.html" #datetime.today().strftime('%Y%m%d')+'.html' bokeh.io.output_file(strBokehfile) #from bokeh.io import curdoc from bokeh.models import Div from bokeh.layouts import layout, column import logging import sys import os if (len(sys.argv)>1 and 'outputChangedNodeList' in sys.argv): print(yaml.dump(list(df[df.change == 2].host.unique()))) sys.exit(0) df = load_data(nodeclass="compute_ansible_check.log") if (len(sys.argv)>1 and 'bokehplot' in sys.argv): cmpplot = bokeh_plot(df, "Compute Nodes") cmd="mv "+strBokehfile+" comp_"+strBokehfile os.system(cmd) #cmd='swift upload ansiblechecker comp_'+strBokehfile #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Compute nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) df = load_data(nodeclass="login_ansible_check.log") if (len(sys.argv)>1 and 'bokehplot' in sys.argv): cmpplot = bokeh_plot(df, "login Nodes") cmd="mv "+strBokehfile+" login_"+strBokehfile os.system(cmd) #cmd='swift upload ansiblechecker login_'+strBokehfile #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Login nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) df = load_data(nodeclass="mgmt_ansible_check.log") if (len(sys.argv)>1 and 'bokehplot' in sys.argv): cmpplot = bokeh_plot(df, "mgmt_ Nodes") cmd="mv "+strBokehfile+" mgmt_"+strBokehfile os.system(cmd) #cmd='swift upload ansiblechecker mgmt_'+strBokehfile #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Management nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) df = load_data(nodeclass="dgx_ansible_check.log") if (len(sys.argv)>1 and 'bokehplot' in sys.argv): cmpplot = bokeh_plot(df, "dgx_ Nodes") cmd="mv "+strBokehfile+" dgx_"+strBokehfile os.system(cmd) #cmd='swift upload ansiblechecker dgx_'+strBokehfile #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} DGX nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) logger.info("this is defined in .gitlab-ci.yml in ansible_check and the trigger is configured in https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/pipeline_schedules ") #logger.info("https://swift.rc.nectar.org.au/v1/AUTH_e86c925319094fb2b8cc1bf2373c69dc/ansiblechecker/"+strBokehfile) str="https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/-/jobs/"+os.environ['CI_JOB_ID']+"/artifacts/browse" logger.info(str) #if (len(sys.argv)>1 and 'bokehplot' in sys.argv): #cmpplot = bokeh_plot(df, "Compute Nodes") #cmd='swift upload ansiblechecker '+strBokehfile #os.system(cmd)