# check_summary.py

import pandas as pd

import zipfile
import json
import os,sys,yaml

def task_result(task):
    """Classify one per-host task result dict as a numeric status code.

    Codes:
        0 -- skipped because a conditional evaluated to False
        1 -- not changed (everything should be 1)
        2 -- changed (you should run the ansible playbook to fix the cluster)
        3 -- skipped due to check mode (you should not write a role like this)
        4 -- failed; you've got something to fix

    Returns None when the result carries no "changed" key and none of the
    other rules match.
    """
    if task.get("skipped") is True:
        if task.get("skipped_reason") == "Conditional result was False":
            return 0
        if task.get("msg") == "skipped, running in check mode":
            return 3
        # Any other kind of skip falls through to the failed/changed checks.

    if task.get("failed"):
        return 4

    if "changed" not in task:
        return None
    return 2 if task["changed"] else 1

def change_value(change):
    """Translate a ``changed`` flag into a numeric code.

    Returns 1 when *change* equals False, 2 when it equals True, and 0 for
    anything else. The comparison is by equality rather than identity, so
    the integers 0 and 1 are treated like False and True.
    """
    if change == False:  # noqa: E712 - equality kept so 0 maps to 1 as well
        return 1
    return 2 if change == True else 0  # noqa: E712

def change_str(change):
    """Return the display label for a numeric status code.

    Codes 0 to 4 map to "N/A", "False", "True", "Skipped in check mode" and
    "Failed" respectively; any other value yields None.
    """
    labels = (
        (0, "N/A"),
        (1, "False"),
        (2, "True"),
        (3, "Skipped in check mode"),
        (4, "Failed"),
    )
    # Scan with == (instead of a dict lookup) so unhashable or float inputs
    # are handled by plain equality.
    for code, label in labels:
        if change == code:
            return label
    return None

def get_changes(data):
    """Yield one flat record per (task, host) result found in *data*.

    *data* is walked as data['plays'][i]['tasks'][j]; each task entry
    provides its metadata under 'task' ('name' and 'id') and its per-host
    results under 'hosts'. Every yielded dict has the keys 'task', 'host',
    'change' (numeric code from task_result), 'changestr' (label from
    change_str) and 'taskid'.
    """
    for play in data['plays']:
        for entry in play['tasks']:
            for host, result in entry['hosts'].items():
                info = entry['task']
                name = info['name']
                # Classify once and derive the label from the same code.
                code = task_result(result)
                yield {
                    'task': name,
                    'host': host,
                    'change': code,
                    'changestr': change_str(code),
                    'taskid': info['id'],
                }

#def change_value(change):
#    if change == False:
#        return 1
#    if change == True:
#        return 2
#    return 0
#
#def change_str(change):
#    if change == 0:
#        return "N/A"
#    if change == 1:
#        return "False"
#    if change == 2: 
#        return "True"
#
#def get_changes(data):
#    for play in data['plays']:
#        for task in play['tasks']:
#            for host,hosttask in task['hosts'].items():
#                yield {'task':task['task']['name'],'host':host,'change':change_value(hosttask['changed']),'changestr':hosttask['changed'],'taskid':task['task']['id']}


def load_data(artifactfile="artifacts.zip", nodeclass="compute_ansible_check.log"):
    """Load an Ansible check log and return one row per (host, task) result.

    Parameters:
        artifactfile: Currently unused. Reading the log out of a zip
            archive is disabled; the parameter is kept so existing callers
            keep working.
        nodeclass: Path of the JSON log file to read. Its content is handed
            to get_changes().

    Returns:
        A pandas DataFrame with the columns 'task', 'host', 'change',
        'changestr' and 'taskid', indexed by a (host, taskid) MultiIndex.
        'host' and 'taskid' stay available as ordinary columns too, so
        expressions such as ``df.host.unique()`` keep working.

        Rows exist only for the host/task combinations present in the log;
        missing combinations are not filled in. A log without any results
        yields an empty frame with the same columns instead of raising.
    """
    with open(nodeclass, encoding="utf-8") as f:
        data = json.load(f)

    # Naming the columns up front keeps the frame well-formed even when the
    # log contains no task results at all.
    columns = ['task', 'host', 'change', 'changestr', 'taskid']
    df = pd.DataFrame(list(get_changes(data)), columns=columns)

    # Each result is addressable by its unique (host, taskid) combination;
    # drop=False keeps both values as regular columns as well.
    return df.set_index(['host', 'taskid'], drop=False)


import logging
from slack_logger import SlackHandler, SlackFormatter
# Webhook URL for the Slack handler; a missing variable raises KeyError here.
slack_hook = os.environ['SLACK_HOOK']

# Root logger at INFO level, writing to stdout first and to Slack second.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(message)s')
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
logger.addHandler(handler)

sh = SlackHandler(username='m3-ansible-check', icon_emoji=':robot_face:', url=slack_hook)
sh.setLevel(logging.DEBUG)
logger.addHandler(sh)

# (log file, summary template) for every node class, in reporting order.
reports = (
    ("compute_ansible_check.log",
     "{} Compute nodes, {} had at least one change {} had at least one failed task"),
    ("login_ansible_check.log",
     "{} Login nodes, {} had at least one change {} had at least one failed task"),
    ("mgmt_ansible_check.log",
     "{} Management nodes, {} had at least one change {} had at least one failed task"),
)

# The compute log is always loaded first: it also feeds the list-only mode.
df = load_data(nodeclass=reports[0][0])

# Optional mode: print the compute hosts with at least one change as YAML
# and stop without logging any summary.
if sys.argv[1:2] == ['outputChangedNodeList']:
    print(yaml.dump(list(df[df.change == 2].host.unique())))
    sys.exit(0)

for position, (logfile, template) in enumerate(reports):
    if position:
        # The first (compute) frame is already loaded above.
        df = load_data(nodeclass=logfile)
    nodes = len(df.host.unique())
    changed = len(df[df.change == 2].host.unique())
    failed = len(df[df.change == 4].host.unique())
    logger.info(template.format(nodes, changed, failed))