diff --git a/roles/lmod/tasks/main.yml b/roles/lmod/tasks/main.yml index 6d57b95b1aeb55c8b4fda8a93c10fe03b3996149..3bb127b247a0e0b05c91b1b16af35f13fbdd4bc6 100644 --- a/roles/lmod/tasks/main.yml +++ b/roles/lmod/tasks/main.yml @@ -16,15 +16,18 @@ when: ansible_os_family == 'RedHat' - name: install lua RHEL7 - yum: name={{ item }} state=present update_cache=yes enablerepo="Monash_University_EPEL7_EPEL_7_-_x86_64" - with_items: - - lua - - lua-filesystem - - lua-posix - - tcl - - rsync - - gcc - - lua-devel + yum: + state: present + update_cache: yes + enablerepo: "Monash_University_EPEL7_EPEL_7_-_x86_64" + name: + - lua + - lua-filesystem + - lua-posix + - tcl + - rsync + - gcc + - lua-devel when: - '"DGX" in ansible_product_name' - '"RedHat" in ansible_distribution' @@ -61,4 +64,4 @@ args: creates: "{{ soft_dir }}/lmod/{{ lmod_version }}" become: true - when: ansible_os_family == 'RedHat' \ No newline at end of file + when: ansible_os_family == 'RedHat' diff --git a/roles/nat_server/tasks/main.yml b/roles/nat_server/tasks/main.yml index 6c56c0ce8b10d821baee37ebf990ceb3ca82c47d..7bff1811311ec7811cc6430df408191e6342617a 100644 --- a/roles/nat_server/tasks/main.yml +++ b/roles/nat_server/tasks/main.yml @@ -1,12 +1,13 @@ --- # make sure firewalld is not installed - name: make sure firewalld is not installed - yum: name={{ item }} state=absent + yum: + name: + - firewalld + - firewall-config + state: absent become: true become_user: root - with_items: - - firewalld - - firewall-config # make sure iptables is installed - name: make sure iptables-services is installed diff --git a/roles/slurm-common/tasks/installCgroup.yml b/roles/slurm-common/tasks/installCgroup.yml index 9212a7cdbd61f1523a54eb43854879e73e2ad3cf..1413489733f63020f5c20a47e8b7c62dc2dd5629 100644 --- a/roles/slurm-common/tasks/installCgroup.yml +++ b/roles/slurm-common/tasks/installCgroup.yml @@ -1,7 +1,5 @@ - name: yum install cgroup - yum: name={{ item }} state=present - with_items: - - libcgroup + yum: name=libcgroup state=present become: True when: ansible_os_family == "RedHat" diff --git a/scripts/check_summary.py b/scripts/check_summary.py index acd372384e03b4836a190896273dcb80977c0acf..7b4e294aef3800a263a607ff25b16343a699d02b 100644 --- a/scripts/check_summary.py +++ b/scripts/check_summary.py @@ -101,6 +101,63 @@ def load_data(artifactfile="artifacts.zip", nodeclass="compute_ansible_check.log return df +def bokeh_plot(df,title): + # Create a series of colour bars (i.e. a HeatMap) from a list + # The list should include columns for task, host, change and changestr + # (the value of change sets the colour but the value of changestr is shown in the tool tip) + from bokeh.io import output_file, show + from bokeh.layouts import column + from bokeh.plotting import figure + from bokeh.models import LinearColorMapper, BasicTicker, PrintfTickFormatter, ColorBar, Div + from bokeh.plotting import figure, save + from math import pi + + + # this is an abbreviated colormap from a bokeh example + colors = ['#084594', '#2171b5', '#4292c6', "#dfccce", "#550b1d"] + #colors = [ "#e2e2e2", "#dfccce", "#550b1d"] + mapper = LinearColorMapper(palette=colors, low=0, high=4) + #colors = [ "#e2e2e2", "#dfccce", "#550b1d"] + #mapper = LinearColorMapper(palette=colors, low=0, high=2) + + TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom" + + + dataxrange = list(df.index.get_level_values(1).unique()) + datayrange = list(df.index.get_level_values(0).unique()) + p = figure(title=title, + x_range=dataxrange, y_range=datayrange, + x_axis_location="above", + sizing_mode='stretch_width', + tools=TOOLS, toolbar_location='below', + tooltips=[('host', '@host'), ('task', '@task'), ('changed', '@changestr'),('taskid','@taskid') ]) + + p.grid.grid_line_color = None + p.axis.axis_line_color = None + + p.xaxis.major_tick_line_color = None # turn off x-axis major ticks + p.xaxis.minor_tick_line_color = None # turn off x-axis minor ticks + p.xaxis.major_label_text_color = None # turn off x-axis tick labels leaving space + p.xaxis.major_label_text_font_size = '0pt' # turn off x-axis tick labels + + p.yaxis.major_tick_line_color = None # turn off x-axis major ticks + p.yaxis.minor_tick_line_color = None # turn off x-axis minor ticks + p.yaxis.major_label_text_color = None # turn off y-axis tick labels leaving space + p.yaxis.major_label_text_font_size = '0pt' # turn off y-axis tick labels + #p.axis.major_tick_line_color = None + #p.axis.major_label_text_font_size = "5pt" + #p.axis.major_label_standoff = 0 + #p.xaxis.major_label_orientation = pi / 3 + + p.rect(x="taskid", y="host", width=1, height=1, + source=df, + fill_color={'field': 'change', 'transform': mapper}, + line_color=None) + save(p) + + return p + + import logging from slack_logger import SlackHandler, SlackFormatter slack_hook = os.environ['SLACK_HOOK'] @@ -117,25 +174,82 @@ sh.setLevel(logging.DEBUG) logger.addHandler(sh) -df = load_data(nodeclass="compute_ansible_check.log") -if (len(sys.argv)>1 and sys.argv[1]=='outputChangedNodeList'): +import bokeh.io +from datetime import datetime +strBokehfile="output.html" +#datetime.today().strftime('%Y%m%d')+'.html' +bokeh.io.output_file(strBokehfile) +#from bokeh.io import curdoc +from bokeh.models import Div +from bokeh.layouts import layout, column +import logging +import sys +import os + + + + +if (len(sys.argv)>1 and 'outputChangedNodeList' in sys.argv): print(yaml.dump(list(df[df.change == 2].host.unique()))) sys.exit(0) - + +df = load_data(nodeclass="compute_ansible_check.log") + + +if (len(sys.argv)>1 and 'bokehplot' in sys.argv): + cmpplot = bokeh_plot(df, "Compute Nodes") + cmd="mv "+strBokehfile+" comp_"+strBokehfile + os.system(cmd) + #cmd='swift upload ansiblechecker comp_'+strBokehfile + #os.system(cmd) + nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Compute nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) df = load_data(nodeclass="login_ansible_check.log") +if (len(sys.argv)>1 and 'bokehplot' in sys.argv): + cmpplot = bokeh_plot(df, "login Nodes") + cmd="mv "+strBokehfile+" login_"+strBokehfile + os.system(cmd) + #cmd='swift upload ansiblechecker login_'+strBokehfile + #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Login nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) df = load_data(nodeclass="mgmt_ansible_check.log") +if (len(sys.argv)>1 and 'bokehplot' in sys.argv): + cmpplot = bokeh_plot(df, "mgmt_ Nodes") + cmd="mv "+strBokehfile+" mgmt_"+strBokehfile + os.system(cmd) + #cmd='swift upload ansiblechecker mgmt_'+strBokehfile + #os.system(cmd) nodes = len(df.host.unique()) changed = len(df[df.change == 2].host.unique()) failed = len(df[df.change == 4].host.unique()) logger.info("{} Management nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) + +df = load_data(nodeclass="dgx_ansible_check.log") +if (len(sys.argv)>1 and 'bokehplot' in sys.argv): + cmpplot = bokeh_plot(df, "dgx_ Nodes") + cmd="mv "+strBokehfile+" dgx_"+strBokehfile + os.system(cmd) + #cmd='swift upload ansiblechecker dgx_'+strBokehfile + #os.system(cmd) +nodes = len(df.host.unique()) +changed = len(df[df.change == 2].host.unique()) +failed = len(df[df.change == 4].host.unique()) +logger.info("{} DGX nodes, {} had at least one change {} had at least one failed task".format(nodes,changed,failed)) + +logger.info("this is defined in .gitlab-ci.yml in ansible_check and the trigger is configured in https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/pipeline_schedules ") +#logger.info("https://swift.rc.nectar.org.au/v1/AUTH_e86c925319094fb2b8cc1bf2373c69dc/ansiblechecker/"+strBokehfile) +str="https://gitlab.erc.monash.edu.au/hpc-team/clusterbuild/-/jobs/"+os.environ['CI_JOB_ID']+"/artifacts/browse" +logger.info(str) +#if (len(sys.argv)>1 and 'bokehplot' in sys.argv): + #cmpplot = bokeh_plot(df, "Compute Nodes") + #cmd='swift upload ansiblechecker '+strBokehfile + #os.system(cmd) \ No newline at end of file