diff --git a/roles/extra_rpms/tasks/main.yml b/roles/extra_rpms/tasks/main.yml index 2a85fba6ddd111326b878ba24e599bc1dc12f5d2..6d828f79dcb2a89c4c93129b7840bf58c90988d1 100644 --- a/roles/extra_rpms/tasks/main.yml +++ b/roles/extra_rpms/tasks/main.yml @@ -1,8 +1,7 @@ --- - name: "Install extra packages" yum: "name={{ item }} state=present" - with_items: - pkgs + with_items: "{{ pkgs }}" sudo: true when: ansible_os_family == 'RedHat' diff --git a/roles/extra_rpms/vars/main.yml b/roles/extra_rpms/vars/main.yml index df10f44104f4a08b545af265cbefa0b3ce8338ec..b70d611e95a35d73562b2d50a066f34d98c685d8 100644 --- a/roles/extra_rpms/vars/main.yml +++ b/roles/extra_rpms/vars/main.yml @@ -190,6 +190,7 @@ pkgs: - qt-x11 - rhino - rsync + - samba-client - scipy - spice-vdagent - suitesparse diff --git a/roles/ldapserver/files/DB_CONFIG b/roles/ldapserver/files/DB_CONFIG new file mode 100644 index 0000000000000000000000000000000000000000..8fb2cb25da676c2ecb7a5e31c3f4e495ff1fba64 --- /dev/null +++ b/roles/ldapserver/files/DB_CONFIG @@ -0,0 +1,8 @@ +# one 0.25 GB cache +set_cachesize 0 268435456 1 + +# Transaction Log settings +set_lg_regionmax 262144 +set_lg_bsize 2097152 + +set_flags DB_LOG_AUTOREMOVE diff --git a/roles/ldapserver/tasks/main.yml b/roles/ldapserver/tasks/main.yml index 2909be7b7685a8f01d454c3784c18ef66f6d3863..c007a12a85efccafa53b2520c6928f5f73e94953 100644 --- a/roles/ldapserver/tasks/main.yml +++ b/roles/ldapserver/tasks/main.yml @@ -102,6 +102,10 @@ sudo: true register: tlsConfigured +- name: copy db config + copy: src=files/DB_CONFIG dest=/var/lib/ldap/DB_CONFIG owner=ldap group=ldap mode=644 + sudo: true + - name: start ldap service: name=slapd state=restarted sudo: true diff --git a/roles/ldapserver/vars/CentOS_6.7_x86_64.yml b/roles/ldapserver/vars/CentOS_6.7_x86_64.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae41ae86c9d53c509d1464ef8d21b1b18b1f1267 --- /dev/null +++ b/roles/ldapserver/vars/CentOS_6.7_x86_64.yml @@ -0,0 +1,8 @@ 
+--- + system_packages: + - openldap-servers + - openldap-clients + - openssl + dbname: olcDatabase={2}bdb + ldapuser: ldap + ldapgroup: ldap diff --git a/roles/nagios_config/tasks/main.yml b/roles/nagios_config/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f6c3a4e6c80d501863836cd4199ef5aaffc1ccf --- /dev/null +++ b/roles/nagios_config/tasks/main.yml @@ -0,0 +1,20 @@ +--- +- name: configure monitoring + template: src={{ item }}_nagios2.cfg.j2 dest=/etc/nagios3/conf.d/{{ item }}_nagios2.cfg + with_items: + - 'hostgroups' + - 'hosts' + - 'commands' + - 'services' + - 'contactgroup' + - 'contacts' + sudo: true + +- name: change cgi config + template: src=cgi.cfg.j2 dest=/etc/nagios3/cgi.cfg + sudo: true + +- name: nagios restart + service: name=nagios3 state=restarted + sudo: true + diff --git a/roles/nagios_config/templates/cgi.cfg.j2 b/roles/nagios_config/templates/cgi.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..a3f3a07cd440ba32de38771fe0350102443b6618 --- /dev/null +++ b/roles/nagios_config/templates/cgi.cfg.j2 @@ -0,0 +1,377 @@ +################################################################# +# +# CGI.CFG - Sample CGI Configuration File for Nagios +# +################################################################# + + +# MAIN CONFIGURATION FILE +# This tells the CGIs where to find your main configuration file. +# The CGIs will read the main and host config files for any other +# data they might need. + +main_config_file=/etc/nagios3/nagios.cfg + + + +# PHYSICAL HTML PATH +# This is the path where the HTML files for Nagios reside. This +# value is used to locate the logo images needed by the statusmap +# and statuswrl CGIs. + +physical_html_path=/usr/share/nagios3/htdocs + + + +# URL HTML PATH +# This is the path portion of the URL that corresponds to the +# physical location of the Nagios HTML files (as defined above). 
+# This value is used by the CGIs to locate the online documentation +# and graphics. If you access the Nagios pages with an URL like +# http://www.myhost.com/nagios, this value should be '/nagios' +# (without the quotes). + +url_html_path=/nagios3 + + + +# CONTEXT-SENSITIVE HELP +# This option determines whether or not a context-sensitive +# help icon will be displayed for most of the CGIs. +# Values: 0 = disables context-sensitive help +# 1 = enables context-sensitive help + +show_context_help=1 + + + +# PENDING STATES OPTION +# This option determines what states should be displayed in the web +# interface for hosts/services that have not yet been checked. +# Values: 0 = leave hosts/services that have not been check yet in their original state +# 1 = mark hosts/services that have not been checked yet as PENDING + +use_pending_states=1 + +# NAGIOS PROCESS CHECK COMMAND +# This is the full path and filename of the program used to check +# the status of the Nagios process. It is used only by the CGIs +# and is completely optional. However, if you don't use it, you'll +# see warning messages in the CGIs about the Nagios process +# not running and you won't be able to execute any commands from +# the web interface. The program should follow the same rules +# as plugins; the return codes are the same as for the plugins, +# it should have timeout protection, it should output something +# to STDIO, etc. +# +# Note: The command line for the check_nagios plugin below may +# have to be tweaked a bit, as different versions of the plugin +# use different command line arguments/syntaxes. + +nagios_check_command=/usr/lib/nagios/plugins/check_nagios /var/cache/nagios3/status.dat 5 '/usr/sbin/nagios3' + + +# AUTHENTICATION USAGE +# This option controls whether or not the CGIs will use any +# authentication when displaying host and service information, as +# well as committing commands to Nagios for processing. 
+# +# Read the HTML documentation to learn how the authorization works! +# +# NOTE: It is a really *bad* idea to disable authorization, unless +# you plan on removing the command CGI (cmd.cgi)! Failure to do +# so will leave you wide open to kiddies messing with Nagios and +# possibly hitting you with a denial of service attack by filling up +# your drive by continuously writing to your command file! +# +# Setting this value to 0 will cause the CGIs to *not* use +# authentication (bad idea), while any other value will make them +# use the authentication functions (the default). + +use_authentication=1 + + + + +# x509 CERT AUTHENTICATION +# When enabled, this option allows you to use x509 cert (SSL) +# authentication in the CGIs. This is an advanced option and should +# not be enabled unless you know what you're doing. + +use_ssl_authentication=0 + + + + +# DEFAULT USER +# Setting this variable will define a default user name that can +# access pages without authentication. This allows people within a +# secure domain (i.e., behind a firewall) to see the current status +# without authenticating. You may want to use this to avoid basic +# authentication if you are not using a secure server since basic +# authentication transmits passwords in the clear. +# +# Important: Do not define a default username unless you are +# running a secure web server and are sure that everyone who has +# access to the CGIs has been authenticated in some manner! If you +# define this variable, anyone who has not authenticated to the web +# server will inherit all rights you assign to this user! + +#default_user_name=guest + + + +# SYSTEM/PROCESS INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# have access to viewing the Nagios process information as +# provided by the Extended Information CGI (extinfo.cgi). By +# default, *no one* has access to this unless you choose to +# not use authorization. 
You may use an asterisk (*) to +# authorize any user who has authenticated to the web server. + +authorized_for_system_information=nagiosadmin,nagios + + + +# CONFIGURATION INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# can view ALL configuration information (hosts, commands, etc). +# By default, users can only view configuration information +# for the hosts and services they are contacts for. You may use +# an asterisk (*) to authorize any user who has authenticated +# to the web server. + +authorized_for_configuration_information=nagiosadmin,nagios + + + +# SYSTEM/PROCESS COMMAND ACCESS +# This option is a comma-delimited list of all usernames that +# can issue shutdown and restart commands to Nagios via the +# command CGI (cmd.cgi). Users in this list can also change +# the program mode to active or standby. By default, *no one* +# has access to this unless you choose to not use authorization. +# You may use an asterisk (*) to authorize any user who has +# authenticated to the web server. + +authorized_for_system_commands=nagiosadmin,nagios + + + +# GLOBAL HOST/SERVICE VIEW ACCESS +# These two options are comma-delimited lists of all usernames that +# can view information for all hosts and services that are being +# monitored. By default, users can only view information +# for hosts or services that they are contacts for (unless you +# choose to not use authorization). You may use an asterisk (*) +# to authorize any user who has authenticated to the web server. + + +authorized_for_all_services=nagiosadmin,nagios +authorized_for_all_hosts=nagiosadmin,nagios + + + +# GLOBAL HOST/SERVICE COMMAND ACCESS +# These two options are comma-delimited lists of all usernames that +# can issue host or service related commands via the command +# CGI (cmd.cgi) for all hosts and services that are being monitored. 
+# By default, users can only issue commands for hosts or services +# that they are contacts for (unless you choose to not use +# authorization). You may use an asterisk (*) to authorize any +# user who has authenticated to the web server. + +authorized_for_all_service_commands=nagiosadmin,nagios +authorized_for_all_host_commands=nagiosadmin,nagios + + + +# READ-ONLY USERS +# A comma-delimited list of usernames that have read-only rights in +# the CGIs. This will block any service or host commands normally shown +# on the extinfo CGI pages. It will also block comments from being shown +# to read-only users. + +#authorized_for_read_only=user1,user2 + + + + +# STATUSMAP BACKGROUND IMAGE +# This option allows you to specify an image to be used as a +# background in the statusmap CGI. It is assumed that the image +# resides in the HTML images path (i.e. /usr/local/nagios/share/images). +# This path is automatically determined by appending "/images" +# to the path specified by the 'physical_html_path' directive. +# Note: The image file may be in GIF, PNG, JPEG, or GD2 format. +# However, I recommend that you convert your image to GD2 format +# (uncompressed), as this will cause less CPU load when the CGI +# generates the image. + +#statusmap_background_image=smbackground.gd2 + + + + +# STATUSMAP TRANSPARENCY INDEX COLOR +# These options set the r,g,b values of the background color used by the statusmap CGI, +# so normal browsers that can't show real png transparency set the desired color as +# a background color instead (to make it look pretty). +# Defaults to white: (R,G,B) = (255,255,255). + +#color_transparency_index_r=255 +#color_transparency_index_g=255 +#color_transparency_index_b=255 + + + + +# DEFAULT STATUSMAP LAYOUT METHOD +# This option allows you to specify the default layout method +# the statusmap CGI should use for drawing hosts. If you do +# not use this option, the default is to use user-defined +# coordinates. 
Valid options are as follows: +# 0 = User-defined coordinates +# 1 = Depth layers +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular +# 5 = Circular (Marked Up) + +default_statusmap_layout=5 + + + +# DEFAULT STATUSWRL LAYOUT METHOD +# This option allows you to specify the default layout method +# the statuswrl (VRML) CGI should use for drawing hosts. If you +# do not use this option, the default is to use user-defined +# coordinates. Valid options are as follows: +# 0 = User-defined coordinates +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular + +default_statuswrl_layout=4 + + + +# STATUSWRL INCLUDE +# This option allows you to include your own objects in the +# generated VRML world. It is assumed that the file +# resides in the HTML path (i.e. /usr/local/nagios/share). + +#statuswrl_include=myworld.wrl + + + +# PING SYNTAX +# This option determines what syntax should be used when +# attempting to ping a host from the WAP interface (using +# the statuswml CGI). You must include the full path to +# the ping binary, along with all required options. The +# $HOSTADDRESS$ macro is substituted with the address of +# the host before the command is executed. +# Please note that the syntax for the ping binary is +# notorious for being different on virtually every *NIX +# OS and distribution, so you may have to tweak this to +# work on your system. + +ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$ + + + +# REFRESH RATE +# This option allows you to specify the refresh rate in seconds +# of various CGIs (status, statusmap, extinfo, and outages). + +refresh_rate=90 + +# DEFAULT PAGE LIMIT +# This option allows you to specify the default number of results +# displayed on the status.cgi. This number can be adjusted from +# within the UI after the initial page load. Setting this to 0 +# will show all results. + +result_limit=100 + + +# ESCAPE HTML TAGS +# This option determines whether HTML tags in host and service +# status output are escaped in the web interface. 
If enabled, +# your plugin output will not be able to contain clickable links. + +escape_html_tags=1 + + + + +# SOUND OPTIONS +# These options allow you to specify an optional audio file +# that should be played in your browser window when there are +# problems on the network. The audio files are used only in +# the status CGI. Only the sound for the most critical problem +# will be played. Order of importance (higher to lower) is as +# follows: unreachable hosts, down hosts, critical services, +# warning services, and unknown services. If there are no +# visible problems, the sound file optionally specified by +# 'normal_sound' variable will be played. +# +# +# <varname>=<sound_file> +# +# Note: All audio files must be placed in the /media subdirectory +# under the HTML path (i.e. /usr/local/nagios/share/media/). + +#host_unreachable_sound=hostdown.wav +#host_down_sound=hostdown.wav +#service_critical_sound=critical.wav +#service_warning_sound=warning.wav +#service_unknown_sound=warning.wav +#normal_sound=noproblem.wav + + + +# URL TARGET FRAMES +# These options determine the target frames in which notes and +# action URLs will open. + +action_url_target=_blank +notes_url_target=_blank + + + + +# LOCK AUTHOR NAMES OPTION +# This option determines whether users can change the author name +# when submitting comments, scheduling downtime. If disabled, the +# author names will be locked into their contact name, as defined in Nagios. +# Values: 0 = allow editing author names +# 1 = lock author names (disallow editing) + +lock_author_names=1 + + + + +# SPLUNK INTEGRATION OPTIONS +# These options allow you to enable integration with Splunk +# in the web interface. If enabled, you'll be presented with +# "Splunk It" links in various places in the CGIs (log file, +# alert history, host/service detail, etc). Useful if you're +# trying to research why a particular problem occurred. 
+# For more information on Splunk, visit http://www.splunk.com/ + +# This option determines whether the Splunk integration is enabled +# Values: 0 = disable Splunk integration +# 1 = enable Splunk integration + +#enable_splunk_integration=1 + + +# This option should be the URL used to access your instance of Splunk + +#splunk_url=http://127.0.0.1:8000/ + + + diff --git a/roles/nagios_config/templates/commands_nagios2.cfg.j2 b/roles/nagios_config/templates/commands_nagios2.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..385fa645e3d13ed4d32a6a47cc094017dc04c124 --- /dev/null +++ b/roles/nagios_config/templates/commands_nagios2.cfg.j2 @@ -0,0 +1,8 @@ +{% for service in nagios_services %} +{% if service.script %} +define command { + command_name {{ service.name }} + command_line /usr/lib/nagios/plugins/check_by_ssh -H $HOSTADDRESS$ -o StrictHostKeyChecking=no -C "{{ nagios_home }}/scripts/{{ service.script }}" -E +} +{% endif %} +{% endfor %} diff --git a/roles/nagios_config/templates/contactgroup_nagios2.cfg.j2 b/roles/nagios_config/templates/contactgroup_nagios2.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..8e671a0956e5df1ddd6a694a5350ef711ede14bb --- /dev/null +++ b/roles/nagios_config/templates/contactgroup_nagios2.cfg.j2 @@ -0,0 +1,9 @@ +define contactgroup { + contactgroup_name admins ; Group name used in configuration + alias Administrators ; Alias for group displayed on webpage + members admins +; contactgroup_members ; Other contact groups to be notified + } + + + diff --git a/roles/nagios_config/templates/contacts_nagios2.cfg.j2 b/roles/nagios_config/templates/contacts_nagios2.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..1fd6d8509fd0d65546b199ea14f686fefcd2d1ed --- /dev/null +++ b/roles/nagios_config/templates/contacts_nagios2.cfg.j2 @@ -0,0 +1,14 @@ +define contact{ + contact_name admins + alias Admins + service_notification_period 24x7 + host_notification_period 24x7 + 
service_notification_options w,u,c,r + host_notification_options d,r + service_notifications_enabled 1 + host_notifications_enabled 1 + service_notification_commands notify-service-by-email + host_notification_commands notify-host-by-email + email hpc-alerts-warning-l@monash.edu + } + diff --git a/roles/nagios_config/templates/hostgroups_nagios2.cfg.j2 b/roles/nagios_config/templates/hostgroups_nagios2.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..10e49bde89f6cee52f8a77a73915456737b3c811 --- /dev/null +++ b/roles/nagios_config/templates/hostgroups_nagios2.cfg.j2 @@ -0,0 +1,13 @@ +# Some generic hostgroup definitions + +{% for group in groups %} +{% set nodelist = [] %} +{% for node in groups[group] %} +{% if nodelist.append(node) %} +{% endif %} +{% endfor %} +define hostgroup { + hostgroup_name {{ group }} + members {{ nodelist|unique|join(',') }} +} +{% endfor %} diff --git a/roles/nagios_server/templates/hosts_nagios2.cfg.j2 b/roles/nagios_config/templates/hosts_nagios2.cfg.j2 similarity index 100% rename from roles/nagios_server/templates/hosts_nagios2.cfg.j2 rename to roles/nagios_config/templates/hosts_nagios2.cfg.j2 diff --git a/roles/nagios_server/templates/services_nagios2.cfg.j2 b/roles/nagios_config/templates/services_nagios2.cfg.j2 similarity index 77% rename from roles/nagios_server/templates/services_nagios2.cfg.j2 rename to roles/nagios_config/templates/services_nagios2.cfg.j2 index d5cbbca3b22bbd77003de659af549c130b87c71a..2eec8a5eb5b02b7bfec039b0721c6e646237ca2a 100644 --- a/roles/nagios_server/templates/services_nagios2.cfg.j2 +++ b/roles/nagios_config/templates/services_nagios2.cfg.j2 @@ -1,8 +1,9 @@ {% for service in nagios_services %} define service { + name {{ service.name }} service_description {{ service.description }} hostgroup_name {{ service.groups|join(',') }} - check_command {{ service.command }} + check_command {{ service.name }} use generic-service notification_interval 0 } diff --git 
a/roles/nagios_monitored/files/scripts/check_blocked_beamline_jobs b/roles/nagios_monitored/files/scripts/check_blocked_beamline_jobs new file mode 100755 index 0000000000000000000000000000000000000000..43a63de5ae66a9fe9b9d9012cf350c9d0782107b --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_blocked_beamline_jobs @@ -0,0 +1,44 @@ +#!/usr/bin/python +import sys, os, pwd +import getopt +import commands +import subprocess +import datetime + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE=STATE_OK + +# get info about reservation: the first output line of scontrol is parsed as key=value pairs +reservationname="beamline" +reservation_cmd=["/usr/local/slurm/latest/bin/scontrol","show","--oneliner","reservation=" + reservationname] +p = subprocess.Popen(reservation_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) +line = p.stdout.readline() +reservation_dict = dict( (n,v) for n,v in (a.split('=', 1) for a in line.split() if '=' in a) ) # split on the first '=' only and skip tokens without one, so a value that itself contains '=' cannot break the unpacking +retval = p.wait() + +# count free nodes: nodes of the reservation that scontrol reports as State=RESERVED +process=subprocess.Popen("/usr/local/slurm/latest/bin/scontrol show node=%s | grep -c \" State=RESERVED\"" % reservation_dict['Nodes'], shell=True, stdout=subprocess.PIPE) +free_nodes=process.communicate()[0] + +if int(free_nodes) < 1: + # not looking good, no nodes left in beamline reservation - let's check for pending jobs to decide on the warning level + STATE=STATE_WARNING + process=subprocess.Popen("/usr/local/slurm/latest/bin/squeue --noheader --format='%T %R' --long --reservation=beamline | grep PENDING", shell=True, stdout=subprocess.PIPE) + pending_list=process.communicate()[0] + if "PENDING" in pending_list: + print "Critical: we have no free nodes for beamline reservation and jobs trying to run!" 
+ STATE=STATE_CRITICAL + # Let's provide some more info for the reader's benefit + process=subprocess.Popen("/usr/local/slurm/latest/bin/squeue --format=\"%.18i %.9P %.8j %.16u %.10a %.8T %.10M %.9l %.6D %R\" --reservation=beamline", shell=True, stdout=subprocess.PIPE) + squeue_list=process.communicate()[0] + print "Beamline reservation has %s of %s nodes free" % (int(free_nodes),int(reservation_dict['NodeCnt'])) + print "squeue --long --reservation=beamline" + print squeue_list + sys.exit(STATE) + +# If we made it here we are happy +print "Beamline reservation has %s of %s nodes free" % (int(free_nodes),int(reservation_dict['NodeCnt'])) +sys.exit(STATE) + diff --git a/roles/nagios_monitored/files/scripts/check_blocked_compute_jobs b/roles/nagios_monitored/files/scripts/check_blocked_compute_jobs new file mode 100755 index 0000000000000000000000000000000000000000..d1c91793c1000c449892efc9d7acb1173f736159 --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_blocked_compute_jobs @@ -0,0 +1,71 @@ +#!/usr/bin/python +import sys, os, pwd +import getopt +import commands +import subprocess +import datetime + +STATE_OK=0 +state=STATE_OK +STATE_WARNING=1 +STATE_CRITICAL=2 +WARNING_THRESHOLD_SEC=7*24*60*60 +# WARNING_THRESHOLD_SEC=6*60*60 +CRITICAL_THRESHOLD_SEC=WARNING_THRESHOLD_SEC*2 + +check_pending_com_job=subprocess.Popen("squeue --noheader --states=PENDING --Format=SUBMITTIME", shell=True, stdout=subprocess.PIPE) +pending_com_job_id_list=check_pending_com_job.communicate()[0] + +check_recent_time=subprocess.Popen("date +'%Y-%m-%dT%H:%M:%S'", shell=True, stdout=subprocess.PIPE) +b = check_recent_time.communicate()[0].strip() +# print "date date" +# print "%s" % b +now_time = datetime.datetime.strptime(b, '%Y-%m-%dT%H:%M:%S') + +for submittime in pending_com_job_id_list.splitlines(): + # print job_ID + # check_submit_time=subprocess.Popen("/usr/local/slurm/latest/bin/squeue --job %s --noheader --Format=submittime" % job_ID , shell=True, stdout=subprocess.PIPE) 
+ # a = check_submit_time.communicate()[0].strip() + # print a + # print "-----------------------------------------------------------" + # print "slurm submit time" + # print "-%s-" % submittime + # check_recent_time=subprocess.Popen("date +'%Y-%m-%dT%H:%M:%S'", shell=True, stdout=subprocess.PIPE) + # b = check_recent_time.communicate()[0].strip() + # print "date date" + # print "%s" % b + + submit_time = datetime.datetime.strptime(submittime, '%Y-%m-%dT%H:%M:%S ') + # now_time = datetime.datetime.strptime(b, '%Y-%m-%dT%H:%M:%S') + + # print "python submit_time" + # print submit_time + # print "python now_time" + # print now_time + time_elapsed=(now_time - submit_time).days*24*60*60 + (now_time - submit_time).seconds # whole elapsed time in seconds; .seconds alone is only the sub-day remainder (0-86399) and could never reach the day-scale thresholds + # print "time_elapsed" + # print "%s" % time_elapsed + + if time_elapsed>=CRITICAL_THRESHOLD_SEC: + message="Critical: Slurm Job Pending over " + str(CRITICAL_THRESHOLD_SEC/60/60/24) + " days" + state=STATE_CRITICAL + + if time_elapsed>=WARNING_THRESHOLD_SEC and state != STATE_CRITICAL: + message="Warning: Slurm Job Pending over " + str(WARNING_THRESHOLD_SEC/60/60/24) + " days" + state=STATE_WARNING + + +if state == STATE_OK: + print "OK: No Slurm Jobs BLOCKED over %s days" % int(WARNING_THRESHOLD_SEC/60/60/24) +else: + print message + check_pending_com_job=subprocess.Popen("/usr/local/slurm/latest/bin/squeue --format='%.18i %.9P %.8u %.8T %S %V %R' --states=PENDING", shell=True, stdout=subprocess.PIPE) + squeue_result=check_pending_com_job.communicate()[0] + print squeue_result + sys.exit(state) # exit with the level that matches the printed message (WARNING or CRITICAL) + +sys.exit(state) + + + + diff --git a/roles/nagios_monitored/files/scripts/check_blocked_vis_jobs b/roles/nagios_monitored/files/scripts/check_blocked_vis_jobs new file mode 100755 index 0000000000000000000000000000000000000000..8614e13418de4f043de121a961ae59b17bbf3370 --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_blocked_vis_jobs @@ -0,0 +1,50 @@ +#!/usr/bin/python +import sys, os, pwd +import getopt +import commands +import subprocess +import datetime + +STATE_OK=0 
+STATE_WARNING=1 +STATE_CRITICAL=2 + +check_pending_vis_job=subprocess.Popen("squeue -p m2-vis-c6,m1-vis-c6 --format='%.18i %.9P %.8j %.8u %.2t %.19S %.6D %20Y %R' --states=PENDING | grep -i vis | awk {'print $1'}", shell=True, stdout=subprocess.PIPE) + +pending_vis_job_id_list=check_pending_vis_job.communicate()[0] +for job_ID in pending_vis_job_id_list.splitlines(): + check_submit_time=subprocess.Popen("/usr/local/slurm/latest/bin/squeue --Format=jobID,submittime | grep %s | awk {print'$2'}" % job_ID, shell=True, stdout=subprocess.PIPE) + a = check_submit_time.communicate()[0].strip() +# print "%s" % a + + check_recent_time=subprocess.Popen("date +'%Y-%m-%dT%H:%M:%S'", shell=True, stdout=subprocess.PIPE) + b = check_recent_time.communicate()[0].strip() +# print "%s" % b + + submit_time = datetime.datetime.strptime(a, '%Y-%m-%dT%H:%M:%S') + now_time = datetime.datetime.strptime(b, '%Y-%m-%dT%H:%M:%S') + + time_elapsed=(now_time - submit_time).days*24*60*60 + (now_time - submit_time).seconds # whole elapsed time in seconds; .seconds alone drops full days, so a job pending for over a day could look fresh + if time_elapsed>=900: + print "CRITICAL: Slurm Vis Job Pending over 15 mins" + pending_listp = subprocess.Popen("/usr/local/slurm/latest/bin/squeue -p m2-vis-c6,m1-vis-c6 --states=PENDING", shell=True, stdout=subprocess.PIPE) + pending_list = pending_listp.communicate()[0] + print "\n\n/usr/local/slurm/latest/bin/squeue -p m2-vis-c6,m1-vis-c6 --states=PENDING\n" + print pending_list + sys.exit(STATE_CRITICAL) + +# print "%s" % time_elapsed + if time_elapsed>=300: + print "WARNING: Slurm Vis Job Pending over 5 mins" + pending_listp = subprocess.Popen("/usr/local/slurm/latest/bin/squeue -p m2-vis-c6,m1-vis-c6 --states=PENDING", shell=True, stdout=subprocess.PIPE) + pending_list = pending_listp.communicate()[0] + print "\n\n/usr/local/slurm/latest/bin/squeue -p m2-vis-c6,m1-vis-c6 --states=PENDING\n" + print pending_list + sys.exit(STATE_WARNING) + +print "NO Slurm Vis Jobs BLOCKED over 15 mins" +sys.exit(STATE_OK) + + + + diff --git a/roles/nagios_monitored/files/scripts/check_disk 
b/roles/nagios_monitored/files/scripts/check_disk new file mode 100755 index 0000000000000000000000000000000000000000..e13060a1fafc5ee1eb188f6b7ccdc702bef226cd --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_disk @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Nagios check: exits WARNING when / or /tmp is more than 90% full, +# UNKNOWN when the usage cannot be read, and OK otherwise. + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + +HOST_NAME=$(hostname -f) + +# check_disk PATH: leave the script with WARNING or UNKNOWN if the filesystem holding PATH fails the check. +function check_disk() { + local space="$1" + + # -P prints every filesystem on a single line, so row 2 column 5 is always the use percentage. + local used=$(df -P "${space}" | awk 'NR==2 {print $5}' | sed -e 's/%//') + local threshhold=$((90)) + + # A missing or non-numeric value must not fall through to the OK message. + if ! [[ "${used}" =~ ^[0-9]+$ ]]; then + echo "UNKNOWN: cannot determine filesystem usage of ${space} on ${HOST_NAME}" + exit "${STATE_UNKNOWN}" + fi + + if (( ${used} > ${threshhold} )); then + echo "WARNING: ${HOST_NAME} is running out of filesystem space" + exit "${STATE_WARNING}" + fi +} + +check_disk "/" +check_disk "/tmp" + +echo "Check ${HOST_NAME} disk space Ok" +exit "$STATE_OK" + diff --git a/roles/nagios_monitored/files/scripts/check_load b/roles/nagios_monitored/files/scripts/check_load index 8e6966c4e309874444dbe4cca2f5e783e107fc88..4768d6b00cf21d53446e56fbe2801ef315c71d57 100755 Binary files a/roles/nagios_monitored/files/scripts/check_load and b/roles/nagios_monitored/files/scripts/check_load differ diff --git a/roles/nagios_monitored/files/scripts/check_localfs.sh b/roles/nagios_monitored/files/scripts/check_localfs.sh new file mode 100755 index 0000000000000000000000000000000000000000..20043ee4530895c62c35b96d23d5b7ef31b4e4ee --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_localfs.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +STATE_OK=0 +STATE_WARNING=1 +STATE_CRITICAL=2 +STATE_UNKNOWN=3 +STATE_DEPENDENT=4 + + +main () +{ + tmpfile=$( mktemp -p /tmp ) + r=$? + if [ $r -ne 0 ] + then + return $STATE_CRITICAL + else + rm $tmpfile + fi + + return $STATE_OK + +} + +main +exit $? 
diff --git a/roles/nagios_monitored/files/scripts/check_munge b/roles/nagios_monitored/files/scripts/check_munge index bf7c01cf1c5be003604a09f77e51d99d7bc7344c..5e94412c7cdbb369f81c7ee6643aa74d2a436059 100755 --- a/roles/nagios_monitored/files/scripts/check_munge +++ b/roles/nagios_monitored/files/scripts/check_munge @@ -7,7 +7,7 @@ import subprocess STATE_OK=0 STATE_WARNING=1 -check_munge=subprocess.Popen("/usr/sbin/service munge status", shell=True, stdout=subprocess.PIPE) +check_munge=subprocess.Popen("service munge status", shell=True, stdout=subprocess.PIPE) munge_status=check_munge.communicate()[0] if "run" in munge_status: diff --git a/roles/nagios_monitored/files/scripts/check_ntp b/roles/nagios_monitored/files/scripts/check_ntp new file mode 100755 index 0000000000000000000000000000000000000000..6124b60650cde49593aef13653e583789aa2e7ae --- /dev/null +++ b/roles/nagios_monitored/files/scripts/check_ntp @@ -0,0 +1,21 @@ +#!/usr/bin/python +import sys, os, pwd +import getopt +import commands +import subprocess + +STATE_OK=0 +STATE_WARNING=1 + +check_munge=subprocess.Popen("service ntpd status", shell=True, stdout=subprocess.PIPE) +munge_status=check_munge.communicate()[0] + +if "run" in munge_status: + print "NTPD service is Running" + sys.exit(STATE_OK) +else: + print "NTPD service is NOT Running !!" 
+ sys.exit(STATE_WARNING) + +sys.exit(STATE_OK) + diff --git a/roles/nagios_monitored/tasks/main.yml b/roles/nagios_monitored/tasks/main.yml index 8a1e5ce9b0f0273344b7e8b3331d8ac4cb2b4a1b..1a7789eb7c9e9d397ef760e52ecb6c08ecd8f9c9 100644 --- a/roles/nagios_monitored/tasks/main.yml +++ b/roles/nagios_monitored/tasks/main.yml @@ -1,10 +1,11 @@ --- - name: create nagios user - user: name=nagios system=yes createhome=yes home=/var/lib/nagios shell=/bin/bash + user: name=nagios system=yes createhome=yes home={{ nagios_home }} shell=/bin/bash sudo: true - name: authorize_key - authorized_key: user=nagios key="{{ monitor_pubkey }}" + # the file lookup reads the public key on the control machine; the whole path value is quoted as one argument + authorized_key: user=nagios key="{{ lookup('file', 'files/nagios_public_key') }}" path="{{ nagios_home }}/.ssh/authorized_keys" sudo: true - name: make scripts directory @@ -12,8 +13,8 @@ sudo: true - name: install monitor scripts - copy: dest=/var/lib/nagios/scripts/{{ item }} src=scripts/{{ item }} mode=755 - with_items: - - check_load - - check_munge + # install only the scripts of services whose groups include one of this host's groups + copy: dest={{ nagios_home }}/scripts/{{ item }} src=scripts/{{ item }} mode=755 + with_items: "{% set script_list = [] %}{% for s in nagios_services %}{% for g in group_names %}{% if g in s.groups and s.script %}{% if script_list.append(s.script) %}{% endif %}{% endif %}{% endfor %}{% endfor %}{{ script_list }}" sudo: true + diff --git a/roles/nagios_server/handlers/main.yml b/roles/nagios_server/handlers/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..fb97f1971ff637b1d7e27713e2386c53603d6a8c --- /dev/null +++ b/roles/nagios_server/handlers/main.yml @@ -0,0 +1,8 @@ +--- +- name: restart apache2 + service: name=apache2 state=restarted + sudo: true + +- name: restart postfix + service: name=postfix state=restarted + sudo: true diff --git a/roles/nagios_server/tasks/main.yml b/roles/nagios_server/tasks/main.yml index 9e56db5b42cb982990d45423a72ec534102377fd..203b4f8b10efaa8324e188c745396101b9c8db76 100644 --- 
a/roles/nagios_server/tasks/main.yml +++ b/roles/nagios_server/tasks/main.yml @@ -1,31 +1,29 @@ --- +- name: create directory + file: dest=/var/lib/nagios/.ssh state=directory + sudo: true + +- name: create nagios user + user: name=nagios system=yes createhome=yes home={{ nagios_home }} shell=/bin/bash + sudo: true + - name: copy priv key - template: src={{ monitor_privkey_file }} dest=/var/lib/nagios/.ssh/id_rsa mode=600 owner=nagios + template: src={{ monitor_privkey_file }} dest={{ nagios_home }}/.ssh/id_rsa mode=600 owner={{ nagios_username }} sudo: true - name: install packages - apt: name={{ item }} state=installed + apt: name={{ item }} state=present with_items: - nagios3 - python-passlib - - python3-passlib sudo: true + when: ansible_os_family == "Debian" - name: configure nagios authentication htpasswd: path=/etc/nagios3/htpasswd.users name={{ nagios_username }} password={{ nagios_password }} sudo: true -- name: configure monitoring - template: src={{ item }}_nagios2.cfg.j2 dest=/etc/nagios3/conf.d/{{ item }}_nagios2.cfg - with_items: - - 'hostgroups' - - 'hosts' - - 'commands' - - 'services' - - 'extinfo' - sudo: true - - name: force restart - service: name=nagios3 state=restarted + service: name=nagios3 state=started sudo: true diff --git a/roles/nagios_server/templates/cgi.cfg.j2 b/roles/nagios_server/templates/cgi.cfg.j2 new file mode 100644 index 0000000000000000000000000000000000000000..a3f3a07cd440ba32de38771fe0350102443b6618 --- /dev/null +++ b/roles/nagios_server/templates/cgi.cfg.j2 @@ -0,0 +1,377 @@ +################################################################# +# +# CGI.CFG - Sample CGI Configuration File for Nagios +# +################################################################# + + +# MAIN CONFIGURATION FILE +# This tells the CGIs where to find your main configuration file. +# The CGIs will read the main and host config files for any other +# data they might need. 
+ +main_config_file=/etc/nagios3/nagios.cfg + + + +# PHYSICAL HTML PATH +# This is the path where the HTML files for Nagios reside. This +# value is used to locate the logo images needed by the statusmap +# and statuswrl CGIs. + +physical_html_path=/usr/share/nagios3/htdocs + + + +# URL HTML PATH +# This is the path portion of the URL that corresponds to the +# physical location of the Nagios HTML files (as defined above). +# This value is used by the CGIs to locate the online documentation +# and graphics. If you access the Nagios pages with an URL like +# http://www.myhost.com/nagios, this value should be '/nagios' +# (without the quotes). + +url_html_path=/nagios3 + + + +# CONTEXT-SENSITIVE HELP +# This option determines whether or not a context-sensitive +# help icon will be displayed for most of the CGIs. +# Values: 0 = disables context-sensitive help +# 1 = enables context-sensitive help + +show_context_help=1 + + + +# PENDING STATES OPTION +# This option determines what states should be displayed in the web +# interface for hosts/services that have not yet been checked. +# Values: 0 = leave hosts/services that have not been checked yet in their original state +# 1 = mark hosts/services that have not been checked yet as PENDING + +use_pending_states=1 + +# NAGIOS PROCESS CHECK COMMAND +# This is the full path and filename of the program used to check +# the status of the Nagios process. It is used only by the CGIs +# and is completely optional. However, if you don't use it, you'll +# see warning messages in the CGIs about the Nagios process +# not running and you won't be able to execute any commands from +# the web interface. The program should follow the same rules +# as plugins; the return codes are the same as for the plugins, +# it should have timeout protection, it should output something +# to STDIO, etc. 
+# +# Note: The command line for the check_nagios plugin below may +# have to be tweaked a bit, as different versions of the plugin +# use different command line arguments/syntaxes. + +nagios_check_command=/usr/lib/nagios/plugins/check_nagios /var/cache/nagios3/status.dat 5 '/usr/sbin/nagios3' + + +# AUTHENTICATION USAGE +# This option controls whether or not the CGIs will use any +# authentication when displaying host and service information, as +# well as committing commands to Nagios for processing. +# +# Read the HTML documentation to learn how the authorization works! +# +# NOTE: It is a really *bad* idea to disable authorization, unless +# you plan on removing the command CGI (cmd.cgi)! Failure to do +# so will leave you wide open to kiddies messing with Nagios and +# possibly hitting you with a denial of service attack by filling up +# your drive by continuously writing to your command file! +# +# Setting this value to 0 will cause the CGIs to *not* use +# authentication (bad idea), while any other value will make them +# use the authentication functions (the default). + +use_authentication=1 + + + + +# x509 CERT AUTHENTICATION +# When enabled, this option allows you to use x509 cert (SSL) +# authentication in the CGIs. This is an advanced option and should +# not be enabled unless you know what you're doing. + +use_ssl_authentication=0 + + + + +# DEFAULT USER +# Setting this variable will define a default user name that can +# access pages without authentication. This allows people within a +# secure domain (i.e., behind a firewall) to see the current status +# without authenticating. You may want to use this to avoid basic +# authentication if you are not using a secure server since basic +# authentication transmits passwords in the clear. +# +# Important: Do not define a default username unless you are +# running a secure web server and are sure that everyone who has +# access to the CGIs has been authenticated in some manner! 
If you +# define this variable, anyone who has not authenticated to the web +# server will inherit all rights you assign to this user! + +#default_user_name=guest + + + +# SYSTEM/PROCESS INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# have access to viewing the Nagios process information as +# provided by the Extended Information CGI (extinfo.cgi). By +# default, *no one* has access to this unless you choose to +# not use authorization. You may use an asterisk (*) to +# authorize any user who has authenticated to the web server. + +authorized_for_system_information=nagiosadmin,nagios + + + +# CONFIGURATION INFORMATION ACCESS +# This option is a comma-delimited list of all usernames that +# can view ALL configuration information (hosts, commands, etc). +# By default, users can only view configuration information +# for the hosts and services they are contacts for. You may use +# an asterisk (*) to authorize any user who has authenticated +# to the web server. + +authorized_for_configuration_information=nagiosadmin,nagios + + + +# SYSTEM/PROCESS COMMAND ACCESS +# This option is a comma-delimited list of all usernames that +# can issue shutdown and restart commands to Nagios via the +# command CGI (cmd.cgi). Users in this list can also change +# the program mode to active or standby. By default, *no one* +# has access to this unless you choose to not use authorization. +# You may use an asterisk (*) to authorize any user who has +# authenticated to the web server. + +authorized_for_system_commands=nagiosadmin,nagios + + + +# GLOBAL HOST/SERVICE VIEW ACCESS +# These two options are comma-delimited lists of all usernames that +# can view information for all hosts and services that are being +# monitored. By default, users can only view information +# for hosts or services that they are contacts for (unless you +# choose to not use authorization). 
You may use an asterisk (*) +# to authorize any user who has authenticated to the web server. + + +authorized_for_all_services=nagiosadmin,nagios +authorized_for_all_hosts=nagiosadmin,nagios + + + +# GLOBAL HOST/SERVICE COMMAND ACCESS +# These two options are comma-delimited lists of all usernames that +# can issue host or service related commands via the command +# CGI (cmd.cgi) for all hosts and services that are being monitored. +# By default, users can only issue commands for hosts or services +# that they are contacts for (unless you choose to not use +# authorization). You may use an asterisk (*) to authorize any +# user who has authenticated to the web server. + +authorized_for_all_service_commands=nagiosadmin,nagios +authorized_for_all_host_commands=nagiosadmin,nagios + + + +# READ-ONLY USERS +# A comma-delimited list of usernames that have read-only rights in +# the CGIs. This will block any service or host commands normally shown +# on the extinfo CGI pages. It will also block comments from being shown +# to read-only users. + +#authorized_for_read_only=user1,user2 + + + + +# STATUSMAP BACKGROUND IMAGE +# This option allows you to specify an image to be used as a +# background in the statusmap CGI. It is assumed that the image +# resides in the HTML images path (i.e. /usr/local/nagios/share/images). +# This path is automatically determined by appending "/images" +# to the path specified by the 'physical_html_path' directive. +# Note: The image file may be in GIF, PNG, JPEG, or GD2 format. +# However, I recommend that you convert your image to GD2 format +# (uncompressed), as this will cause less CPU load when the CGI +# generates the image. 
+ +#statusmap_background_image=smbackground.gd2 + + + + +# STATUSMAP TRANSPARENCY INDEX COLOR +# These options set the r,g,b values of the background color used by the statusmap CGI, +# so normal browsers that can't show real png transparency set the desired color as +# a background color instead (to make it look pretty). +# Defaults to white: (R,G,B) = (255,255,255). + +#color_transparency_index_r=255 +#color_transparency_index_g=255 +#color_transparency_index_b=255 + + + + +# DEFAULT STATUSMAP LAYOUT METHOD +# This option allows you to specify the default layout method +# the statusmap CGI should use for drawing hosts. If you do +# not use this option, the default is to use user-defined +# coordinates. Valid options are as follows: +# 0 = User-defined coordinates +# 1 = Depth layers +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular +# 5 = Circular (Marked Up) + +default_statusmap_layout=5 + + + +# DEFAULT STATUSWRL LAYOUT METHOD +# This option allows you to specify the default layout method +# the statuswrl (VRML) CGI should use for drawing hosts. If you +# do not use this option, the default is to use user-defined +# coordinates. Valid options are as follows: +# 0 = User-defined coordinates +# 2 = Collapsed tree +# 3 = Balanced tree +# 4 = Circular + +default_statuswrl_layout=4 + + + +# STATUSWRL INCLUDE +# This option allows you to include your own objects in the +# generated VRML world. It is assumed that the file +# resides in the HTML path (i.e. /usr/local/nagios/share). + +#statuswrl_include=myworld.wrl + + + +# PING SYNTAX +# This option determines what syntax should be used when +# attempting to ping a host from the WAP interface (using +# the statuswml CGI). You must include the full path to +# the ping binary, along with all required options. The +# $HOSTADDRESS$ macro is substituted with the address of +# the host before the command is executed. 
+# Please note that the syntax for the ping binary is +# notorious for being different on virtually every *NIX +# OS and distribution, so you may have to tweak this to +# work on your system. + +ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$ + + + +# REFRESH RATE +# This option allows you to specify the refresh rate in seconds +# of various CGIs (status, statusmap, extinfo, and outages). + +refresh_rate=90 + +# DEFAULT PAGE LIMIT +# This option allows you to specify the default number of results +# displayed on the status.cgi. This number can be adjusted from +# within the UI after the initial page load. Setting this to 0 +# will show all results. + +result_limit=100 + + +# ESCAPE HTML TAGS +# This option determines whether HTML tags in host and service +# status output are escaped in the web interface. If enabled, +# your plugin output will not be able to contain clickable links. + +escape_html_tags=1 + + + + +# SOUND OPTIONS +# These options allow you to specify an optional audio file +# that should be played in your browser window when there are +# problems on the network. The audio files are used only in +# the status CGI. Only the sound for the most critical problem +# will be played. Order of importance (higher to lower) is as +# follows: unreachable hosts, down hosts, critical services, +# warning services, and unknown services. If there are no +# visible problems, the sound file optionally specified by +# 'normal_sound' variable will be played. +# +# +# <varname>=<sound_file> +# +# Note: All audio files must be placed in the /media subdirectory +# under the HTML path (i.e. /usr/local/nagios/share/media/). + +#host_unreachable_sound=hostdown.wav +#host_down_sound=hostdown.wav +#service_critical_sound=critical.wav +#service_warning_sound=warning.wav +#service_unknown_sound=warning.wav +#normal_sound=noproblem.wav + + + +# URL TARGET FRAMES +# These options determine the target frames in which notes and +# action URLs will open. 
+ +action_url_target=_blank +notes_url_target=_blank + + + + +# LOCK AUTHOR NAMES OPTION +# This option determines whether users can change the author name +# when submitting comments, scheduling downtime. If disabled, the +# author names will be locked into their contact name, as defined in Nagios. +# Values: 0 = allow editing author names +# 1 = lock author names (disallow editing) + +lock_author_names=1 + + + + +# SPLUNK INTEGRATION OPTIONS +# These options allow you to enable integration with Splunk +# in the web interface. If enabled, you'll be presented with +# "Splunk It" links in various places in the CGIs (log file, +# alert history, host/service detail, etc). Useful if you're +# trying to research why a particular problem occurred. +# For more information on Splunk, visit http://www.splunk.com/ + +# This option determines whether the Splunk integration is enabled +# Values: 0 = disable Splunk integration +# 1 = enable Splunk integration + +#enable_splunk_integration=1 + + +# This option should be the URL used to access your instance of Splunk + +#splunk_url=http://127.0.0.1:8000/ + + + diff --git a/roles/nagios_server/templates/commands_nagios2.cfg.j2 b/roles/nagios_server/templates/commands_nagios2.cfg.j2 deleted file mode 100644 index 8f4f49cf168a8c9cf3aa55a036c6ff03c7dc79a2..0000000000000000000000000000000000000000 --- a/roles/nagios_server/templates/commands_nagios2.cfg.j2 +++ /dev/null @@ -1,9 +0,0 @@ -define command { - command_name check_mount - command_line /usr/lib/nagios/plugins/check_by_ssh -H $HOSTADDRESS$ -C "/var/lib/nagios/scripts/check_mount.pl -m $ARG1$" -} -define command { - command_name check_munge - command_line /usr/lib/nagios/plugins/check_by_ssh -H $HOSTADDRESS$ -C "/var/lib/nagios/scripts/check_munge" -} - diff --git a/roles/nagios_server/templates/extinfo_nagios2.cfg.j2 b/roles/nagios_server/templates/extinfo_nagios2.cfg.j2 deleted file mode 100644 index 
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/roles/nagios_server/templates/hostgroups_nagios2.cfg.j2 b/roles/nagios_server/templates/hostgroups_nagios2.cfg.j2 deleted file mode 100644 index 54b7862e846f0b6cc8aa247cb4c73d0b1c79c601..0000000000000000000000000000000000000000 --- a/roles/nagios_server/templates/hostgroups_nagios2.cfg.j2 +++ /dev/null @@ -1,44 +0,0 @@ -# Some generic hostgroup definitions - -## A simple wildcard hostgroup -#define hostgroup { -# hostgroup_name all -# alias All Servers -# members * -# } - -## A list of your Debian GNU/Linux servers -#define hostgroup { -# hostgroup_name debian-servers -# alias Debian GNU/Linux Servers -# members localhost -# } - -## A list of your web servers -#define hostgroup { -# hostgroup_name http-servers -# alias HTTP servers -# members localhost -# } - -## A list of your ssh-accessible servers -#define hostgroup { -# hostgroup_name ssh-servers -# alias SSH servers -# members * -# } - - -{% for group in groups %} -#{ % if group != "all" % } -{% set nodelist = [] %} -{% for node in groups[group] %} -{% if nodelist.append(node) %} -{% endif %} -{% endfor %} -define hostgroup { - hostgroup_name {{ group }} - members {{ nodelist|unique|join(',') }} -} -#{ % endif % } -{% endfor %} diff --git a/roles/nagios_server/templates/main_cf.j2 b/roles/nagios_server/templates/main_cf.j2 new file mode 100644 index 0000000000000000000000000000000000000000..2823b289dc68bb169f0f6a2556a314876762bf61 --- /dev/null +++ b/roles/nagios_server/templates/main_cf.j2 @@ -0,0 +1,39 @@ +# See /usr/share/postfix/main.cf.dist for a commented, more complete version + + +# Debian specific: Specifying a file name will cause the first +# line of that file to be used as the name. The Debian default +# is /etc/mailname. +#myorigin = /etc/mailname + +smtpd_banner = $myhostname ESMTP $mail_name +biff = no + +# appending .domain is the MUA's job. 
+append_dot_mydomain = no + +# Uncomment the next line to generate "delayed mail" warnings +#delay_warning_time = 4h + +readme_directory = no + +# TLS parameters +smtpd_tls_cert_file=/etc/ssl/certs/ssl-cert-snakeoil.pem +smtpd_tls_key_file=/etc/ssl/private/ssl-cert-snakeoil.key +smtpd_use_tls=yes +smtpd_tls_session_cache_database = btree:${data_directory}/smtpd_scache +smtp_tls_session_cache_database = btree:${data_directory}/smtp_scache + +# See /usr/share/doc/postfix/TLS_README.gz in the postfix-doc package for +# information on enabling SSL in the smtp client. + +myhostname = {{ ansible_fqdn }} +alias_maps = hash:/etc/aliases +alias_database = hash:/etc/aliases +myorigin = {{ ansible_fqdn }} +mydestination = {{ ansible_fqdn }}, localhost.{{ ansible_domain }}, localhost +relayhost = {{ smtp_smarthost }} +mynetworks = 127.0.0.0/8 [::ffff:127.0.0.0]/104 [::1]/128 +mailbox_size_limit = 0 +recipient_delimiter = + +inet_interfaces = loopback-only diff --git a/roles/setupKnownHosts/templates/known_hosts.j2 b/roles/setupKnownHosts/templates/known_hosts.j2 index b083b5ee6e037007b984b133264405c77a9340ff..47eb314c25eff70eba4a1007fbcfee8489147f27 100644 --- a/roles/setupKnownHosts/templates/known_hosts.j2 +++ b/roles/setupKnownHosts/templates/known_hosts.j2 @@ -7,8 +7,7 @@ {% if nodelist.append(host) %} {% endif %} {% endif %} -{% if hostvars[node]['ansible_ssh_host_key_ecdsa_public'] %} -#{% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ssh-ecdsa', 'key': hostvars[node]['ansible_ssh_host_key_ecdsa_public']} %} +{% if 'ansible_ssh_host_key_ecdsa_public' in hostvars[node] and hostvars[node]['ansible_ssh_host_key_ecdsa_public'] %} {% set host = {'name': node, 'ip': hostvars[node]['ansible_'+interface]['ipv4']['address'], 'keytype':'ecdsa-sha2-nistp256', 'key': hostvars[node]['ansible_ssh_host_key_ecdsa_public']} %} {% if nodelist.append(host) %} {% endif %} diff --git a/roles/slurm-common/tasks/main.yml 
b/roles/slurm-common/tasks/main.yml index fc9eedfb4a294c364b33cc14935b762c71ba93fc..5cc6f6d7e1430d9eeffd6e9538cdbe12b4b8a375 100644 --- a/roles/slurm-common/tasks/main.yml +++ b/roles/slurm-common/tasks/main.yml @@ -42,12 +42,12 @@ file: path={{slurmsharedstatedir }} state=directory owner=slurm group=slurm mode=750 sudo: true run_once: true - when: usesharedstatedir + when: usesharedstatedir is defined and usesharedstatedir - name: symlink shared state dir file: path={{ slurmstatedir }} src={{ slurmsharedstatedir }} state=link sudo: true - when: usesharedstatedir + when: usesharedstatedir is defined and usesharedstatedir - name: create state directory file: path={{ slurmstatedir }} state=directory owner=slurm group=slurm mode=750