# Patch: make the NHC log level and e-mail subject configurable.
#
# nhc.sysconfig.j2
#   * adds LOG_LEVEL, rendered from nhc_log_level (falls back to 0)
#   * removes the stray space after "NHC_EMAIL_TO=" and renders the subject
#     from nhc_email_subject (falls back to the previous "Node failure")
#   * keeps both e-mail values double-quoted so that a value containing
#     whitespace stays a single shell word
# nhc_cron.j2
#   * reads LOG_LEVEL into loglevel and only writes the "Start health check"
#     message when it is greater than 0; the failure path still logs always
#
# NOTE(review): the original of this patch was flattened onto one line, so
# blank context lines and leading indentation were lost. Their placement
# below is inferred from the hunk line counts -- confirm against the target
# files (or apply with --ignore-whitespace) before relying on it.
# NOTE(review): the post-image hashes on the "index" lines are carried over
# from the original patch and do not describe the amended "+" lines.
diff --git a/roles/slurm-from-source/templates/nhc.sysconfig.j2 b/roles/slurm-from-source/templates/nhc.sysconfig.j2
index fb3c05ed75bede2da78750e4e467204a2a066dcb..df7125af6522d7d71e2773d2b0696b952858a8fa 100644
--- a/roles/slurm-from-source/templates/nhc.sysconfig.j2
+++ b/roles/slurm-from-source/templates/nhc.sysconfig.j2
@@ -3,9 +3,10 @@ NHC_RM=slurm
 HELPERDIR={{ nhc_dir }}/libexec/nhc
 PATH={{ slurm_dir }}/bin:{{ nhc_dir }}/sbin:$PATH
 LOGFILE=/var/log/nhc.log
+LOG_LEVEL={{ nhc_log_level | default(0) }}
 CONFFILE="{{ nhc_dir }}/etc/nhc/{{ nhc_config_file }}"
 
-NHC_EMAIL_TO= {{ nhc_emails }}
-NHC_EMAIL_SUBJECT="Node failure"
+NHC_EMAIL_TO="{{ nhc_emails }}"
+NHC_EMAIL_SUBJECT="{{ nhc_email_subject | default('Node failure') }}"
 NHC_LOOP_TIME="300"
 
diff --git a/roles/slurm-from-source/templates/nhc_cron.j2 b/roles/slurm-from-source/templates/nhc_cron.j2
index 6b683da3b06c1ba922223c5f4b28585c751e0066..8ec9815762a26d83fa51324d5b8478c6f8d3a111 100755
--- a/roles/slurm-from-source/templates/nhc_cron.j2
+++ b/roles/slurm-from-source/templates/nhc_cron.j2
@@ -10,6 +10,8 @@ config_file=${CONFFILE-/etc/nhc/nhc.conf}
 loop=${NHC_LOOP_TIME-300}
 email_subject=${NHC_EMAIL_SUBJECT-notification}
 logfile=${LOGFILE-/var/log/nhc.log}
+# ':-' (not '-') so that an empty LOG_LEVEL also falls back to 0; the -gt test below needs an integer
+loglevel=${LOG_LEVEL:-0}
 email_to=${NHC_EMAIL_TO-root}
 
 function log() {
@@ -27,7 +29,10 @@ function email() {
     fi
 }
 
-log "Start health check ${config_file}"
+# Routine start-up message: only written when LOG_LEVEL is greater than 0
+if [ "${loglevel}" -gt 0 ]; then
+    log "Start health check ${config_file}"
+fi
 
 nhc -c "${config_file}" > /root/nhc.log 2>&1 || { log "$(cat /root/nhc.log)" && email && exit 1; }
 