diff --git a/roles/gluster-monitor/README.md b/roles/gluster-monitor/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f498a51c91664225829e49c9bb6c445e57d3b04
--- /dev/null
+++ b/roles/gluster-monitor/README.md
@@ -0,0 +1,5 @@
+Puts a cron job to search gluster for split brain errors. Only applicable on management nodes
+
+Usage
+- { role: gluster-monitor, tags: [ gluster,gluster_client ] }
+- { role: gluster-monitor, EMAIL_DEST: "hpc-alerts-warning-l@monash.edu", tags: [ gluster,gluster_client ] }
diff --git a/roles/gluster-monitor/tasks/main.yml b/roles/gluster-monitor/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2f73f3cc9303ccec17b37222f2ad13ff0563f2cc
--- /dev/null
+++ b/roles/gluster-monitor/tasks/main.yml
@@ -0,0 +1,21 @@
+---
+ - name: mkdir /usr/local/sbin if it does not exit
+   file:
+     path: /usr/local/sbin
+     state: directory
+   become: true
+   become_user: root
+ - name: template gluster_monitoring
+   template:
+     src=detect-gluster-problems.sh.j2
+     dest=/usr/local/sbin/detect-gluster-problems.sh
+     mode=755
+     owner=root
+     group=root
+   become: true
+   become_user: root
+ - name: gluster_monitoring- install crontab entry
+   #cron: name="Check glust for problems" minute="*/5" job="/usr/local/sbin/detect-gluster-problems.sh >> /tmp/detect-gluster-problems.txt 2>&1"
+   cron: name="Check gluster for problems" minute="*/5" job="/usr/local/sbin/detect-gluster-problems.sh"
+   become: true
+   become_user: root
diff --git a/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2 b/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2
new file mode 100644
index 0000000000000000000000000000000000000000..4582c229bde556182f4eb9b5163cde249acd5db9
--- /dev/null
+++ b/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# detect-gluster-problems.sh
+# written by sgm 10 March 2021 to be a script
+# that looks for gluster problems and emails help
+# if anything happens
+# See https://docs.gluster.org/en/latest/Troubleshooting/resolving-splitbrain/
+# We look for keywords in output
+#  a) 'Is in split-brain'
+#  b) 'Is possibly undergoing heal'
+#
+# Run from cron by the gluster-monitor role. Matching is silent (grep -q),
+# so the only mail produced on a match is the one sent explicitly below.
+
+HEAL_INFO_CMD=(sudo gluster volume heal gv info)
+HEADER="Possible Error Message in $(hostname)"
+MAILTO="{{ EMAIL_DEST }}"
+
+# Capture the heal report once; both checks below scan the same text.
+heal_report=$("${HEAL_INFO_CMD[@]}")
+heal_status=$?
+if [ "$heal_status" -ne 0 ]
+then
+    # A failing gluster CLI must not look like a healthy volume.
+    logger "detect-gluster-problems.sh: '${HEAL_INFO_CMD[*]}' exited with status $heal_status"
+fi
+
+#######################################
+# Mail the heal report when it contains a keyword (case-insensitive).
+# Globals:   heal_report, HEADER, MAILTO (read)
+# Arguments: $1 - keyword to search for
+#            $2 - situation name used in the log line and the mail body
+#            $3 - text appended to the mail subject
+#######################################
+report_if_found() {
+    local keyword=$1
+    local situation=$2
+    local subject_suffix=$3
+
+    # Quoted here-string keeps the report intact (no word splitting or
+    # globbing); -q because only the exit status is needed.
+    if grep -qi -- "$keyword" <<<"$heal_report"
+    then
+        logger "detect-gluster-problems.sh: We found a $situation situation"
+        # The report goes through a printf argument, so backslashes in it are
+        # mailed verbatim rather than being treated as escape sequences.
+        printf 'Possible %s situation on %s on %s \n%s\n' \
+            "$situation" "$(hostname)" "$(date)" "$heal_report" |
+            mail -s "$HEADER : $subject_suffix" "$MAILTO"
+    fi
+}
+
+report_if_found "split" "split brain" "Split brain message"
+report_if_found "heal" "healing" "healing message"
diff --git a/roles/gluster-monitor/vars/main.yml b/roles/gluster-monitor/vars/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..92ee6879e24b923bdb699d2c266fae0a32d8b774
--- /dev/null
+++ b/roles/gluster-monitor/vars/main.yml
@@ -0,0 +1,2 @@
+---
+EMAIL_DEST: "youremailhere@nowhere.com"