From a1143e838cad3a3a64e85e510eec1083abec56d8 Mon Sep 17 00:00:00 2001
From: Simon Michnowicz <simon.michnowicz@monash.edu>
Date: Wed, 10 Mar 2021 14:06:20 +1100
Subject: [PATCH] First checkin of our gluster monitoring tool

---
 roles/gluster-monitor/README.md               |  5 ++
 roles/gluster-monitor/tasks/main.yml          | 21 ++++++++
 .../templates/detect-gluster-problems.sh.j2   | 50 +++++++++++++++++++
 roles/gluster-monitor/vars/main.yml           |  2 +
 4 files changed, 78 insertions(+)
 create mode 100644 roles/gluster-monitor/README.md
 create mode 100644 roles/gluster-monitor/tasks/main.yml
 create mode 100644 roles/gluster-monitor/templates/detect-gluster-problems.sh.j2
 create mode 100644 roles/gluster-monitor/vars/main.yml

diff --git a/roles/gluster-monitor/README.md b/roles/gluster-monitor/README.md
new file mode 100644
index 00000000..6f498a51
--- /dev/null
+++ b/roles/gluster-monitor/README.md
@@ -0,0 +1,5 @@
+Installs a cron job that checks gluster every 5 minutes for split-brain and healing messages and emails an alert when one is found. Only applicable on management nodes.
+
+Usage
+- { role: gluster-monitor, tags: [ gluster,gluster_client ] }
+- { role: gluster-monitor, EMAIL_DEST: "hpc-alerts-warning-l@monash.edu", tags: [ gluster,gluster_client ] }
diff --git a/roles/gluster-monitor/tasks/main.yml b/roles/gluster-monitor/tasks/main.yml
new file mode 100644
index 00000000..2f73f3cc
--- /dev/null
+++ b/roles/gluster-monitor/tasks/main.yml
@@ -0,0 +1,21 @@
+---
+  - name: mkdir /usr/local/sbin if it does not exist
+    file:  # create the directory the monitoring script is templated into
+      path: /usr/local/sbin
+      state: directory
+    become: true
+    become_user: root
+  - name: template gluster_monitoring
+    template:  # render the check script; the template reads EMAIL_DEST
+      src: detect-gluster-problems.sh.j2
+      dest: /usr/local/sbin/detect-gluster-problems.sh
+      mode: "0755"  # quoted so the value stays a string and is read as octal
+      owner: root
+      group: root
+    become: true
+    become_user: root
+  - name: gluster_monitoring- install crontab entry
+    cron: {name: "Check gluster for problems", minute: "*/5",
+           job: "/usr/local/sbin/detect-gluster-problems.sh"}
+    become: true  # no cron `user` is set, so the entry is installed for the become user
+    become_user: root
diff --git a/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2 b/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2
new file mode 100644
index 00000000..4582c229
--- /dev/null
+++ b/roles/gluster-monitor/templates/detect-gluster-problems.sh.j2
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# detect-gluster-problems.sh
+# written by sgm 10 March 2021 to be a script
+# that looks for gluster problems and emails help
+# if anything happens
+# See https://docs.gluster.org/en/latest/Troubleshooting/resolving-splitbrain/
+# We look for keywords in the heal report, e.g.
+# a) 'Is in split-brain'
+# b) 'Is possibly undergoing heal'
+# Template variables: EMAIL_DEST (mail recipient), GLUSTER_VOLUME (optional, defaults to gv).
+
+VOLUME="{{ GLUSTER_VOLUME | default('gv') }}"
+HOST=$(hostname)
+HEADER="Possible Error Message in $HOST"
+MAILTO="{{ EMAIL_DEST }}"
+
+# Capture the heal report once; both checks below search the same text.
+myOutput=$(sudo gluster volume heal "$VOLUME" info)
+commandStatus=$?
+if [ "$commandStatus" -ne 0 ]
+then
+    # Leave a trace when the query itself fails; the checks still run on whatever was printed.
+    logger "detect-gluster-problems.sh: heal info for volume $VOLUME exited with status $commandStatus"
+fi
+
+# report_if_found KEYWORD SITUATION SUBJECT_SUFFIX
+#   Searches the captured report for KEYWORD, ignoring case. On a match it
+#   logs "We found a SITUATION situation" via logger and mails the whole
+#   report to MAILTO with the subject "HEADER : SUBJECT_SUFFIX".
+report_if_found() {
+    local keyword=$1 situation=$2 subjectSuffix=$3
+
+    # Quoting keeps the report's newlines and stops the shell glob-expanding
+    # file names in it; grep -q keeps matches off stdout so a cron run stays quiet.
+    if printf '%s\n' "$myOutput" | grep -qi -- "$keyword"
+    then
+        logger "detect-gluster-problems.sh: We found a $situation situation"
+        # %s inserts the report verbatim; echo -e would expand backslashes inside it.
+        printf 'Possible %s situation on %s on %s \n%s\n' \
+            "$situation" "$HOST" "$(date)" "$myOutput" |
+            mail -s "$HEADER : $subjectSuffix" "$MAILTO"
+    fi
+}
+
+# check for split brain messages in gluster output
+report_if_found "split" "split brain" "Split brain message"
+
+# check for healing
+report_if_found "heal" "healing" "healing message"
diff --git a/roles/gluster-monitor/vars/main.yml b/roles/gluster-monitor/vars/main.yml
new file mode 100644
index 00000000..92ee6879
--- /dev/null
+++ b/roles/gluster-monitor/vars/main.yml
@@ -0,0 +1,2 @@
+---
+EMAIL_DEST: "youremailhere@nowhere.com"  # placeholder recipient for alert mail; the README usage passes EMAIL_DEST as a role parameter
-- 
GitLab