diff --git a/roles/gluster-monitor/tasks/main.yml b/roles/gluster-monitor/tasks/main.yml
index 2f73f3cc9303ccec17b37222f2ad13ff0563f2cc..6641f72314c03b772f366a2067cdddcaf9054bfa 100644
--- a/roles/gluster-monitor/tasks/main.yml
+++ b/roles/gluster-monitor/tasks/main.yml
@@ -7,8 +7,8 @@
   become_user: root
 - name: template gluster_monitoring
   template:
-    src=detect-gluster-problems.sh.j2
-    dest=/usr/local/sbin/detect-gluster-problems.sh
+    src=detect-gluster-problems.py.j2
+    dest=/usr/local/sbin/detect-gluster-problems.py
     mode=755
     owner=root
     group=root
@@ -16,6 +16,6 @@
   become_user: root
 - name: gluster_monitoring- install crontab entry
   #cron: name="Check glust for problems" minute="*/5" job="/usr/local/sbin/detect-gluster-problems.sh >> /tmp/detect-gluster-problems.txt 2>&1"
-  cron: name="Check gluster for problems" minute="*/5" job="/usr/local/sbin/detect-gluster-problems.sh"
+  cron: name="Check gluster for problems" minute="*/5" job="/usr/local/sbin/detect-gluster-problems.py"
   become: true
   become_user: root
diff --git a/roles/gluster-monitor/templates/detect-gluster-problems.py.j2 b/roles/gluster-monitor/templates/detect-gluster-problems.py.j2
new file mode 100644
index 0000000000000000000000000000000000000000..05d2098110f49d19c5ba6138cf2d29f20a6dd48c
--- /dev/null
+++ b/roles/gluster-monitor/templates/detect-gluster-problems.py.j2
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+#
+#
+# detect-gluster-problems.py
+# Authors simon michnowicz 13 April 2021
+#
+# Run from cron: checks the gluster volume "gv" for split-brain entries and
+# emails a report when one is found or when a check cannot be run.
+
+import os
+import sys
+import subprocess
+
+##############################
+def error(e):
+    '''
+    we have an error. Send an email
+    e=error string, used as the body of the message
+
+    The message goes to the templated address through mail(1), with the
+    local hostname in the subject. If mail cannot be started the error is
+    written to stderr instead.
+    '''
+    hostname = os.uname()[1]
+    HEADER="Error Message from {}".format(hostname)
+    MAILTO="{{ EMAIL_DEST }}"
+    # The body goes in on stdin and the subject/recipient are separate
+    # arguments, so quotes, '$' or backticks in gluster output are never
+    # interpreted by a shell.
+    try:
+        mailer = subprocess.Popen(["mail", "-s", HEADER, MAILTO],
+                                  stdin=subprocess.PIPE)
+        mailer.communicate(e.encode('UTF-8'))
+    except OSError as exc:
+        sys.stderr.write("Could not send mail: {}\n{}\n".format(exc, e))
+
+##############################
+def _run_command(test_name, command):
+    '''
+    Run command (a string, split on whitespace) and return its output,
+    stdout and stderr combined, as text.
+
+    If the command cannot be started or exits with a non-zero status, the
+    details are emailed and the script exits with status 1.
+    '''
+    preamble = test_name + ": \nPlease contact mcc-help@monash.edu \n"
+    try:
+        raw = subprocess.check_output(command.split(),
+                                      stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as exc:
+        output = exc.output.decode('UTF-8', 'replace') if exc.output else ""
+        error(preamble + " error code " + str(exc.returncode) + "\n" + output)
+        sys.exit(1)
+    except OSError as exc:
+        # Raised when the executable itself (sudo) cannot be started.
+        error(preamble + " could not run '" + command + "': " + str(exc))
+        sys.exit(1)
+    return raw.decode('UTF-8', 'replace')
+
+##############################
+def Test1():
+    '''
+    This test looks for the word "split" (as in split-brain) in a general query
+    '''
+    COMMAND="sudo gluster volume heal gv info"
+    result = _run_command("Test1", COMMAND)
+    # One email carrying the whole report, however many lines match.
+    if any('split' in line for line in result.splitlines()):
+        error(COMMAND+"\n"+result)
+
+##############################
+def Test2():
+    '''
+    This test checks for number of split brain entries
+
+    sudo gluster volume heal gv info split-brain
+    Brick 172.16.227.169:/gbrick/brick
+    Status: Connected
+    Number of entries in split-brain: 0
+    '''
+    COMMAND="sudo gluster volume heal gv info split-brain"
+    result = _run_command("Test2", COMMAND)
+    for line in result.splitlines():
+        if 'Number of entries in split-brain' not in line:
+            continue
+        # The count is whatever follows the last colon on the line.
+        count_text = line.rpartition(':')[2].strip()
+        try:
+            NoOfSplitBrains = int(count_text)
+        except ValueError:
+            # Not a number, so the output is not in the expected form.
+            error("Logic error in Test2: line is {}".format(line))
+            sys.exit(1)
+        if NoOfSplitBrains != 0:
+            error("Number of Split Brains is {}".format(NoOfSplitBrains))
+
+##############################
+def main():
+    Test1()
+    Test2()
+
+if __name__ == "__main__":
+    main()