Commit 9b4401a1 authored by Chris Hines's avatar Chris Hines
Browse files

updates to the code

parent c40e3336
*~
__pycache__
.vscode
config.yml
config*
build/*
dist/*
statuspage_integration.egg-info
......@@ -10,7 +10,8 @@ def main():
import argparse
import yaml
parser = argparse.ArgumentParser()
parser.add_argument('--config', default='./config_test.yml')
parser.add_argument('--config', default='./config.yml')
parser.add_argument('--check', type=bool, default=False)
args = parser.parse_args()
......@@ -30,25 +31,29 @@ def main():
spcomponents = {}
for c in spClient.components.list():
spcomponents[c['name']] = c
logger.debug("found component {}".format(c['name']))
for name,c in config['components'].items():
msgs = []
overall = Status.operational
print(name)
print(spcomponents[name])
if spcomponents[name]['status'] == 'under_maintenance':
if name not in spcomponents:
logger.error("Can't find component {}".format(name))
if name in spcomponents and spcomponents[name]['status'] == 'under_maintenance':
continue
for t in c['tests']:
testclass = component_tests.getTestClass(classname=t['class'],influxClient=influxClient,**t)
result = testclass.test()
print(result)
logger.debug(result)
overall = max(overall,result['status'])
if result['status'] != Status.operational:
msgs.append(result['msg'])
update_component(spClient,spcomponents[name],overall,"\n".join(msgs))
if name in spcomponents:
update_component(spClient,spcomponents[name],overall,"\n".join(msgs),check=args.check)
#update_incident(spclient,spcomponents[name],overall,"\n".join(msgs),open_incidents)
def update_component(spclient,component,status,msg):
def update_component(spclient,component,status,msg,check):
from .status import Status
if status is Status.operational:
......@@ -59,7 +64,15 @@ def update_component(spclient,component,status,msg):
statusstr = 'major_outage'
if status is Status.under_maintenance:
statusstr = 'under_maintenance'
spclient.components.update(component.id, status = statusstr)
if not check:
if statusstr != component['status']:
spclient.components.update(component.id, status = statusstr)
else:
if statusstr == component['status']:
print("no change {} {} {}".format(component['name'],component['status'], statusstr))
else:
print("change {} {} {}".format(component['name'],component['status'], statusstr))
if __name__ == '__main__':
......
......@@ -49,8 +49,7 @@ class NFSFS():
time = datetime.datetime.now()
else:
time=self.time
print("testing NFS {} {}".format(self.time, self.nfs[0].ratequery(t1=None,t2=time)))
print("testing NFS {} {}".format(self.time, self.nfs[1].ratequery(t1=None,t2=time)))
if sum(map(lambda x: x.ratequery(t1=None,t2=time), self.nfs)) > 0:
msgstring = """The {fsname} filesystem is OK""".format(fsname=self.fsname)
......
import datetime
import logging
class InfluxDBRate():
"""
Take values from every host in influx.
......@@ -59,10 +60,12 @@ class InfluxDBRate():
"""
Compare the value of a specific measurement across the entire cluster at two different points in time
"""
logger=logging.getLogger()
if t1 is None:
t1 = t2 - datetime.timedelta(minutes=5)
t1 = t2 - datetime.timedelta(minutes=30)
a1 = self.sumlastquery(t1)
a2 = self.sumlastquery(t2)
logger.debug('{} {} rate query {} {} {} {}'.format(self.field, self.measurement ,a1,a2,t1,t2))
return (a2-a1)/(t2-t1).total_seconds()
def tagstring(self):
......
make a test class that returns an outage status and an ok status
use a config file that runs each of these and asserts the status
how to test the component status
find a time window in which NFSFS should return false
find a time window in which LustreFS should return false
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment