diff --git a/roles/collectd/tasks/main.yml b/roles/collectd/tasks/main.yml
index b951071e3d1bc86b7a9e57ffcd2a68a0ff643d4e..034e803299384934dad8449281f98eedb88135af 100644
--- a/roles/collectd/tasks/main.yml
+++ b/roles/collectd/tasks/main.yml
@@ -26,6 +26,11 @@
   become_user: root
   when: cudamonitor is defined
 
+- name: install mountstats_collectd python script
+  template: src=mountstats_collectd.py.j2 dest=/etc/collectd.python/mountstats_collectd.py mode=0700 owner=root group=root
+  become: true
+  become_user: root
+
 - name: start collectd service
   service: name=collectd state=started enabled=true
   become: true
diff --git a/roles/collectd/templates/collectd.conf.j2 b/roles/collectd/templates/collectd.conf.j2
index 9d722189615d5054e0b42cab8bad5db537e9d8d4..487b12d797b8ad04206cce6092086f4aeee09d28 100644
--- a/roles/collectd/templates/collectd.conf.j2
+++ b/roles/collectd/templates/collectd.conf.j2
@@ -39,7 +39,7 @@ FQDNLookup true
 # Interval 60 #
 # </LoadPlugin> #
 #----------------------------------------------------------------------------#
-#Interval 10
+Interval 30
 
 #MaxReadInterval 86400
 #Timeout 2
@@ -915,6 +915,9 @@ LoadPlugin users
     <Module cuda_collectd>
     </Module>
 {% endif %}
+    Import "mountstats_collectd"
+    <Module mountstats_collectd>
+    </Module>
 </Plugin>
 # LogTraces true
 # Interactive true
@@ -1134,6 +1137,7 @@ LoadPlugin users
         ValuesFrom 1
     </Result>
 </Table>
+
 # <Table "/proc/slabinfo">
 # Instance "slabinfo"
 # Separator " "
diff --git a/roles/collectd/templates/mountstats_collectd.py.j2 b/roles/collectd/templates/mountstats_collectd.py.j2
new file mode 100644
index 0000000000000000000000000000000000000000..197c8098f179c70783a635b60e4e4f7a3e2555be
--- /dev/null
+++ b/roles/collectd/templates/mountstats_collectd.py.j2
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+"""collectd read plugin reporting NFSv4 client counters.
+
+Parses /proc/self/mountstats and dispatches the "bytes:" and "events:"
+counters of every "fstype nfs4" mount as gauges under the "mountstats"
+plugin, using the mounted device as the plugin instance.
+"""
+
+try:
+    import collectd
+    from collectd import Values
+except ImportError:
+    # Not running inside collectd: use the local test stub instead.
+    collectd = None
+    from fakeCollectd import Values
+
+STATS = '/proc/self/mountstats'
+# Field names are paired positionally with the numbers on the matching line.
+BYTESFIELDS = ['nread', 'nwrite', 'dread', 'dwrite', 'nfsread', 'nfswrite',
+               'pageread', 'pagewrite']
+BYTESSTR = '\tbytes:\t'
+EVENTSSTR = '\tevents:\t'
+EVENTSFIELDS = [
+    "inode_revalidate", "dnode_revalidate", "data_invalidate",
+    "attribute_invalidate", "vfs_open", "vfs_lookup", "vfs_access",
+    "vfs_update_page", "vfs_read_page", "vfs_read_pages", "vfs_write_page",
+    "vfs_write_pages", "vfs_getdents", "vfs_setattr", "vfs_flush",
+    "vfs_fsync", "vfs_lock", "vfs_file_release", "congestion_wait",
+    "truncation", "write_extension", "silly_rename", "short_read",
+    "short_write", "jukebox_delay", "pnfs_read", "pnfs_write",
+]
+
+
+def _parse_counters(line, prefix, names):
+    """Pair the integers that follow prefix on line with names."""
+    # list() keeps the result concatenable on Python 3, where zip() is lazy.
+    return list(zip(names, map(int, line[len(prefix):].split())))
+
+
+def get_values():
+    """Return the counters of every nfs4 mount listed in STATS.
+
+    The result maps each nfs4 device name to a list of (field, value)
+    tuples: the "bytes:" fields followed by the "events:" fields.
+    """
+    rv = {}
+    device = None
+    ev = []
+    with open(STATS, 'r') as f:
+        for line in f:
+            if 'fstype ' in line:
+                # Header of a new mount section: forget the previous mount so
+                # counters of other filesystem types are not credited to it.
+                device = None
+                ev = []
+                if 'fstype nfs4' in line:
+                    # strip(): the text before "mounted on" ends with a space.
+                    name = line.split('mounted on')[0][len('device '):]
+                    device = name.strip()
+            elif device is None:
+                continue
+            elif line.startswith(EVENTSSTR):
+                ev = _parse_counters(line, EVENTSSTR, EVENTSFIELDS)
+            elif line.startswith(BYTESSTR):
+                rv[device] = _parse_counters(line, BYTESSTR, BYTESFIELDS) + ev
+    return rv
+
+
+def read(data=None):
+    """collectd read callback: dispatch every parsed counter as a gauge."""
+    vl = Values(type='gauge')
+    vl.plugin = 'mountstats'
+    for mount, stats in get_values().items():
+        if collectd is None:
+            # Only echo the mount in standalone test runs.
+            print(mount)
+        vl.plugin_instance = mount
+        for name, value in stats:
+            vl.dispatch(type='gauge', type_instance=name, values=[value])
+
+
+def readtest():
+    """Run one read cycle outside collectd."""
+    read()
+
+
+if collectd is not None:
+    collectd.register_read(read, 60)
+else:
+    readtest()