Skip to content
Snippets Groups Projects
cuda_collectd.py.j2 1.11 KiB
Newer Older
#!/usr/bin/env python

import collectd
import subprocess
import xml.etree.ElementTree as ET

def read(data=None):
        vl = collectd.Values(type='gauge')
        vl.plugin = 'cuda'

        out = subprocess.check_output(['nvidia-smi', '-q', '-x'])
        root = ET.fromstring(out)

        for gpu in root.iter('gpu'):
                vl.plugin_instance = 'cuda-%s' % (gpu.attrib['id'])

                try:
                    vl.dispatch(type='temperature',
                            values=[float(gpu.find('temperature/gpu_temp').text.split()[0])])
                except:
                    pass

                try:
                    vl.dispatch(type='utilization', type_instance='memory',
                            values=[1e6 * float(gpu.find('utilization/memory_util').text.split()[0])])
                except:
                    pass
                try:
                    vl.dispatch(type='utilization', type_instance='gpu',
                            values=[1e6 * float(gpu.find('utilization/gpu_util').text.split()[0])])