Skip to content
Snippets Groups Projects
Commit be060aea authored by Kerri Wait's avatar Kerri Wait
Browse files

Add nvidia_smi plugin to telegraf config

parent b6f26bfa
No related branches found
No related tags found
7 merge requests: !399 Capture extra NFS stats, !393 Hotfix: monitor NFS GETATTR stats via telegraf, !392 Temporarily disable inputs.filecount in telegraf, !389 Update telegraf config to ignore more ethX interfaces in ethtool plugin, !388 Fix the telegraf config for mlx hw counters to get rid of errors in logs, !387 Telegraf 1.15 nvidia_smi fix, !386 Telegraf 1.15 nvidia_smi plugin
# Telegraf input plugin: pulls statistics from NVIDIA GPUs attached to the host.
# Both settings below are left commented out, so no explicit values are set here.
[[inputs.nvidia_smi]]
## Optional: path to the nvidia-smi binary; defaults to $PATH via exec.LookPath
# bin_path = "/usr/bin/nvidia-smi"
## Optional: timeout for GPU polling
# timeout = "5s"
\ No newline at end of file
...@@ -70,3 +70,18 @@ ...@@ -70,3 +70,18 @@
become_user: root become_user: root
tags: tags:
- configuration - configuration
# Render the Telegraf nvidia_smi input plugin config from its Jinja2 template
# into Telegraf's drop-in configuration directory.
- name: Install nvidia-smi plugin
  template:
    src: inputs.nvidia_smi.conf.j2
    dest: /etc/telegraf/telegraf.d/inputs.nvidia_smi.conf
    owner: telegraf
    group: telegraf
    # Quoted so Ansible receives a string and interprets it as an octal mode:
    # owner read/write, group read, no access for others.
    mode: '640'
  # Notify the "restart telegraf" handler when this task reports a change.
  notify:
    - "restart telegraf"
  # The destination lives under /etc, so the task is run as root.
  become: true
  become_user: root
  tags:
    - configuration
    - gpu
\ No newline at end of file
...@@ -180,27 +180,27 @@ ...@@ -180,27 +180,27 @@
influxdb_database="slurm" influxdb_database="slurm"
{% endif %} {% endif %}
# Read mlx hardware counters ## Read mlx hardware counters
{% if 'hw_counters' in ansible_local %} #{% if 'hw_counters' in ansible_local %}
{% for interface in ansible_local['hw_counters'] %} #{% for interface in ansible_local['hw_counters'] %}
[[inputs.multifile]] #[[inputs.multifile]]
name_override = 'infiniband' # name_override = 'infiniband'
base_dir = '/sys/class/infiniband' # base_dir = '/sys/class/infiniband'
interval = '60s' # interval = '60s'
#
[[inputs.multifile.tags]] # [[inputs.multifile.tags]]
device = '{{ interface }}' # device = '{{ interface }}'
port = '1' # port = '1'
type = 'hw_counters' # type = 'hw_counters'
#
{% for counter in ansible_local['hw_counters'][interface] | sort %} # {% for counter in ansible_local['hw_counters'][interface] | sort %}
[[inputs.multifile.file]] #[[inputs.multifile.file]]
file = '{{ interface }}/ports/1/hw_counters/{{ counter }}' # file = '{{ interface }}/ports/1/hw_counters/{{ counter }}'
conversion = 'int' # conversion = 'int'
#
{% endfor %} # {% endfor %}
{% endfor %} #{% endfor %}
{% endif %} #{% endif %}
{% if 'Karaage' in group_names %} {% if 'Karaage' in group_names %}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment