diff --git a/roles/telegraf/tasks/main.yml b/roles/telegraf/tasks/main.yml
index 9226fa6871d1461340bc519635bdfa11cde60c4e..5ca8af1fb00c518dfc260e78dc4885cc8c5cf701 100644
--- a/roles/telegraf/tasks/main.yml
+++ b/roles/telegraf/tasks/main.yml
@@ -71,6 +71,20 @@
   tags:
     - configuration
 
+- name: Install multifile plugin for mlx hw_counters
+  template:
+    src: inputs.multifile_mlx.conf.j2
+    dest: /etc/telegraf/telegraf.d/inputs.multifile_mlx.conf
+    owner: telegraf
+    group: telegraf
+    mode: '640'
+  notify:
+    - "restart telegraf"
+  become: true
+  become_user: root
+  tags:
+    - configuration
+
 - name: Install nvidia-smi plugin
   template:
     src: inputs.nvidia_smi.conf.j2
@@ -84,4 +98,5 @@
   become_user: root
   tags:
     - configuration
-    - gpu
\ No newline at end of file
+    - gpu
+  when: "'VisNodes' in group_names"
diff --git a/roles/telegraf/templates/inputs.multifile_mlx.conf.j2 b/roles/telegraf/templates/inputs.multifile_mlx.conf.j2
new file mode 100644
index 0000000000000000000000000000000000000000..c6adbfd9276aa226c265d25792afd2c7a54f98ea
--- /dev/null
+++ b/roles/telegraf/templates/inputs.multifile_mlx.conf.j2
@@ -0,0 +1,25 @@
+# Read mlx hardware counters
+# Rendered from `hwcounterlist`, expected to map each InfiniBand device name
+# to the hw_counters file names to read for that device (port 1 only).
+{% if hwcounterlist is defined and hwcounterlist %}
+{% for interface in hwcounterlist %}
+[[inputs.multifile]]
+  name_override = 'infiniband'
+  base_dir = '/sys/class/infiniband'
+  interval = '60s'
+
+  # Plugin tags are a single TOML table, not an array of tables.
+  [inputs.multifile.tags]
+    device = '{{ interface }}'
+    port = '1'
+    type = 'hw_counters'
+
+  {% for counter in hwcounterlist[interface] | sort %}
+[[inputs.multifile.file]]
+  file = '{{ interface }}/ports/1/hw_counters/{{ counter }}'
+  conversion = 'int'
+
+  {% endfor %}
+{% endfor %}
+{% endif %}
+