NOTE(review): this patch was re-expanded from a copy flattened onto a single line; the leading
whitespace of context and added lines is reconstructed -- verify it against the target files before applying.

diff --git a/roles/telegraf/files/telegraf_softnet_stats.awk b/roles/telegraf/files/telegraf_softnet_stats.awk
new file mode 100644
index 0000000000000000000000000000000000000000..b24ef34a4261e012eef082e38c021e388f76e5f7
--- /dev/null
+++ b/roles/telegraf/files/telegraf_softnet_stats.awk
@@ -0,0 +1,8 @@
+#! /usr/bin/awk -f
+#
+# Emit one InfluxDB line-protocol point ("softnet") per input row; Telegraf feeds it /proc/net/softnet_stat.
+# The 0-based row index (NR-1) becomes the cpu tag -- this assumes one row per CPU, in CPU order.
+# Columns 1-3 are parsed as hexadecimal with strtonum(), a gawk extension, so awk must resolve to gawk.
+#
+
+{ printf("softnet,cpu=%d sd_processed=%di,sd_dropped=%di,sd_time_squeeze=%di\n", NR-1, strtonum("0x"$1), strtonum("0x"$2), strtonum("0x"$3)) }
diff --git a/roles/telegraf/tasks/main.yml b/roles/telegraf/tasks/main.yml
index af25f19b059752095d288f3fb5307a9b5f3dd895..a3afc7baf08bcab809ff7a49782378161469edb4 100644
--- a/roles/telegraf/tasks/main.yml
+++ b/roles/telegraf/tasks/main.yml
@@ -64,7 +64,16 @@
     dest: '/opt/telegraf/bin/telegraf_slurmstats.py'
   become: true
   become_user: root
-#
+
+# Installed without the executable bit: Telegraf runs the script as "awk -f <script>".
+- name: copy softnet_stat script
+  copy:
+    mode: 'u=rw,g=r,o=r'
+    src: telegraf_softnet_stats.awk
+    dest: '/opt/telegraf/bin/telegraf_softnet_stats.awk'
+  become: true
+  become_user: root
+
 - name: Install Telegraf config
   template:
     src: telegraf.conf.j2
diff --git a/roles/telegraf/templates/telegraf.conf.j2 b/roles/telegraf/templates/telegraf.conf.j2
index 30a743cb5fe54e7d1c9aa504fdf8fe55abf3e1d3..e50589dcace9d2fe81155f17b3e2c22d2e060215 100644
--- a/roles/telegraf/templates/telegraf.conf.j2
+++ b/roles/telegraf/templates/telegraf.conf.j2
@@ -157,6 +157,15 @@
   timeout="4s"
   interval="300s"
 
+# Softnet counters: awk converts each row of /proc/net/softnet_stat to influx line protocol.
+[[inputs.exec]]
+  commands = [
+    "awk -f /opt/telegraf/bin/telegraf_softnet_stats.awk /proc/net/softnet_stat"
+  ]
+  data_format = "influx"
+  timeout="4s"
+  interval="600s"
+
 # Both Slurm ManagementNodes will log sdiag stats, but no Compute or Login nodes will
 {% if 'ManagementNodes' in group_names %}
 [[inputs.exec]]