Skip to content
Snippets Groups Projects
Commit d76b6267 authored by Kerri Wait, committed by Chris Hines
Browse files

Add `infiniband` hardware counter collection to telegraf

It's not very clean but it works!!
parent 1aa326fc
No related branches found
No related tags found
No related merge requests found
......@@ -147,6 +147,116 @@
influxdb_database="slurm"
{% endif %}
# Read mlx hardware counters for device mlx5_0, port 1.
# Each [[inputs.multifile.file]] entry names one counter file, relative to
# base_dir, and converts its content to an integer field. All counters are
# reported under the "infiniband" measurement every 60 seconds.
[[inputs.multifile]]
name_override = "infiniband"
base_dir = "/sys/class/infiniband"
interval = "60s"
# Static tags identifying where the counters come from. Telegraf expects
# per-plugin tags as a plain table ([...]), not an array of tables ([[...]]).
# NOTE(review): device and port are hard-coded; hosts with a different HCA
# name or more than one port need their own section.
[inputs.multifile.tags]
device = "mlx5_0"
port = "1"
type = "hw_counters"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/duplicate_request"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/implied_nak_seq_err"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/lifespan"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/local_ack_timeout_err"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/np_cnp_sent"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/np_ecn_marked_roce_packets"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/out_of_buffer"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/out_of_sequence"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/packet_seq_err"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/req_cqe_error"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/req_cqe_flush_error"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/req_remote_access_errors"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/req_remote_invalid_request"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/resp_cqe_error"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/resp_cqe_flush_error"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/resp_local_length_error"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/resp_remote_access_errors"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rnr_nak_retry_err"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rp_cnp_handled"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rp_cnp_ignored"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rx_atomic_requests"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rx_dct_connect"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rx_icrc_encapsulated"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rx_read_requests"
conversion = "int"
[[inputs.multifile.file]]
file = "mlx5_0/ports/1/hw_counters/rx_write_requests"
conversion = "int"
###############################################################################
# SERVICE INPUTS #
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment