Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hpc-team/HPCasCode
  • chines/ansible_cluster_in_a_box
2 results
Show changes
Showing
with 2611 additions and 2 deletions
# Node health check (NHC) configuration.
# Every active line has the form "<target> || <check>"; the "*" target used
# throughout applies the check to every host that reads this file.
# Lines beginning with "# * ||" are checks that are currently disabled.
#######################################################################
###
### Filesystem checks
###
# (disabled) GlusterFS mount check for /glusterVolume served from mgmt0:/gv.
# * || check_fs_mount_rw -t "fuse.glusterfs" -s "mgmt0:/gv" -f "/glusterVolume"
# Root filesystem space usage threshold: 90%.
* || check_fs_used / 90%
# * || check_fs_used /glusterVolume 90%
# Root filesystem inode usage threshold: 100%.
# NOTE(review): a 100% threshold presumably only trips once inodes are
# completely exhausted — confirm this is the intended limit.
* || check_fs_iused / 100%
# * || check_fs_iused /glusterVolume 100%
#######################################################################
###
### Hardware checks
###
# NOTE(review): per the NHC documentation the three arguments are expected
# sockets, cores and threads; "1 1 1" is applied to every host — confirm that
# all nodes really are single-CPU machines.
* || check_hw_cpuinfo 1 1 1
# (disabled) physical memory check, arguments are min, max and tolerance.
# * || check_hw_physmem 4048416kB 4048416kB 3%
# Swap is expected to be 0kB (min and max both 0kB, 3% tolerance).
* || check_hw_swap 0kB 0kB 3%
# Network interfaces eth0 and lo are checked.
* || check_hw_eth eth0
* || check_hw_eth lo
#######################################################################
###
### Process checks
###
# sshd must be running as root.
# NOTE(review): -S presumably asks NHC to start the service when it is
# missing — verify against the NHC check_ps_service documentation.
* || check_ps_service -S -u root sshd
# Render slurm.conf and slurmdbd.conf on the target host under /tmp and
# fetch the rendered files back into the local files/ directory.
- name: "Templating slurm.conf"
  template: src=slurm.conf.j2 dest=/tmp/slurm.conf owner=root group=root mode=644
  become: true

- name: fetch slurm.conf
  fetch: src=/tmp/slurm.conf dest=files/slurm.conf flat=yes

# slurmdbd.conf.j2 contains the database password (StoragePass), so the
# rendered copy in the shared /tmp directory must not be world-readable.
- name: "Templating slurmdbd.conf"
  template: src=slurmdbd.conf.j2 dest=/tmp/slurmdbd.conf owner=root group=root mode=0600
  become: true

# Privilege escalation is required here because the file is now root-only.
- name: fetch slurmdbd.conf
  fetch: src=/tmp/slurmdbd.conf dest=files/slurmdbd.conf flat=yes
  become: true
#
# Example slurm.conf file. Please run configurator.html
# (in doc/html) to build a configuration file customized
# for your environment.
#
#
# slurm.conf file generated by configurator.html.
#
# See the slurm.conf man page for more information.
#
# NOTE: this file is a Jinja2 template. Values written in double curly
# braces (clustername, slurmctrl, slurmstatedir, slurmdatadir, slurmpiddir)
# must be supplied as variables when the template is rendered.
#
ClusterName={{ clustername }}
ControlMachine={{ slurmctrl }}
# A BackupController line is emitted only when slurmctrlbackup is defined.
{% if slurmctrlbackup is defined %}
BackupController={{ slurmctrlbackup }}
{% endif %}
#ControlAddr=
#BackupController=
#BackupAddr=
#
# NOTE(review): per the Slurm documentation enable_configless lets slurmd
# fetch its configuration from slurmctld — confirm the deployed Slurm
# version supports it.
SlurmctldParameters=enable_configless
SlurmUser=slurm
SlurmdUser=root
SlurmctldPort=6817
SlurmdPort=6818
AuthType=auth/munge
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
StateSaveLocation={{ slurmstatedir }}
SlurmdSpoolDir={{ slurmdatadir }}
SwitchType=switch/none
MpiDefault=pmi2
# Both daemons write their pid files under the same slurmpiddir directory.
SlurmctldPidFile={{ slurmpiddir }}/slurmctld.pid
SlurmdPidFile={{ slurmpiddir }}/slurmd.pid
# cgroup process tracking is active; the linuxproc alternative is kept
# commented out for reference.
#ProctrackType=proctrack/linuxproc
ProctrackType=proctrack/cgroup
#PluginDir=
#FirstJobId=
ReturnToService=1
RebootProgram=/sbin/reboot
#ResumeTimeout=300
#MaxJobCount=
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
# task/cgroup is the active task plugin; the affinity variants below are
# disabled alternatives.
TaskPlugin=task/cgroup
#TaskPlugin=task/affinity
#TaskPlugin=task/affinity,task/cgroup
# The lua job-submit plugin is enabled only when slurm_lua is defined.
{% if slurm_lua is defined %}
JobSubmitPlugins=lua
{% endif %}
OverTimeLimit=1
CompleteWait=10
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=
# TIMERS
# Only SlurmctldTimeout and KillWait are set explicitly; the commented
# entries are left unset.
# NOTE(review): SlurmctldTimeout=3000 is ten times the value shown in the
# commented SlurmdTimeout example — confirm this is intentional.
SlurmctldTimeout=3000
#SlurmdTimeout=300
#InactiveLimit=0
#MinJobAge=300
KillWait=10
#Waittime=0
#
# SCHEDULING
# Scheduler and select plugins come from the slurmschedulertype and
# slurmselecttype variables.
SchedulerType={{ slurmschedulertype }}
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
SelectType={{ slurmselecttype }}
# Core+memory consumable resources are configured whenever the select type
# mentions cons_tres. A substring test is used so that a value that begins
# with cons_tres (match at index 0) is recognised as well.
{% if 'cons_tres' in slurmselecttype %}
SelectTypeParameters=CR_Core_Memory
{% endif %}
# Multifactor priority with equal weights for fairshare, age, partition and
# job size.
PriorityType=priority/multifactor
#PriorityFlags=Ticket_Based
#PriorityCalcPeriod=5
#PriorityDecayHalfLife=0
#PriorityUsageResetPeriod=14-0
PriorityWeightFairshare=10000
PriorityWeightAge=10000
PriorityWeightPartition=10000
PriorityWeightJobSize=10000
PriorityMaxAge=14-0
#
# LOGGING
# Each of slurmctlddebug, slurmddebug and slurmschedlog is optional. When a
# variable is present it must provide a level and a log entry; when it is
# absent or empty the commented placeholders are emitted instead. The
# explicit defined-test keeps rendering from aborting on a missing variable.
{% if slurmctlddebug is defined and slurmctlddebug %}
SlurmctldDebug={{ slurmctlddebug.level }}
SlurmctldLogFile={{ slurmctlddebug.log }}
{% else %}
#SlurmctldDebug=
#SlurmctldLogFile=
{% endif %}
{% if slurmddebug is defined and slurmddebug %}
SlurmdDebug={{ slurmddebug.level }}
SlurmdLogFile={{ slurmddebug.log }}
{% else %}
#SlurmdDebug=
#SlurmdLogFile=
{% endif %}
{% if slurmschedlog is defined and slurmschedlog %}
SlurmSchedlogLevel={{ slurmschedlog.level }}
SlurmSchedLogFile={{ slurmschedlog.log }}
{% else %}
#SlurmSchedlogLevel=
#SlurmSchedLogFile=
{% endif %}
JobCompType=jobcomp/none
#JobCompLoc=
#
# Prolog and Epilog come from the slurmjob variable when it is defined and
# otherwise fall back to the scripts under /opt/slurm/etc.
Prolog={{ slurmjob.prolog if slurmjob is defined else '/opt/slurm/etc/slurm.prolog' }}
Epilog={{ slurmjob.epilog if slurmjob is defined else '/opt/slurm/etc/slurm.epilog' }}
#
# ACCOUNTING
JobAcctGatherType=jobacct_gather/linux
JobAcctGatherFrequency=30
#
# Accounting records go to slurmdbd on the host named by the slurmdbd
# variable; a backup host line is added only when slurmdbdbackup is defined.
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost={{ slurmdbd }}
{% if slurmdbdbackup is defined %}
AccountingStorageBackupHost={{ slurmdbdbackup }}
{% endif %}
AccountingStorageEnforce=limits,safe
#AccountingStorageLoc=
#AccountingStoragePass=
#AccountingStorageUser=
#
#GRES
GresTypes=gpu
#
# The health check program is the nhc binary under the nhc_dir variable.
HealthCheckInterval=300
HealthCheckProgram={{ nhc_dir }}/sbin/nhc
#array jobs. max number
{% if slurm_max_array_size is defined %}
MaxArraySize={{ slurm_max_array_size }}
{% endif %}
# Fair share
# Optional override of the PriorityWeightFairshare value set in the
# SCHEDULING section. It is emitted only when slurmfairshare is defined and
# its def flag is true, so a missing variable no longer aborts rendering.
{% if slurmfairshare is defined and slurmfairshare.def %}
PriorityWeightFairshare={{ slurmfairshare.val }}
{% endif %}
DisableRootJobs=YES
MpiParams=ports=12000-12999
# COMPUTE NODES
# One NodeName line is written per unique host found in the inventory groups
# referenced by slurmqueues, followed by one PartitionName line per queue.
# Node weight grows with the vcpu count and hosts whose name contains vis
# get a gpu gres entry.
# NOTE(review): Sockets is set to the vcpu count with CoresPerSocket=1 while
# ThreadsPerCore comes from the host facts - confirm this matches the
# hardware when threads per core is greater than one.
{% set nodelist = [] %}
{% for queue in slurmqueues %}
{% for node in groups[queue.group] %}
{% set _ = nodelist.append(node) %}
{% endfor %}
{% endfor %}
{% for node in nodelist|unique %}
{% set vcpus = hostvars[node]['ansible_processor_vcpus'] %}
NodeName={{ hostvars[node]['ansible_hostname'] }} Procs={{ vcpus }} RealMemory={{ hostvars[node].ansible_memory_mb.real.total - 1024 }} Sockets={{ vcpus }} CoresPerSocket=1 ThreadsPerCore={{ hostvars[node].ansible_processor_threads_per_core }} {% if 'vis' in hostvars[node].ansible_hostname %}Gres=gpu:1{% endif %} {% if vcpus == 1 %}Weight=1{% elif vcpus > 1 and vcpus <= 16 %}Weight=3{% elif vcpus > 16 and vcpus <= 20 %}Weight=5{% elif vcpus > 20 and vcpus <= 40 %}Weight=7{% elif vcpus > 40 and vcpus <= 64 %}Weight=8{% elif vcpus > 64 and vcpus <= 128 %}Weight=9{% elif vcpus > 128 %}Weight=10{% endif %} Feature=stage1 State=UNKNOWN
{% endfor %}
{% for queue in slurmqueues %}
{% set nodenames = [] %}
{% for node in groups[queue.group] %}
{% set _ = nodenames.append(hostvars[node]['ansible_hostname']) %}
{% endfor %}
PartitionName={{ queue.name }} {% if queue.default %}Default=yes{% endif %} Nodes={{ nodenames|join(',') }} {% if queue.DefaultTime is defined %} DefaultTime={{ queue.DefaultTime }} {% endif %} {% if queue.DefMemPerCPU is defined %} DefMemPerCPU={{ queue.DefMemPerCPU }} {% endif %} {% if queue.MaxTime is defined %} MaxTime={{ queue.MaxTime }} {% endif %} State=UP
{% endfor %}
#
# Example slurmdbd.conf file.
#
# See the slurmdbd.conf man page for more information.
#
# NOTE: this file is a Jinja2 template. The slurmdbd, mysql_host and
# slurmdb_passwd variables are required; slurmdbdbackup and slurmdbdlog are
# optional.
#
# Archive info
#ArchiveJobs=yes
#ArchiveDir="/tmp"
#ArchiveSteps=yes
#ArchiveScript=
#JobPurge=12
#StepPurge=1
#
# Authentication info
AuthType=auth/munge
#AuthInfo=/var/run/munge/munge.socket.2
#
# slurmDBD info
#DbdAddr=
DbdHost={{ slurmdbd }}
# A backup daemon host is configured only when slurmdbdbackup is defined.
{% if slurmdbdbackup is defined %}
DbdBackupHost={{ slurmdbdbackup }}
{% endif %}
#DbdPort=7031
SlurmUser=slurm
#MessageTimeout=300
#DefaultQOS=normal,standby
# Logging is configured from slurmdbdlog (level and log entries) when that
# variable is defined; otherwise only commented placeholders are written.
{% if slurmdbdlog is defined %}
DebugLevel={{ slurmdbdlog.level }}
LogFile={{ slurmdbdlog.log }}
{% else %}
#DebugLevel=
#LogFile=
{% endif %}
# NOTE(review): the pid file path is hard-coded here, whereas slurm.conf
# builds its pid file paths from the slurmpiddir variable — confirm the two
# are meant to differ.
PidFile=/opt/slurm/var/run/slurmdbd.pid
#PluginDir=/usr/lib/slurm
#PrivateData=accounts,users,usage,jobs
#TrackWCKey=yes
#
# Database info
StorageType=accounting_storage/mysql
StorageHost={{ mysql_host }}
#StoragePort=1234
# The rendered file contains the database password in clear text, so it
# should be readable only by the account that runs slurmdbd.
StoragePass={{ slurmdb_passwd }}
StorageUser=slurmdb
StorageLoc=slurm_acct_db
---
# Installs a pinned set of packages with yum and then version-locks
# everything that is installed.
- name: Removing the RDO repository
  file: path=/etc/yum.repos.d/rdo-release.repo state=absent
  become: true

- name: Install epel-release
  yum: name=epel-release-7-5.noarch state=present
  become: true

# yum-utils provides the yum-config-manager command used by the next task,
# so the base packages are installed before EPEL is explicitly enabled.
- name: Installing Base Packages
  yum: name={{ item }} state=present
  with_items:
    - yum-utils
    - deltarpm-3.6-3.el7.x86_64
    - yum-plugin-versionlock
  become: true

- name: Enable epel
  command: yum-config-manager --enable epel
  become: true

# package_list must be passed as a templated expression: a bare variable
# name is treated as a literal string by current Ansible releases.
# Each entry is installed as <software>-<version>.<arch>.
- name: Installing Core packages
  yum: name="{{ item.software }}-{{ item.version }}.{{ item.arch }}" state=present
  with_items: "{{ package_list }}"
  become: true

# Lock every installed package at its current version.
- name: Performing version lock on the packages
  shell: yum versionlock \*
  become: true
---
# Entry point: statically pulls in the base package installation tasks.
# import_tasks replaces the bare "include" keyword, which is deprecated and
# no longer accepted by recent ansible-core releases.
- import_tasks: installBasePackages.yml
---
# Pinned package manifest. Every entry is a mapping with three keys:
#   software - package name
#   version  - version-release string, optionally prefixed with "epoch:"
#   arch     - package architecture (x86_64 or noarch)
# The "Installing Core packages" task joins them as
# <software>-<version>.<arch> and hands the result to yum.
package_list:
- { software: "ModemManager-glib", version: "1.1.0-6.git20130913.el7", arch: "x86_64" }
- { software: "NetworkManager", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-adsl", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-bluetooth", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-glib", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-libnm", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-team", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-tui", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-wifi", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "NetworkManager-wwan", version: "1:1.0.0-16.git20150121.b4ea599c.el7_1", arch: "x86_64" }
- { software: "PyYAML", version: "3.10-11.el7", arch: "x86_64" }
- { software: "acl", version: "2.2.51-12.el7", arch: "x86_64" }
- { software: "acpid", version: "2.0.19-5.el7", arch: "x86_64" }
- { software: "alsa-lib", version: "1.0.28-2.el7", arch: "x86_64" }
- { software: "audit", version: "2.4.1-5.el7", arch: "x86_64" }
- { software: "audit-libs", version: "2.4.1-5.el7", arch: "x86_64" }
- { software: "audit-libs-python", version: "2.4.1-5.el7", arch: "x86_64" }
- { software: "authconfig", version: "6.2.8-9.el7", arch: "x86_64" }
- { software: "autogen-libopts", version: "5.18-5.el7", arch: "x86_64" }
- { software: "avahi", version: "0.6.31-14.el7", arch: "x86_64" }
- { software: "avahi-autoipd", version: "0.6.31-14.el7", arch: "x86_64" }
- { software: "avahi-libs", version: "0.6.31-14.el7", arch: "x86_64" }
- { software: "basesystem", version: "10.0-7.el7.centos", arch: "noarch" }
- { software: "bash", version: "4.2.46-12.el7", arch: "x86_64" }
- { software: "bind-libs", version: "32:9.9.4-18.el7_1.5", arch: "x86_64" }
- { software: "bind-libs-lite", version: "32:9.9.4-18.el7_1.5", arch: "x86_64" }
- { software: "bind-license", version: "32:9.9.4-18.el7_1.5", arch: "noarch" }
- { software: "bind-utils", version: "32:9.9.4-18.el7_1.5", arch: "x86_64" }
- { software: "binutils", version: "2.23.52.0.1-30.el7_1.2", arch: "x86_64" }
- { software: "biosdevname", version: "0.6.1-2.el7", arch: "x86_64" }
- { software: "btrfs-progs", version: "3.16.2-1.el7", arch: "x86_64" }
- { software: "bzip2", version: "1.0.6-12.el7", arch: "x86_64" }
- { software: "bzip2-libs", version: "1.0.6-12.el7", arch: "x86_64" }
- { software: "ca-certificates", version: "2015.2.4-70.0.el7_1", arch: "noarch" }
- { software: "centos-logos", version: "70.0.6-2.el7.centos", arch: "noarch" }
- { software: "centos-release", version: "7-1.1503.el7.centos.2.8", arch: "x86_64" }
- { software: "checkpolicy", version: "2.1.12-6.el7", arch: "x86_64" }
- { software: "chkconfig", version: "1.3.61-4.el7", arch: "x86_64" }
- { software: "cloud-init", version: "0.7.5-10.el7.centos.1", arch: "x86_64" }
- { software: "cloud-utils-growpart", version: "0.27-20.el7.centos", arch: "x86_64" }
- { software: "coreutils", version: "8.22-12.el7_1.2", arch: "x86_64" }
- { software: "cpio", version: "2.11-22.el7", arch: "x86_64" }
- { software: "cpp", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "cracklib", version: "2.9.0-11.el7", arch: "x86_64" }
- { software: "cracklib-dicts", version: "2.9.0-11.el7", arch: "x86_64" }
- { software: "cronie", version: "1.4.11-13.el7", arch: "x86_64" }
- { software: "cronie-anacron", version: "1.4.11-13.el7", arch: "x86_64" }
- { software: "crontabs", version: "1.11-6.20121102git.el7", arch: "noarch" }
- { software: "cryptsetup-libs", version: "1.6.6-3.el7", arch: "x86_64" }
- { software: "curl", version: "7.29.0-19.el7", arch: "x86_64" }
- { software: "cyrus-sasl-lib", version: "2.1.26-17.el7", arch: "x86_64" }
- { software: "dbus", version: "1:1.6.12-11.el7", arch: "x86_64" }
- { software: "dbus-glib", version: "0.100-7.el7", arch: "x86_64" }
- { software: "dbus-libs", version: "1:1.6.12-11.el7", arch: "x86_64" }
- { software: "dbus-python", version: "1.1.1-9.el7", arch: "x86_64" }
- { software: "device-mapper", version: "7:1.02.93-3.el7_1.1", arch: "x86_64" }
- { software: "device-mapper-libs", version: "7:1.02.93-3.el7_1.1", arch: "x86_64" }
- { software: "dhclient", version: "12:4.2.5-36.el7.centos", arch: "x86_64" }
- { software: "dhcp-common", version: "12:4.2.5-36.el7.centos", arch: "x86_64" }
- { software: "dhcp-libs", version: "12:4.2.5-36.el7.centos", arch: "x86_64" }
- { software: "diffutils", version: "3.3-4.el7", arch: "x86_64" }
- { software: "dmidecode", version: "1:2.12-5.el7", arch: "x86_64" }
- { software: "dnsmasq", version: "2.66-14.el7_1", arch: "x86_64" }
- { software: "dracut", version: "033-241.el7_1.5", arch: "x86_64" }
- { software: "dracut-config-rescue", version: "033-241.el7_1.5", arch: "x86_64" }
- { software: "dracut-network", version: "033-241.el7_1.5", arch: "x86_64" }
- { software: "e2fsprogs", version: "1.42.9-7.el7", arch: "x86_64" }
- { software: "e2fsprogs-libs", version: "1.42.9-7.el7", arch: "x86_64" }
- { software: "ebtables", version: "2.0.10-13.el7", arch: "x86_64" }
- { software: "elfutils-libelf", version: "0.160-1.el7", arch: "x86_64" }
- { software: "elfutils-libs", version: "0.160-1.el7", arch: "x86_64" }
- { software: "environment-modules", version: "3.2.10-8.el7", arch: "x86_64" }
- { software: "epel-release", version: "7-5", arch: "noarch" }
- { software: "ethtool", version: "2:3.15-2.el7", arch: "x86_64" }
- { software: "expat", version: "2.1.0-8.el7", arch: "x86_64" }
- { software: "fail2ban", version: "0.9.2-1.el7", arch: "noarch" }
- { software: "fail2ban-firewalld", version: "0.9.2-1.el7", arch: "noarch" }
- { software: "fail2ban-sendmail", version: "0.9.2-1.el7", arch: "noarch" }
- { software: "fail2ban-server", version: "0.9.2-1.el7", arch: "noarch" }
- { software: "fail2ban-systemd", version: "0.9.2-1.el7", arch: "noarch" }
- { software: "file", version: "5.11-21.el7", arch: "x86_64" }
- { software: "file-libs", version: "5.11-21.el7", arch: "x86_64" }
- { software: "filesystem", version: "3.2-18.el7", arch: "x86_64" }
- { software: "findutils", version: "1:4.5.11-3.el7", arch: "x86_64" }
- { software: "fipscheck", version: "1.4.1-5.el7", arch: "x86_64" }
- { software: "fipscheck-lib", version: "1.4.1-5.el7", arch: "x86_64" }
- { software: "firewalld", version: "0.3.9-11.el7", arch: "noarch" }
- { software: "freetype", version: "2.4.11-10.el7_1.1", arch: "x86_64" }
- { software: "fxload", version: "2002_04_11-16.el7", arch: "x86_64" }
- { software: "gawk", version: "4.0.2-4.el7", arch: "x86_64" }
- { software: "gcc", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "gcc-c++", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "gcc-gfortran", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "gdbm", version: "1.10-8.el7", arch: "x86_64" }
- { software: "gettext", version: "0.18.2.1-4.el7", arch: "x86_64" }
- { software: "gettext-libs", version: "0.18.2.1-4.el7", arch: "x86_64" }
- { software: "glib-networking", version: "2.40.0-1.el7", arch: "x86_64" }
- { software: "glib2", version: "2.40.0-4.el7", arch: "x86_64" }
- { software: "glibc", version: "2.17-78.el7", arch: "x86_64" }
- { software: "glibc-common", version: "2.17-78.el7", arch: "x86_64" }
- { software: "glibc-devel", version: "2.17-78.el7", arch: "x86_64" }
- { software: "glibc-headers", version: "2.17-78.el7", arch: "x86_64" }
- { software: "gmp", version: "1:6.0.0-11.el7", arch: "x86_64" }
- { software: "gnupg2", version: "2.0.22-3.el7", arch: "x86_64" }
- { software: "gnutls", version: "3.3.8-12.el7_1.1", arch: "x86_64" }
- { software: "gobject-introspection", version: "1.36.0-4.el7", arch: "x86_64" }
- { software: "gpgme", version: "1.3.2-5.el7", arch: "x86_64" }
- { software: "grep", version: "2.20-1.el7", arch: "x86_64" }
- { software: "groff-base", version: "1.22.2-8.el7", arch: "x86_64" }
- { software: "grub2", version: "1:2.02-0.17.el7.centos.4.0.1", arch: "x86_64" }
- { software: "grub2-tools", version: "1:2.02-0.17.el7.centos.4.0.1", arch: "x86_64" }
- { software: "grubby", version: "8.28-11.el7", arch: "x86_64" }
- { software: "gsettings-desktop-schemas", version: "3.8.2-3.el7", arch: "x86_64" }
- { software: "gzip", version: "1.5-7.el7", arch: "x86_64" }
- { software: "hardlink", version: "1:1.0-19.el7", arch: "x86_64" }
- { software: "heat-cfntools", version: "1.2.8-1.el7.centos", arch: "noarch" }
- { software: "hostname", version: "3.13-3.el7", arch: "x86_64" }
- { software: "hwdata", version: "0.252-7.8.el7_1", arch: "x86_64" }
- { software: "info", version: "5.1-4.el7", arch: "x86_64" }
- { software: "initscripts", version: "9.49.24-1.el7", arch: "x86_64" }
- { software: "iproute", version: "3.10.0-21.el7", arch: "x86_64" }
- { software: "iprutils", version: "2.4.3-3.el7", arch: "x86_64" }
- { software: "ipset", version: "6.19-4.el7", arch: "x86_64" }
- { software: "ipset-libs", version: "6.19-4.el7", arch: "x86_64" }
- { software: "iptables", version: "1.4.21-13.el7", arch: "x86_64" }
- { software: "iputils", version: "20121221-6.el7_1.1", arch: "x86_64" }
- { software: "irqbalance", version: "2:1.0.7-2.el7_1", arch: "x86_64" }
- { software: "jansson", version: "2.4-6.el7", arch: "x86_64" }
- { software: "jbigkit-libs", version: "2.0-11.el7", arch: "x86_64" }
- { software: "json-c", version: "0.11-4.el7_0", arch: "x86_64" }
- { software: "kbd", version: "1.15.5-11.el7", arch: "x86_64" }
- { software: "kbd-legacy", version: "1.15.5-11.el7", arch: "noarch" }
- { software: "kbd-misc", version: "1.15.5-11.el7", arch: "noarch" }
- { software: "kernel", version: "3.10.0-123.6.3.el7", arch: "x86_64" }
- { software: "kernel", version: "3.10.0-123.8.1.el7", arch: "x86_64" }
- { software: "kernel", version: "3.10.0-229.14.1.el7", arch: "x86_64" }
- { software: "kernel-headers", version: "3.10.0-229.14.1.el7", arch: "x86_64" }
- { software: "kernel-tools", version: "3.10.0-229.14.1.el7", arch: "x86_64" }
- { software: "kernel-tools-libs", version: "3.10.0-229.14.1.el7", arch: "x86_64" }
- { software: "kexec-tools", version: "2.0.7-19.el7_1.2", arch: "x86_64" }
- { software: "keyutils-libs", version: "1.5.8-3.el7", arch: "x86_64" }
- { software: "kmod", version: "14-10.el7", arch: "x86_64" }
- { software: "kmod-libs", version: "14-10.el7", arch: "x86_64" }
- { software: "kpartx", version: "0.4.9-77.el7_1.1", arch: "x86_64" }
- { software: "krb5-libs", version: "1.12.2-15.el7_1", arch: "x86_64" }
- { software: "less", version: "458-8.el7", arch: "x86_64" }
- { software: "libX11", version: "1.6.0-2.1.el7", arch: "x86_64" }
- { software: "libX11-common", version: "1.6.0-2.1.el7", arch: "noarch" }
- { software: "libXau", version: "1.0.8-2.1.el7", arch: "x86_64" }
- { software: "libacl", version: "2.2.51-12.el7", arch: "x86_64" }
- { software: "libassuan", version: "2.1.0-3.el7", arch: "x86_64" }
- { software: "libattr", version: "2.4.46-12.el7", arch: "x86_64" }
- { software: "libblkid", version: "2.23.2-22.el7_1.1", arch: "x86_64" }
- { software: "libcap", version: "2.22-8.el7", arch: "x86_64" }
- { software: "libcap-ng", version: "0.7.3-5.el7", arch: "x86_64" }
- { software: "libcgroup", version: "0.41-8.el7", arch: "x86_64" }
- { software: "libcom_err", version: "1.42.9-7.el7", arch: "x86_64" }
- { software: "libcroco", version: "0.6.8-5.el7", arch: "x86_64" }
- { software: "libcurl", version: "7.29.0-19.el7", arch: "x86_64" }
- { software: "libdaemon", version: "0.14-7.el7", arch: "x86_64" }
- { software: "libdb", version: "5.3.21-17.el7_0.1", arch: "x86_64" }
- { software: "libdb-utils", version: "5.3.21-17.el7_0.1", arch: "x86_64" }
- { software: "libdrm", version: "2.4.56-2.el7", arch: "x86_64" }
- { software: "libedit", version: "3.0-12.20121213cvs.el7", arch: "x86_64" }
- { software: "libestr", version: "0.1.9-2.el7", arch: "x86_64" }
- { software: "libffi", version: "3.0.13-11.el7", arch: "x86_64" }
- { software: "libgcc", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libgcrypt", version: "1.5.3-12.el7_1.1", arch: "x86_64" }
- { software: "libgfortran", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libgomp", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libgpg-error", version: "1.12-3.el7", arch: "x86_64" }
- { software: "libgudev1", version: "208-20.el7_1.6", arch: "x86_64" }
- { software: "libidn", version: "1.28-3.el7", arch: "x86_64" }
- { software: "libjpeg-turbo", version: "1.2.90-5.el7", arch: "x86_64" }
- { software: "libmnl", version: "1.0.3-7.el7", arch: "x86_64" }
- { software: "libmodman", version: "2.0.1-8.el7", arch: "x86_64" }
- { software: "libmount", version: "2.23.2-22.el7_1.1", arch: "x86_64" }
- { software: "libmpc", version: "1.0.1-3.el7", arch: "x86_64" }
- { software: "libndp", version: "1.2-4.el7", arch: "x86_64" }
- { software: "libnetfilter_conntrack", version: "1.0.4-2.el7", arch: "x86_64" }
- { software: "libnfnetlink", version: "1.0.1-4.el7", arch: "x86_64" }
- { software: "libnl3", version: "3.2.21-8.el7", arch: "x86_64" }
- { software: "libnl3-cli", version: "3.2.21-8.el7", arch: "x86_64" }
- { software: "libpcap", version: "14:1.5.3-4.el7_1.2", arch: "x86_64" }
- { software: "libpciaccess", version: "0.13.1-4.1.el7", arch: "x86_64" }
- { software: "libpipeline", version: "1.2.3-3.el7", arch: "x86_64" }
- { software: "libproxy", version: "0.4.11-6.el7", arch: "x86_64" }
- { software: "libpwquality", version: "1.2.3-4.el7", arch: "x86_64" }
- { software: "libquadmath", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libquadmath-devel", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libselinux", version: "2.2.2-6.el7", arch: "x86_64" }
- { software: "libselinux-python", version: "2.2.2-6.el7", arch: "x86_64" }
- { software: "libselinux-utils", version: "2.2.2-6.el7", arch: "x86_64" }
- { software: "libsemanage", version: "2.1.10-16.el7", arch: "x86_64" }
- { software: "libsemanage-python", version: "2.1.10-16.el7", arch: "x86_64" }
- { software: "libsepol", version: "2.1.9-3.el7", arch: "x86_64" }
- { software: "libsoup", version: "2.46.0-3.el7", arch: "x86_64" }
- { software: "libss", version: "1.42.9-7.el7", arch: "x86_64" }
- { software: "libssh2", version: "1.4.3-8.el7", arch: "x86_64" }
- { software: "libstdc++", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libstdc++-devel", version: "4.8.3-9.el7", arch: "x86_64" }
- { software: "libsysfs", version: "2.1.0-16.el7", arch: "x86_64" }
- { software: "libtasn1", version: "3.8-2.el7", arch: "x86_64" }
- { software: "libteam", version: "1.15-1.el7", arch: "x86_64" }
- { software: "libtiff", version: "4.0.3-14.el7", arch: "x86_64" }
- { software: "libunistring", version: "0.9.3-9.el7", arch: "x86_64" }
- { software: "libuser", version: "0.60-7.el7_1", arch: "x86_64" }
- { software: "libutempter", version: "1.1.6-4.el7", arch: "x86_64" }
- { software: "libuuid", version: "2.23.2-22.el7_1.1", arch: "x86_64" }
- { software: "libverto", version: "0.2.5-4.el7", arch: "x86_64" }
- { software: "libwebp", version: "0.3.0-3.el7", arch: "x86_64" }
- { software: "libxcb", version: "1.9-5.el7", arch: "x86_64" }
- { software: "libxml2", version: "2.9.1-5.el7_1.2", arch: "x86_64" }
- { software: "libyaml", version: "0.1.4-11.el7_0", arch: "x86_64" }
- { software: "linux-firmware", version: "20140911-0.1.git365e80c.el7", arch: "noarch" }
- { software: "logrotate", version: "3.8.6-4.el7", arch: "x86_64" }
- { software: "lua", version: "5.1.4-14.el7", arch: "x86_64" }
- { software: "lzo", version: "2.06-6.el7_0.2", arch: "x86_64" }
- { software: "make", version: "1:3.82-21.el7", arch: "x86_64" }
- { software: "man-db", version: "2.6.3-9.el7", arch: "x86_64" }
- { software: "mariadb-libs", version: "1:5.5.44-1.el7_1", arch: "x86_64" }
- { software: "microcode_ctl", version: "2:2.1-10.el7", arch: "x86_64" }
- { software: "mozjs17", version: "17.0.0-10.el7", arch: "x86_64" }
- { software: "mpfr", version: "3.1.1-4.el7", arch: "x86_64" }
- { software: "nano", version: "2.3.1-10.el7", arch: "x86_64" }
- { software: "ncurses", version: "5.9-13.20130511.el7", arch: "x86_64" }
- { software: "ncurses-base", version: "5.9-13.20130511.el7", arch: "noarch" }
- { software: "ncurses-libs", version: "5.9-13.20130511.el7", arch: "x86_64" }
- { software: "net-tools", version: "2.0-0.17.20131004git.el7", arch: "x86_64" }
- { software: "nettle", version: "2.7.1-4.el7", arch: "x86_64" }
- { software: "newt", version: "0.52.15-4.el7", arch: "x86_64" }
- { software: "newt-python", version: "0.52.15-4.el7", arch: "x86_64" }
- { software: "nmap", version: "2:6.40-4.el7", arch: "x86_64" }
- { software: "nmap-ncat", version: "2:6.40-4.el7", arch: "x86_64" }
- { software: "nspr", version: "4.10.8-1.el7_1", arch: "x86_64" }
- { software: "nss", version: "3.19.1-5.el7_1", arch: "x86_64" }
- { software: "nss-softokn", version: "3.16.2.3-13.el7_1", arch: "x86_64" }
- { software: "nss-softokn-freebl", version: "3.16.2.3-13.el7_1", arch: "x86_64" }
- { software: "nss-sysinit", version: "3.19.1-5.el7_1", arch: "x86_64" }
- { software: "nss-tools", version: "3.19.1-5.el7_1", arch: "x86_64" }
- { software: "nss-util", version: "3.19.1-3.el7_1", arch: "x86_64" }
- { software: "ntp", version: "4.2.6p5-19.el7.centos.1", arch: "x86_64" }
- { software: "ntpdate", version: "4.2.6p5-19.el7.centos.1", arch: "x86_64" }
- { software: "numactl-libs", version: "2.0.9-5.el7_1", arch: "x86_64" }
- { software: "openldap", version: "2.4.39-6.el7", arch: "x86_64" }
- { software: "openssh", version: "6.6.1p1-12.el7_1", arch: "x86_64" }
- { software: "openssh-clients", version: "6.6.1p1-12.el7_1", arch: "x86_64" }
- { software: "openssh-server", version: "6.6.1p1-12.el7_1", arch: "x86_64" }
- { software: "openssl", version: "1:1.0.1e-42.el7.9", arch: "x86_64" }
- { software: "openssl-libs", version: "1:1.0.1e-42.el7.9", arch: "x86_64" }
- { software: "os-prober", version: "1.58-5.el7", arch: "x86_64" }
- { software: "p11-kit", version: "0.20.7-3.el7", arch: "x86_64" }
- { software: "p11-kit-trust", version: "0.20.7-3.el7", arch: "x86_64" }
- { software: "pam", version: "1.1.8-12.el7_1.1", arch: "x86_64" }
- { software: "parted", version: "3.1-20.el7", arch: "x86_64" }
- { software: "passwd", version: "0.79-4.el7", arch: "x86_64" }
- { software: "pciutils-libs", version: "3.2.1-4.el7", arch: "x86_64" }
- { software: "pcre", version: "8.32-14.el7", arch: "x86_64" }
- { software: "pinentry", version: "0.8.1-14.el7", arch: "x86_64" }
- { software: "pkgconfig", version: "1:0.27.1-4.el7", arch: "x86_64" }
- { software: "plymouth", version: "0.8.9-0.13.20140113.el7.centos", arch: "x86_64" }
- { software: "plymouth-core-libs", version: "0.8.9-0.13.20140113.el7.centos", arch: "x86_64" }
- { software: "plymouth-scripts", version: "0.8.9-0.13.20140113.el7.centos", arch: "x86_64" }
- { software: "policycoreutils", version: "2.2.5-15.el7", arch: "x86_64" }
- { software: "policycoreutils-python", version: "2.2.5-15.el7", arch: "x86_64" }
- { software: "polkit", version: "0.112-5.el7", arch: "x86_64" }
- { software: "polkit-pkla-compat", version: "0.1-4.el7", arch: "x86_64" }
- { software: "popt", version: "1.13-16.el7", arch: "x86_64" }
- { software: "postfix", version: "2:2.10.1-6.el7", arch: "x86_64" }
- { software: "ppp", version: "2.4.5-33.el7", arch: "x86_64" }
- { software: "procps-ng", version: "3.3.10-3.el7", arch: "x86_64" }
- { software: "pth", version: "2.0.7-22.el7", arch: "x86_64" }
- { software: "pygobject3-base", version: "3.8.2-6.el7", arch: "x86_64" }
- { software: "pygpgme", version: "0.3-9.el7", arch: "x86_64" }
- { software: "pyliblzma", version: "0.5.3-11.el7", arch: "x86_64" }
- { software: "python", version: "2.7.5-18.el7_1.1", arch: "x86_64" }
- { software: "python-IPy", version: "0.75-6.el7", arch: "noarch" }
- { software: "python-backports", version: "1.0-8.el7", arch: "x86_64" }
- { software: "python-boto", version: "2.38.0-2.el7", arch: "noarch" }
- { software: "python-chardet", version: "2.2.1-1.el7_1", arch: "noarch" }
- { software: "python-cheetah", version: "2.4.4-5.el7.centos", arch: "x86_64" }
- { software: "python-configobj", version: "4.7.2-7.el7", arch: "noarch" }
- { software: "python-decorator", version: "3.4.0-3.el7", arch: "noarch" }
- { software: "python-devel", version: "2.7.5-18.el7_1.1", arch: "x86_64" }
- { software: "python-iniparse", version: "0.4-9.el7", arch: "noarch" }
- { software: "python-jsonpatch", version: "1.2-3.el7.centos", arch: "noarch" }
- { software: "python-jsonpointer", version: "1.0-2.el7.centos", arch: "noarch" }
- { software: "python-kitchen", version: "1.1.1-5.el7", arch: "noarch" }
- { software: "python-libs", version: "2.7.5-18.el7_1.1", arch: "x86_64" }
- { software: "python-markdown", version: "2.4.1-1.el7.centos", arch: "noarch" }
- { software: "python-pillow", version: "2.0.0-17.gitd1c6db8.el7", arch: "x86_64" }
- { software: "python-prettytable", version: "0.7.2-2.el7.centos", arch: "noarch" }
- { software: "python-psutil", version: "1.2.1-1.el7", arch: "x86_64" }
- { software: "python-pycurl", version: "7.19.0-17.el7", arch: "x86_64" }
- { software: "python-pygments", version: "1.4-9.el7", arch: "noarch" }
- { software: "python-pyudev", version: "0.15-6.el7", arch: "noarch" }
- { software: "python-requests", version: "2.6.0-1.el7_1", arch: "noarch" }
- { software: "python-rsa", version: "3.1.1-5.el7", arch: "noarch" }
- { software: "python-setuptools", version: "0.9.8-3.el7", arch: "noarch" }
- { software: "python-six", version: "1.6.1-1.el7", arch: "noarch" }
- { software: "python-slip", version: "0.4.0-2.el7", arch: "noarch" }
- { software: "python-slip-dbus", version: "0.4.0-2.el7", arch: "noarch" }
- { software: "python-urlgrabber", version: "3.10-6.el7", arch: "noarch" }
- { software: "python-urllib3", version: "1.10.2-2.el7_1", arch: "noarch" }
- { software: "pyxattr", version: "0.5.1-5.el7", arch: "x86_64" }
- { software: "qrencode-libs", version: "3.4.1-3.el7", arch: "x86_64" }
- { software: "rdo-release", version: "icehouse-4", arch: "noarch" }
- { software: "readline", version: "6.2-9.el7", arch: "x86_64" }
- { software: "rootfiles", version: "8.1-11.el7", arch: "noarch" }
- { software: "rpm", version: "4.11.1-25.el7", arch: "x86_64" }
- { software: "rpm-build-libs", version: "4.11.1-25.el7", arch: "x86_64" }
- { software: "rpm-libs", version: "4.11.1-25.el7", arch: "x86_64" }
- { software: "rpm-python", version: "4.11.1-25.el7", arch: "x86_64" }
- { software: "rsyslog", version: "7.4.7-7.el7_0", arch: "x86_64" }
- { software: "sed", version: "4.2.2-5.el7", arch: "x86_64" }
- { software: "selinux-policy", version: "3.13.1-23.el7_1.18", arch: "noarch" }
- { software: "selinux-policy-targeted", version: "3.13.1-23.el7_1.18", arch: "noarch" }
- { software: "setools-libs", version: "3.3.7-46.el7", arch: "x86_64" }
- { software: "setup", version: "2.8.71-5.el7", arch: "noarch" }
- { software: "shadow-utils", version: "2:4.1.5.1-18.el7", arch: "x86_64" }
- { software: "shared-mime-info", version: "1.1-7.el7", arch: "x86_64" }
- { software: "slang", version: "2.2.4-11.el7", arch: "x86_64" }
- { software: "snappy", version: "1.1.0-3.el7", arch: "x86_64" }
- { software: "sqlite", version: "3.7.17-6.el7_1.1", arch: "x86_64" }
- { software: "sudo", version: "1.8.6p7-13.el7", arch: "x86_64" }
- { software: "systemd", version: "208-20.el7_1.6", arch: "x86_64" }
- { software: "systemd-libs", version: "208-20.el7_1.6", arch: "x86_64" }
- { software: "systemd-python", version: "208-20.el7_1.6", arch: "x86_64" }
- { software: "systemd-sysv", version: "208-20.el7_1.6", arch: "x86_64" }
- { software: "sysvinit-tools", version: "2.88-14.dsf.el7", arch: "x86_64" }
- { software: "tar", version: "2:1.26-29.el7", arch: "x86_64" }
- { software: "tcl", version: "1:8.5.13-4.el7", arch: "x86_64" }
- { software: "tcp_wrappers-libs", version: "7.6-77.el7", arch: "x86_64" }
- { software: "teamd", version: "1.15-1.el7", arch: "x86_64" }
- { software: "telnet", version: "1:0.17-59.el7", arch: "x86_64" }
- { software: "trousers", version: "0.3.11.2-4.el7_1", arch: "x86_64" }
- { software: "tuned", version: "2.4.1-1.el7", arch: "noarch" }
- { software: "tzdata", version: "2015f-1.el7", arch: "noarch" }
- { software: "ustr", version: "1.0.4-16.el7", arch: "x86_64" }
- { software: "util-linux", version: "2.23.2-22.el7_1.1", arch: "x86_64" }
- { software: "vim-minimal", version: "2:7.4.160-1.el7", arch: "x86_64" }
- { software: "virt-what", version: "1.13-5.el7", arch: "x86_64" }
- { software: "wget", version: "1.14-10.el7_0.1", arch: "x86_64" }
- { software: "which", version: "2.20-7.el7", arch: "x86_64" }
- { software: "wpa_supplicant", version: "1:2.0-17.el7_1", arch: "x86_64" }
- { software: "xfsprogs", version: "3.2.1-6.el7", arch: "x86_64" }
- { software: "xz", version: "5.1.2-9alpha.el7", arch: "x86_64" }
- { software: "xz-libs", version: "5.1.2-9alpha.el7", arch: "x86_64" }
- { software: "yum", version: "3.4.3-125.el7.centos", arch: "noarch" }
- { software: "yum-metadata-parser", version: "1.1.4-10.el7", arch: "x86_64" }
- { software: "yum-plugin-fastestmirror", version: "1.1.31-29.el7", arch: "noarch" }
- { software: "yum-utils", version: "1.1.31-29.el7", arch: "noarch" }
- { software: "zlib", version: "1.2.7-13.el7", arch: "x86_64" }
---
# Installs the Ceph client packages, deploys the CephFS secret file and
# mounts every filesystem listed in the cephmounts variable.
- name: install packages for ceph client Debian
  package:
    name: ceph-common
    state: present
  become: true
  when: ansible_os_family == "Debian"

- name: install packages for ceph repo RHEL
  package:
    name: centos-release-ceph-nautilus.noarch
    state: present
  become: true
  register: cephrepoinstall
  when: ansible_os_family == "RedHat"

# The package is requested on every run so that a failure right after the
# repository was added is repaired by the next run. The yum cache is only
# refreshed when the repository package was newly installed.
- name: install packages for ceph client RHEL
  yum:
    name: libcephfs2
    update_cache: "{{ cephrepoinstall.changed | default(false) }}"
    state: present
  become: true
  when: ansible_os_family == "RedHat"

# The whole file is written from the variable so that a rotated secret
# replaces the old one instead of being appended after it. no_log keeps the
# secret out of the Ansible output.
- name: deploy secret file. this should not be hardcoded
  copy:
    content: "{{ cephsecret_devhpc }}\n"
    dest: /root/cephfs-dev-hpc.secret
    owner: root
    group: root
    mode: '0600'
  become: true
  no_log: true

# Each cephmounts entry supplies name, ipv4, src, fstype and opts.
- name: "Mounting ceph mounts. code copied from nfs-client"
  mount:
    name: "{{ item.name }}"
    src: "{{ item.ipv4 }}:{{ item.src }}"
    fstype: "{{ item.fstype }}"
    opts: "{{ item.opts }}"
    state: mounted
  loop: "{{ cephmounts }}"
  become: true
---
# Lists the entries found directly under /var/crash and deletes each of them.
- name: stat crash files
  find:
    path: /var/crash
  register: files

- name: remove crash files
  file:
    path: "{{ item.path }}"
    state: absent
  loop: "{{ files.files }}"
---
# Installs collectd, deploys its configuration and python plugins, and makes
# sure the service is running (restarting it when the configuration changed).
- name: install collectd - CentOS
  yum:
    name: collectd
    state: present
    enablerepo: epel
  when: '"CentOS" in ansible_distribution'
  become: true
  become_user: root

# DGX machines running RedHat use a site-specific EPEL repository id.
- name: install collectd - RHEL7
  yum:
    name: collectd
    state: present
    enablerepo: "org_monash_uni_EPEL_7_EPEL_7_-_x86_64"
  when: '"DGX" in ansible_product_name and "RedHat" in ansible_distribution'
  become: true
  become_user: root

# The result is registered so the restart task below can react to changes.
- name: install collectd config
  template:
    src: collectd.conf.j2
    dest: /etc/collectd.d/collectd.conf
    mode: "0600"
    owner: root
    group: root
  become: true
  become_user: root
  register: configchange

- name: create directory for python collectd components
  file:
    path: /etc/collectd.python
    state: directory
    owner: root
    group: root
    mode: "755"
  become: true
  become_user: root

- name: install buddyinfo python script
  template:
    src: buddyinfo.py.j2
    dest: /etc/collectd.python/buddyinfo.py
    mode: "0700"
    owner: root
    group: root
  become: true
  become_user: root

# Only deployed when the cudamonitor variable is defined.
- name: install cuda_collectd python script
  template:
    src: cuda_collectd.py.j2
    dest: /etc/collectd.python/cuda_collectd.py
    mode: "0700"
    owner: root
    group: root
  become: true
  become_user: root
  when: cudamonitor is defined

- name: install mountstats_collectd python script
  template:
    src: mountstats_collectd.py.j2
    dest: /etc/collectd.python/mountstats_collectd.py
    mode: "0700"
    owner: root
    group: root
  become: true
  become_user: root

- name: start collectd service
  service:
    name: collectd
    state: started
    enabled: true
  become: true
  become_user: root

# Restart only when the collectd.conf template task reported a change.
- name: restart collectd service
  service:
    name: collectd
    state: restarted
    enabled: true
  become: true
  become_user: root
  when: configchange.changed
#!/usr/bin/python
##########################################################################
# Copyright (c) 2015, Salesforce.com, Inc.
# All rights reserved.
#
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#
# Neither the name of Salesforce.com nor the names of its
# contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##########################################################################
"""
**buddyinfo.py**
Linux uses buddy allocator for memory management. Pages
are allocated in each NUMA node and zones within each
node. Within each zone, free pages are tracked as
contiguous groups of 1, 2, 4, 8, and so on pages
(order 0, 1, 2, 3, ...), where one page is 4K. The number of free blocks in
each bucket is exposed through /proc/buddyinfo
When this number goes below a threshold in any bucket,
kswapd (slowpath for finding free pages) kicks in. It
then scans for free pages in all order levels until
all of them reach above min limit. This process can take
long time and may cause issues for GC latencies.
Typical contents of /proc/buddyinfo:
- Node 0, zone Normal 1490 4026 12224 8508 4493 1929 849 301 101 45 5257
- Node 1, zone DMA 1 1 1 1 1 0 1 0 1 1 3
- Node 1, zone DMA32 15 3 2 5 8 7 4 4 7 8 681
- Node 1, zone Normal 6061 13681 20887 15188 9097 4546 1948 731 273 125 3976
Here are the fields interpretation in each row:
1. NUMA node (such as 0 or 1)
2. Zone name (Normal, DMA32, DMA, etc.)
3. Col. 3 to end: free-block counts per page order, i.e. buckets of contiguous memory sizes: 4K, 8K, 16K, 32K, 64K, 128K, 256K, 512K, 1024K, 2048K, and 4096K
"""
import collectd
import platform
import os
import socket
import time
import re
# Platform name; the callbacks at the bottom of the module are only
# registered when this is 'Linux'.
os_name = platform.system()
# procfs file parsed by init_stats_cache() and collect_buddyinfo().
BUDDY_FNAME = '/proc/buddyinfo'
# Plugin name and value type stamped on every dispatched metric.
METRIC_PLUGIN = 'buddyinfo'
METRIC_TYPE = 'gauge'
# Descriptive name lists; neither is referenced by the functions in this module.
buddy_fields = ['numa_node',
'zone_name',
'bucket_free_pages'
]
buddy_metrics = ['bucket_free_pages_per_sec',
'total_free_pages_per_sec',
'pct_fragment_per_sec'
]
# Bucket names ('free_pages_4K', 'free_pages_8K', ...) built by init_stats_cache().
white_list = []
# NUMA node ids and zone names seen by init_stats_cache().
node_list = []
zone_list = []
# Both dicts are keyed by (node, zone, 'val') -> list of count strings and
# (node, zone, 'ts') -> time.time() of the read.
stats_cache = {}
stats_current = {}
# Matches one row such as "Node 0, zone Normal 1490 4026 ...": captures the
# node id, the zone name and the remaining whitespace-separated counts.
re_buddyinfo=re.compile(r'^\s*Node\s+(?P<node>\d+)'
r',\s+zone\s+(?P<zone>\S+)\s+(?P<pages>.*)$')
def init_stats_cache():
    """Prime the module state from a first read of BUDDY_FNAME.

    For every row that matches ``re_buddyinfo`` the node id and zone name are
    recorded in ``node_list`` / ``zone_list`` (without duplicates) and the
    row's counts plus a timestamp are stored in ``stats_cache``. Rows that do
    not match are reported through ``collectd.error`` and skipped.

    ``white_list`` is then rebuilt with one name per bucket
    ('free_pages_4K', 'free_pages_8K', ...), sized from the last matching row.
    It is rebuilt in place rather than appended to, so calling this function
    more than once does not duplicate bucket names.

    When BUDDY_FNAME does not exist only an info message is logged.
    """
    if not os.path.exists(BUDDY_FNAME):
        collectd.info('buddyinfo: init_stats_cache: path: %s does not exist'
                      % (BUDDY_FNAME))
        return

    num_buckets = 0
    # The with-block closes the file; no explicit close() is needed.
    with open(BUDDY_FNAME) as f:
        for line in f:
            match = re_buddyinfo.search(line)
            if not match:
                collectd.error('buddyinfo: unknown line pattern: %s' % (line))
                continue
            node = match.group('node')
            zone = match.group('zone')
            free_pages = match.group('pages').strip().split()
            num_buckets = len(free_pages)
            if node not in node_list:
                node_list.append(node)
            if zone not in zone_list:
                zone_list.append(zone)
            stats_cache[(node, zone, 'val')] = free_pages
            stats_cache[(node, zone, 'ts')] = time.time()

    # Bucket i holds blocks of 2**i pages of 4K each. Slice assignment keeps
    # the same list object that the rest of the module refers to.
    white_list[:] = ['free_pages_' + str(4*2**i) + 'K'
                     for i in range(0, num_buckets)]
    collectd.info('buddyinfo: node_list : %s' % (node_list))
    collectd.info('buddyinfo: zone_list : %s' % (zone_list))
    collectd.info('buddyinfo: white_list: %s' % (white_list))
def collect_buddyinfo():
    """Read BUDDY_FNAME and dispatch one gauge per (node, zone, bucket).

    Each row that matches ``re_buddyinfo`` is stored in ``stats_current``
    (counts plus timestamp) and then dispatched bucket by bucket. The metric
    uses the node id as plugin instance and
    'node_<node>_zone_<zone>.<bucket name>' as type instance, with bucket
    names taken from ``white_list``. Non-matching rows are skipped silently.

    Bucket names and counts are paired with zip(), so a row with fewer
    columns than ``white_list`` dispatches what it has instead of raising
    IndexError.

    When BUDDY_FNAME does not exist an error is logged and nothing is
    dispatched.
    """
    if not os.path.exists(BUDDY_FNAME):
        collectd.error('buddyinfo: procfs path: %s does not exist'
                       % (BUDDY_FNAME))
        return

    # The with-block closes the file; no explicit close() is needed.
    with open(BUDDY_FNAME) as f:
        for line in f:
            match = re_buddyinfo.search(line)
            if not match:
                continue
            node = match.group('node')
            zone = match.group('zone')
            free_pages = match.group('pages').strip().split()
            stats_current[(node, zone, 'val')] = free_pages
            stats_current[(node, zone, 'ts')] = time.time()

            metric = collectd.Values()
            metric.plugin = METRIC_PLUGIN
            metric.plugin_instance = node
            metric.type = METRIC_TYPE
            prefix = 'node_' + node + '_zone_' + zone + '.'
            # The same Values object is reused; only the type instance and
            # the value change between dispatches.
            for bucket_name, count in zip(white_list, free_pages):
                metric.type_instance = prefix + bucket_name
                metric.values = [count]
                metric.dispatch()
def swap_current_cache():
    """Replace the module-level ``stats_cache`` with a copy of ``stats_current``.

    The ``global`` declaration is required: without it the assignment only
    binds a local name and the module-level cache is never updated.
    """
    global stats_cache
    stats_cache = stats_current.copy()
def configer(ObjConfiguration):
    """Config callback: logs an info message; the passed configuration object is not read."""
    message = 'buddyinfo plugin: configuring host'
    collectd.info(message)
def initer():
    """Init callback: log the white list, prime the caches, then log the cache."""
    before = 'buddyinfo initer: white list: %s' % (white_list)
    collectd.info(before)

    init_stats_cache()

    after = 'buddyinfo init: stats_cache: %s' % (stats_cache)
    collectd.info(after)
def reader(input_data=None):
    """Read callback: dispatch the current buddyinfo values, then refresh the cache.

    ``input_data`` is accepted for the callback signature and is not used.
    """
    # Step 1: parse the procfs file and dispatch the metrics.
    collect_buddyinfo()
    # Step 2: carry the freshly read values over into the cache.
    swap_current_cache()
def writer(metric, data=None):
    """Write callback: emit one debug line per value of ``metric``.

    ``data`` is accepted for the callback signature and is not used.
    """
    for value in metric.values:
        line = '%s (%s): %f' % (metric.plugin, metric.type, value)
        collectd.debug(line)
def shutdown():
    """Shutdown callback: log that the plugin is going away."""
    farewell = 'buddyinfo plugin shutting down'
    collectd.info(farewell)
#== Callbacks ==#
# The plugin reads a Linux procfs file, so on any other platform it only
# logs a warning and registers nothing.
if os_name != 'Linux':
    collectd.warning('buddyinfo plugin currently works for Linux only')
else:
    collectd.register_config(configer)
    collectd.register_init(initer)
    collectd.register_read(reader)
    collectd.register_write(writer)
    collectd.register_shutdown(shutdown)
# Config file for collectd(1).
#
# Some plugins need additional configuration and are disabled by default.
# Please read collectd.conf(5) for details.
#
# You should also read /usr/share/doc/collectd-core/README.Debian.plugins
# before enabling any more plugins.
##############################################################################
# Global #
#----------------------------------------------------------------------------#
# Global settings for the daemon. #
##############################################################################
#Hostname "localhost"
FQDNLookup true
#BaseDir "/var/lib/collectd"
#PluginDir "/usr/lib/collectd"
#TypesDB "/usr/share/collectd/types.db" "/etc/collectd/my_types.db"
#----------------------------------------------------------------------------#
# When enabled, plugins are loaded automatically with the default options #
# when an appropriate <Plugin ...> block is encountered. #
# Disabled by default. #
#----------------------------------------------------------------------------#
#AutoLoadPlugin false
#----------------------------------------------------------------------------#
# When enabled, internal statistics are collected, using "collectd" as the #
# plugin name. #
# Disabled by default. #
#----------------------------------------------------------------------------#
#CollectInternalStats false
#----------------------------------------------------------------------------#
# Interval at which to query values. This may be overwritten on a per-plugin #
# base by using the 'Interval' option of the LoadPlugin block: #
# <LoadPlugin foo> #
# Interval 60 #
# </LoadPlugin> #
#----------------------------------------------------------------------------#
Interval 30
#MaxReadInterval 86400
#Timeout 2
#ReadThreads 5
#WriteThreads 5
# Limit the size of the write queue. Default is no limit. Setting up a limit
# is recommended for servers handling a high volume of traffic.
#WriteQueueLimitHigh 1000000
#WriteQueueLimitLow 800000
##############################################################################
# Logging #
#----------------------------------------------------------------------------#
# Plugins which provide logging functions should be loaded first, so log #
# messages generated when loading or configuring other plugins can be #
# accessed. #
##############################################################################
#LoadPlugin logfile
LoadPlugin syslog
LoadPlugin network
#LoadPlugin log_logstash
#<Plugin logfile>
# LogLevel "info"
# File STDOUT
# Timestamp true
# PrintSeverity false
#</Plugin>
<Plugin syslog>
LogLevel info
</Plugin>
# Send collected values to the central collectd server on port 25826; the
# server address and the password are filled in from template variables.
# NOTE(review): no SecurityLevel is set in this <Server> block. According to
# collectd.conf(5) the level defaults to "None", in which case Username and
# Password are presumably not used - confirm whether Sign or Encrypt was intended.
<Plugin network>
<Server "{{ collectd_server }}" "25826">
Username collectd
Password "{{ collectd_password }}"
</Server>
</Plugin>
#<Plugin log_logstash>
# LogLevel info
# File "/var/log/collectd.json.log"
#</Plugin>
##############################################################################
# LoadPlugin section #
#----------------------------------------------------------------------------#
# Specify what features to activate. #
##############################################################################
#LoadPlugin aggregation
#LoadPlugin amqp
#LoadPlugin apache
#LoadPlugin apcups
#LoadPlugin ascent
#LoadPlugin barometer
LoadPlugin battery
#LoadPlugin bind
#LoadPlugin ceph
#LoadPlugin cgroups
#LoadPlugin conntrack
#LoadPlugin contextswitch
LoadPlugin cpu
LoadPlugin cpufreq
#LoadPlugin csv
#LoadPlugin curl
#LoadPlugin curl_json
#LoadPlugin curl_xml
#LoadPlugin dbi
LoadPlugin df
LoadPlugin disk
#LoadPlugin dns
#LoadPlugin drbd
#LoadPlugin email
LoadPlugin entropy
LoadPlugin ethstat
#LoadPlugin exec
#LoadPlugin fhcount
#LoadPlugin filecount
#LoadPlugin fscache
#LoadPlugin gmond
#LoadPlugin hddtemp
LoadPlugin interface
#LoadPlugin ipc
#LoadPlugin ipmi
#LoadPlugin iptables
#LoadPlugin ipvs
#LoadPlugin irq
#LoadPlugin java
LoadPlugin load
#LoadPlugin lvm
#LoadPlugin madwifi
#LoadPlugin mbmon
#LoadPlugin md
#LoadPlugin memcachec
#LoadPlugin memcached
LoadPlugin memory
#LoadPlugin modbus
#LoadPlugin multimeter
#LoadPlugin mysql
#LoadPlugin netlink
#LoadPlugin nfs
#LoadPlugin nginx
#LoadPlugin notify_desktop
#LoadPlugin notify_email
#LoadPlugin ntpd
#LoadPlugin numa
#LoadPlugin nut
#LoadPlugin olsrd
#LoadPlugin openldap
#LoadPlugin openvpn
#LoadPlugin perl
#LoadPlugin pinba
#LoadPlugin ping
#LoadPlugin postgresql
#LoadPlugin powerdns
LoadPlugin processes
#LoadPlugin protocols
LoadPlugin python
#LoadPlugin redis
#LoadPlugin rrdcached
#LoadPlugin rrdtool
#LoadPlugin sensors
#LoadPlugin serial
#LoadPlugin sigrok
#LoadPlugin smart
#LoadPlugin snmp
#LoadPlugin statsd
LoadPlugin swap
LoadPlugin table
#LoadPlugin tail
#LoadPlugin tail_csv
#LoadPlugin tcpconns
#LoadPlugin teamspeak2
#LoadPlugin ted
#LoadPlugin thermal
#LoadPlugin tokyotyrant
#LoadPlugin turbostat
#LoadPlugin unixsock
LoadPlugin uptime
LoadPlugin users
#LoadPlugin uuid
#LoadPlugin varnish
#LoadPlugin virt
#LoadPlugin vmem
#LoadPlugin vserver
#LoadPlugin wireless
#LoadPlugin write_graphite
#LoadPlugin write_http
#LoadPlugin write_kafka
#LoadPlugin write_log
#LoadPlugin write_redis
#LoadPlugin write_riemann
#LoadPlugin write_sensu
#LoadPlugin write_tsdb
#LoadPlugin zfs_arc
#LoadPlugin zookeeper
##############################################################################
# Plugin configuration #
#----------------------------------------------------------------------------#
# In this section configuration stubs for each plugin are provided. A desc- #
# ription of those options is available in the collectd.conf(5) manual page. #
##############################################################################
#<Plugin aggregation>
# <Aggregation>
# #Host "unspecified"
# Plugin "cpu"
# PluginInstance "/[0,2,4,6,8]$/"
# Type "cpu"
# #TypeInstance "unspecified"
#
# SetPlugin "cpu"
# SetPluginInstance "even-%{aggregation}"
#
# GroupBy "Host"
# GroupBy "TypeInstance"
#
# CalculateNum false
# CalculateSum false
# CalculateAverage true
# CalculateMinimum false
# CalculateMaximum false
# CalculateStddev false
# </Aggregation>
#</Plugin>
#<Plugin amqp>
# <Publish "name">
# Host "localhost"
# Port "5672"
# VHost "/"
# User "guest"
# Password "guest"
# Exchange "amq.fanout"
# RoutingKey "collectd"
# Persistent false
# StoreRates false
# ConnectionRetryDelay 0
# </Publish>
#</Plugin>
#<Plugin apache>
# <Instance "foo">
# URL "http://localhost/server-status?auto"
# User "www-user"
# Password "secret"
# VerifyPeer false
# VerifyHost false
# CACert "/etc/ssl/ca.crt"
# Server "apache"
# </Instance>
#
# <Instance "bar">
# URL "http://some.domain.tld/status?auto"
# Host "some.domain.tld"
# Server "lighttpd"
# </Instance>
#</Plugin>
#<Plugin apcups>
# Host "localhost"
# Port "3551"
# ReportSeconds true
#</Plugin>
#<Plugin ascent>
# URL "http://localhost/ascent/status/"
# User "www-user"
# Password "secret"
# VerifyPeer false
# VerifyHost false
# CACert "/etc/ssl/ca.crt"
#</Plugin>
#<Plugin barometer>
# Device "/dev/i2c-0"
# Oversampling 512
# PressureOffset 0.0
# TemperatureOffset 0.0
# Normalization 2
# Altitude 238.0
# TemperatureSensor "myserver/onewire-F10FCA000800/temperature"
#</Plugin>
#<Plugin battery>
# ValuesPercentage false
# ReportDegraded false
#</Plugin>
#<Plugin bind>
# URL "http://localhost:8053/"
#
# ParseTime false
#
# OpCodes true
# QTypes true
# ServerStats true
# ZoneMaintStats true
# ResolverStats false
# MemoryStats true
#
# <View "_default">
# QTypes true
# ResolverStats true
# CacheRRSets true
#
# Zone "127.in-addr.arpa/IN"
# </View>
#</Plugin>
#<Plugin ceph>
# LongRunAvgLatency false
# ConvertSpecialMetricTypes true
# <Daemon "osd.0">
# SocketPath "/var/run/ceph/ceph-osd.0.asok"
# </Daemon>
# <Daemon "osd.1">
# SocketPath "/var/run/ceph/ceph-osd.1.asok"
# </Daemon>
# <Daemon "mon.a">
# SocketPath "/var/run/ceph/ceph-mon.ceph1.asok"
# </Daemon>
# <Daemon "mds.a">
# SocketPath "/var/run/ceph/ceph-mds.ceph1.asok"
# </Daemon>
#</Plugin>
#<Plugin cgroups>
# CGroup "libvirt"
# IgnoreSelected false
#</Plugin>
#<Plugin cpu>
# ReportByCpu true
# ReportByState true
# ValuesPercentage false
#</Plugin>
#<Plugin csv>
# DataDir "/var/lib/collectd/csv"
# StoreRates false
#</Plugin>
#<Plugin curl>
# <Page "stock_quotes">
# URL "http://finance.google.com/finance?q=NYSE%3AAMD"
# User "foo"
# Password "bar"
# Digest false
# VerifyPeer true
# VerifyHost true
# CACert "/path/to/ca.crt"
# Header "X-Custom-Header: foobar"
# Post "foo=bar"
#
# MeasureResponseTime false
# MeasureResponseCode false
# <Match>
# Regex "<span +class=\"pr\"[^>]*> *([0-9]*\\.[0-9]+) *</span>"
# DSType "GaugeAverage"
# Type "stock_value"
# Instance "AMD"
# </Match>
# </Page>
#</Plugin>
#<Plugin curl_json>
## See: http://wiki.apache.org/couchdb/Runtime_Statistics
# <URL "http://localhost:5984/_stats">
# Instance "httpd"
# <Key "httpd/requests/count">
# Type "http_requests"
# </Key>
#
# <Key "httpd_request_methods/*/count">
# Type "http_request_methods"
# </Key>
#
# <Key "httpd_status_codes/*/count">
# Type "http_response_codes"
# </Key>
# </URL>
## Database status metrics:
# <URL "http://localhost:5984/_all_dbs">
# Instance "dbs"
# <Key "*/doc_count">
# Type "gauge"
# </Key>
# <Key "*/doc_del_count">
# Type "counter"
# </Key>
# <Key "*/disk_size">
# Type "bytes"
# </Key>
# </URL>
#</Plugin>
#<Plugin curl_xml>
# <URL "http://localhost/stats.xml">
# Host "my_host"
# Instance "some_instance"
# User "collectd"
# Password "thaiNg0I"
# Digest false
# VerifyPeer true
# VerifyHost true
# CACert "/path/to/ca.crt"
# Header "X-Custom-Header: foobar"
# Post "foo=bar"
#
# <XPath "table[@id=\"magic_level\"]/tr">
# Type "magic_level"
# InstancePrefix "prefix-"
# InstanceFrom "td[1]"
# ValuesFrom "td[2]/span[@class=\"level\"]"
# </XPath>
# </URL>
#</Plugin>
#<Plugin dbi>
# <Query "num_of_customers">
# Statement "SELECT 'customers' AS c_key, COUNT(*) AS c_value \
# FROM customers_tbl"
# MinVersion 40102
# MaxVersion 50042
# <Result>
# Type "gauge"
# InstancePrefix "customer"
# InstancesFrom "c_key"
# ValuesFrom "c_value"
# </Result>
# </Query>
#
# <Database "customers_db">
# Driver "mysql"
# DriverOption "host" "localhost"
# DriverOption "username" "collectd"
# DriverOption "password" "secret"
# DriverOption "dbname" "custdb0"
# SelectDB "custdb0"
# Query "num_of_customers"
# Query "..."
# Host "..."
# </Database>
#</Plugin>
<Plugin df>
# Device "/dev/sda1"
# Device "192.168.0.2:/mnt/nfs"
# MountPoint "/home"
# FSType "ext3"
# ignore rootfs; else, the root file-system would appear twice, causing
# one of the updates to fail and spam the log
FSType rootfs
# ignore the usual virtual / temporary file-systems
FSType sysfs
FSType proc
FSType devtmpfs
FSType devpts
FSType tmpfs
FSType fusectl
FSType cgroup
IgnoreSelected true
# ReportByDevice false
# ReportInodes false
# ValuesAbsolute true
# ValuesPercentage false
</Plugin>
#<Plugin disk>
# Disk "hda"
# Disk "/sda[23]/"
# IgnoreSelected false
# UseBSDName false
# UdevNameAttr "DEVNAME"
#</Plugin>
#<Plugin dns>
# Interface "eth0"
# IgnoreSource "192.168.0.1"
# SelectNumericQueryTypes false
#</Plugin>
#<Plugin email>
# SocketFile "/var/run/collectd-email"
# SocketGroup "collectd"
# SocketPerms "0770"
# MaxConns 5
#</Plugin>
# Driver-level NIC statistics for one hard-coded interface name.
# NOTE(review): "mlx0" is not templated, so this presumably only yields data
# on hosts that actually have an interface with that name - confirm.
<Plugin ethstat>
Interface "mlx0"
MappedOnly false
</Plugin>
#<Plugin exec>
# Exec user "/path/to/exec"
# Exec "user:group" "/path/to/exec"
# NotificationExec user "/path/to/exec"
#</Plugin>
#<Plugin fhcount>
# ValuesAbsolute true
# ValuesPercentage false
#</Plugin>
#<Plugin filecount>
# <Directory "/path/to/dir">
# Instance "foodir"
# Name "*.conf"
# MTime "-5m"
# Size "+10k"
# Recursive true
# IncludeHidden false
# </Directory>
#</Plugin>
#<Plugin gmond>
# MCReceiveFrom "239.2.11.71" "8649"
#
# <Metric "swap_total">
# Type "swap"
# TypeInstance "total"
# DataSource "value"
# </Metric>
#
# <Metric "swap_free">
# Type "swap"
# TypeInstance "free"
# DataSource "value"
# </Metric>
#</Plugin>
#<Plugin hddtemp>
# Host "127.0.0.1"
# Port 7634
#</Plugin>
#<Plugin interface>
# Interface "eth0"
# IgnoreSelected false
#</Plugin>
#<Plugin ipmi>
# Sensor "some_sensor"
# Sensor "another_one"
# IgnoreSelected false
# NotifySensorAdd false
# NotifySensorRemove true
# NotifySensorNotPresent false
#</Plugin>
# The iptables plugin is not loaded ("#LoadPlugin iptables" in the LoadPlugin
# section), and "table"/"chain" are only the stock placeholder values, so this
# sample stays commented out like the other unused plugin stubs.
#<Plugin iptables>
# Chain "table" "chain"
# Chain6 "table" "chain"
#</Plugin>
#<Plugin irq>
# Irq 7
# Irq 8
# Irq 9
# IgnoreSelected true
#</Plugin>
#<Plugin java>
# JVMArg "-verbose:jni"
# JVMArg "-Djava.class.path=/usr/share/collectd/java/collectd-api.jar"
#
# LoadPlugin "org.collectd.java.GenericJMX"
# <Plugin "GenericJMX">
# # See /usr/share/doc/collectd/examples/GenericJMX.conf
# # for an example config.
# </Plugin>
#</Plugin>
<Plugin load>
ReportRelative true
</Plugin>
#<Plugin madwifi>
# Interface "wlan0"
# IgnoreSelected false
# Source "SysFS"
# WatchSet "None"
# WatchAdd "node_octets"
# WatchAdd "node_rssi"
# WatchAdd "is_rx_acl"
# WatchAdd "is_scan_active"
#</Plugin>
#<Plugin mbmon>
# Host "127.0.0.1"
# Port 411
#</Plugin>
#<Plugin md>
# Device "/dev/md0"
# IgnoreSelected false
#</Plugin>
#<Plugin memcachec>
# <Page "plugin_instance">
# Server "localhost"
# Key "page_key"
# <Match>
# Regex "(\\d+) bytes sent"
# ExcludeRegex "<lines to be excluded>"
# DSType CounterAdd
# Type "ipt_octets"
# Instance "type_instance"
# </Match>
# </Page>
#</Plugin>
#<Plugin memcached>
# <Instance "local">
# Socket "/var/run/memcached.sock"
# or:
# Host "127.0.0.1"
# Port "11211"
# </Instance>
#</Plugin>
#<Plugin memory>
# ValuesAbsolute true
# ValuesPercentage false
#</Plugin>
#<Plugin modbus>
# <Data "data_name">
# RegisterBase 1234
# RegisterCmd ReadHolding
# RegisterType float
# Type gauge
# Instance "..."
# </Data>
#
# <Host "name">
# Address "addr"
# Port "1234"
# Interval 60
#
# <Slave 1>
# Instance "foobar" # optional
# Collect "data_name"
# </Slave>
# </Host>
#</Plugin>
#<Plugin mysql>
# <Database db_name>
# Host "database.serv.er"
# Port "3306"
# User "db_user"
# Password "secret"
# Database "db_name"
# MasterStats true
# ConnectTimeout 10
# InnodbStats true
# </Database>
#
# <Database db_name2>
# Alias "squeeze"
# Host "localhost"
# Socket "/var/run/mysql/mysqld.sock"
# SlaveStats true
# SlaveNotifications true
# </Database>
#</Plugin>
#<Plugin netlink>
# Interface "All"
# VerboseInterface "All"
# QDisc "eth0" "pfifo_fast-1:0"
# Class "ppp0" "htb-1:10"
# Filter "ppp0" "u32-1:0"
# IgnoreSelected false
#</Plugin>
#<Plugin network>
# # client setup:
# Server "ff18::efc0:4a42" "25826"
# <Server "239.192.74.66" "25826">
# SecurityLevel Encrypt
# Username "user"
# Password "secret"
# Interface "eth0"
# ResolveInterval 14400
# </Server>
# TimeToLive 128
#
# # server setup:
# Listen "ff18::efc0:4a42" "25826"
# <Listen "239.192.74.66" "25826">
# SecurityLevel Sign
# AuthFile "/etc/collectd/passwd"
# Interface "eth0"
# </Listen>
# MaxPacketSize 1452
#
# # proxy setup (client and server as above):
# Forward true
#
# # statistics about the network plugin itself
# ReportStats false
#
# # "garbage collection"
# CacheFlush 1800
#</Plugin>
#<Plugin nginx>
# URL "http://localhost/status?auto"
# User "www-user"
# Password "secret"
# VerifyPeer false
# VerifyHost false
# CACert "/etc/ssl/ca.crt"
#</Plugin>
#<Plugin notify_desktop>
# OkayTimeout 1000
# WarningTimeout 5000
# FailureTimeout 0
#</Plugin>
#<Plugin notify_email>
# SMTPServer "localhost"
# SMTPPort 25
# SMTPUser "my-username"
# SMTPPassword "my-password"
# From "collectd@main0server.com"
# # <WARNING/FAILURE/OK> on <hostname>.
# # Beware! Do not use more than two placeholders (%)!
# Subject "[collectd] %s on %s!"
# Recipient "email1@domain1.net"
# Recipient "email2@domain2.com"
#</Plugin>
#<Plugin ntpd>
# Host "localhost"
# Port 123
# ReverseLookups false
# IncludeUnitID true
#</Plugin>
#<Plugin nut>
# UPS "upsname@hostname:port"
#</Plugin>
#<Plugin olsrd>
# Host "127.0.0.1"
# Port "2006"
# CollectLinks "Summary"
# CollectRoutes "Summary"
# CollectTopology "Summary"
#</Plugin>
#<Plugin openldap>
# <Instance "localhost">
# URL "ldap://localhost:389"
# StartTLS false
# VerifyHost true
# CACert "/path/to/ca.crt"
# Timeout -1
# Version 3
# </Instance>
#</Plugin>
#<Plugin openvpn>
# StatusFile "/etc/openvpn/openvpn-status.log"
# ImprovedNamingSchema false
# CollectCompression true
# CollectIndividualUsers true
# CollectUserCount false
#</Plugin>
#<Plugin perl>
# IncludeDir "/my/include/path"
# BaseName "Collectd::Plugins"
# EnableDebugger ""
# LoadPlugin Monitorus
# LoadPlugin OpenVZ
#
# <Plugin foo>
# Foo "Bar"
# Qux "Baz"
# </Plugin>
#</Plugin>
#<Plugin pinba>
# Address "::0"
# Port "30002"
# <View "name">
# Host "host name"
# Server "server name"
# Script "script name"
# </View>
#</Plugin>
#<Plugin ping>
# Host "host.foo.bar"
# Host "host.baz.qux"
# Interval 1.0
# Timeout 0.9
# TTL 255
# SourceAddress "1.2.3.4"
# Device "eth0"
# MaxMissed -1
#</Plugin>
#<Plugin postgresql>
# <Query magic>
# Statement "SELECT magic FROM wizard WHERE host = $1;"
# Param hostname
#
# <Result>
# Type gauge
# InstancePrefix "magic"
# ValuesFrom "magic"
# </Result>
# </Query>
#
# <Query rt36_tickets>
# Statement "SELECT COUNT(type) AS count, type \
# FROM (SELECT CASE \
# WHEN resolved = 'epoch' THEN 'open' \
# ELSE 'resolved' END AS type \
# FROM tickets) type \
# GROUP BY type;"
#
# <Result>
# Type counter
# InstancePrefix "rt36_tickets"
# InstancesFrom "type"
# ValuesFrom "count"
# </Result>
# </Query>
#
# <Writer sqlstore>
# # See /usr/share/doc/collectd-core/examples/postgresql/collectd_insert.sql for details
# Statement "SELECT collectd_insert($1, $2, $3, $4, $5, $6, $7, $8, $9);"
# StoreRates true
# </Writer>
#
# <Database foo>
# Host "hostname"
# Port 5432
# User "username"
# Password "secret"
#
# SSLMode "prefer"
# KRBSrvName "kerberos_service_name"
#
# Query magic
# </Database>
#
# <Database bar>
# Interval 60
# Service "service_name"
#
# Query backend # predefined
# Query rt36_tickets
# </Database>
#
# <Database qux>
# Service "collectd_store"
# Writer sqlstore
# # see collectd.conf(5) for details
# CommitInterval 30
# </Database>
#</Plugin>
#<Plugin powerdns>
# <Server "server_name">
# Collect "latency"
# Collect "udp-answers" "udp-queries"
# Socket "/var/run/pdns.controlsocket"
# </Server>
# <Recursor "recursor_name">
# Collect "questions"
# Collect "cache-hits" "cache-misses"
# Socket "/var/run/pdns_recursor.controlsocket"
# </Recursor>
# LocalSocket "/opt/collectd/var/run/collectd-powerdns"
#</Plugin>
#<Plugin processes>
# Process "name"
# ProcessMatch "foobar" "/usr/bin/perl foobar\\.pl.*"
#</Plugin>
#<Plugin protocols>
# Value "/^Tcp:/"
# IgnoreSelected false
#</Plugin>
# Python read plugins, loaded from /etc/collectd.python/. buddyinfo and
# mountstats_collectd are always imported; cuda_collectd is only imported
# when the template variable `cudamonitor` is defined. The <Module> blocks
# are empty, so no options are passed to the modules.
# The commented lines that follow the closing tag are the remainder of the
# stock sample block.
<Plugin python>
ModulePath "/etc/collectd.python/"
Import "buddyinfo"
<Module buddyinfo>
</Module>
{% if cudamonitor is defined %}
Import "cuda_collectd"
<Module cuda_collectd>
</Module>
{% endif %}
Import "mountstats_collectd"
<Module mountstats_collectd>
</Module>
</Plugin>
# LogTraces true
# Interactive true
# Import "spam"
#
# <Module spam>
# spam "wonderful" "lovely"
# </Module>
#</Plugin>
#<Plugin redis>
# <Node example>
# Host "redis.example.com"
# Port "6379"
# Timeout 2000
# </Node>
#</Plugin>
#<Plugin rrdcached>
# DaemonAddress "unix:/var/run/rrdcached.sock"
# DataDir "/var/lib/rrdcached/db/collectd"
# CreateFiles true
# CreateFilesAsync false
# CollectStatistics true
#
# The following settings are rather advanced
# and should usually not be touched:
# StepSize 10
# HeartBeat 20
# RRARows 1200
# RRATimespan 158112000
# XFF 0.1
#</Plugin>
# The rrdtool plugin is not loaded ("#LoadPlugin rrdtool" in the LoadPlugin
# section), so its configuration is kept commented out as well. Uncomment
# both together to write RRD files locally.
#<Plugin rrdtool>
# DataDir "/var/lib/collectd/rrd"
# CacheTimeout 120
# CacheFlush 900
# WritesPerSecond 30
# CreateFilesAsync false
# RandomTimeout 0
#
# The following settings are rather advanced
# and should usually not be touched:
# StepSize 10
# HeartBeat 20
# RRARows 1200
# RRATimespan 158112000
# XFF 0.1
#</Plugin>
#<Plugin sensors>
# SensorConfigFile "/etc/sensors3.conf"
# Sensor "it8712-isa-0290/temperature-temp1"
# Sensor "it8712-isa-0290/fanspeed-fan3"
# Sensor "it8712-isa-0290/voltage-in8"
# IgnoreSelected false
#</Plugin>
#<Plugin sigrok>
# LogLevel 3
# <Device "AC Voltage">
# Driver "fluke-dmm"
# MinimumInterval 10
# Conn "/dev/ttyUSB2"
# </Device>
# <Device "Sound Level">
# Driver "cem-dt-885x"
# Conn "/dev/ttyUSB1"
# </Device>
#</Plugin>
#<Plugin smart>
# Disk "/^[hs]d[a-f][0-9]?$/"
# IgnoreSelected false
#</Plugin>
# See /usr/share/doc/collectd/examples/snmp-data.conf.gz for a
# comprehensive sample configuration.
#<Plugin snmp>
# <Data "powerplus_voltge_input">
# Type "voltage"
# Table false
# Instance "input_line1"
# Scale 0.1
# Values "SNMPv2-SMI::enterprises.6050.5.4.1.1.2.1"
# </Data>
# <Data "hr_users">
# Type "users"
# Table false
# Instance ""
# Shift -1
# Values "HOST-RESOURCES-MIB::hrSystemNumUsers.0"
# </Data>
# <Data "std_traffic">
# Type "if_octets"
# Table true
# InstancePrefix "traffic"
# Instance "IF-MIB::ifDescr"
# Values "IF-MIB::ifInOctets" "IF-MIB::ifOutOctets"
# </Data>
#
# <Host "some.switch.mydomain.org">
# Address "192.168.0.2"
# Version 1
# Community "community_string"
# Collect "std_traffic"
# Interval 120
# </Host>
# <Host "some.server.mydomain.org">
# Address "192.168.0.42"
# Version 2
# Community "another_string"
# Collect "std_traffic" "hr_users"
# </Host>
# <Host "some.ups.mydomain.org">
# Address "192.168.0.3"
# Version 1
# Community "more_communities"
# Collect "powerplus_voltge_input"
# Interval 300
# </Host>
#</Plugin>
#<Plugin statsd>
# Host "::"
# Port "8125"
# DeleteCounters false
# DeleteTimers false
# DeleteGauges false
# DeleteSets false
# TimerPercentile 90.0
# TimerPercentile 95.0
# TimerPercentile 99.0
# TimerLower false
# TimerUpper false
# TimerSum false
# TimerCount false
#</Plugin>
#<Plugin swap>
# ReportByDevice false
# ReportBytes true
#</Plugin>
# Table plugin: reads space-separated files and reports selected columns as
# gauges.
#  - Each khugepaged file under /sys/kernel/mm/transparent_hugepage/ is read
#    with column 0 as the value, under instance "khugepaged" and an instance
#    prefix named after the file.
#  - /proc/vmstat is read with column 0 as the instance name and column 1 as
#    the value.
#  - The /proc/slabinfo table at the end is a disabled sample.
<Plugin table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "alloc_sleep_millisecs"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/defrag">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "defrag"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "full_scans"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "max_ptes_none"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "pages_collapsed"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "pages_to_scan"
ValuesFrom 0
</Result>
</Table>
<Table "/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs">
Instance "khugepaged"
Separator " "
<Result>
Type gauge
InstancePrefix "scan_sleep_millisecs"
ValuesFrom 0
</Result>
</Table>
# One gauge per /proc/vmstat row: counter name from column 0, value from column 1.
<Table "/proc/vmstat">
Instance "vmstat"
Separator " "
<Result>
Type gauge
InstancePrefix "vmstat"
InstancesFrom 0
ValuesFrom 1
</Result>
</Table>
# <Table "/proc/slabinfo">
# Instance "slabinfo"
# Separator " "
# <Result>
# Type gauge
# InstancePrefix "active_objs"
# InstancesFrom 0
# ValuesFrom 1
# </Result>
# <Result>
# Type gauge
# InstancePrefix "objperslab"
# InstancesFrom 0
# ValuesFrom 4
# </Result>
# </Table>
</Plugin>
#<Plugin tail>
# <File "/var/log/exim4/mainlog">
# Instance "exim"
# Interval 60
# <Match>
# Regex "S=([1-9][0-9]*)"
# DSType "CounterAdd"
# Type "ipt_bytes"
# Instance "total"
# </Match>
# <Match>
# Regex "\\<R=local_user\\>"
# ExcludeRegex "\\<R=local_user\\>.*mail_spool defer"
# DSType "CounterInc"
# Type "counter"
# Instance "local_user"
# </Match>
# </File>
#</Plugin>
#<Plugin tail_csv>
# <Metric "dropped">
# Type "percent"
# Instance "dropped"
# ValueFrom 1
# </Metric>
# <Metric "mbps">
# Type "bytes"
# Instance "wire-realtime"
# ValueFrom 2
# </Metric>
# <Metric "alerts">
# Type "alerts_per_second"
# ValueFrom 3
# </Metric>
# <Metric "kpps">
# Type "kpackets_wire_per_sec.realtime"
# ValueFrom 4
# </Metric>
# <File "/var/log/snort/snort.stats">
# Instance "snort-eth0"
# Interval 600
# Collect "dropped" "mbps" "alerts" "kpps"
# TimeFrom 0
# </File>
#</Plugin>
#<Plugin tcpconns>
# ListeningPorts false
# AllPortsSummary false
# LocalPort "25"
# RemotePort "25"
#</Plugin>
#<Plugin teamspeak2>
# Host "127.0.0.1"
# Port "51234"
# Server "8767"
#</Plugin>
#<Plugin ted>
# Device "/dev/ttyUSB0"
# Retries 0
#</Plugin>
#<Plugin thermal>
# ForceUseProcfs false
# Device "THRM"
# IgnoreSelected false
#</Plugin>
#<Plugin tokyotyrant>
# Host "localhost"
# Port "1978"
#</Plugin>
#<Plugin turbostat>
## None of the following options should be set manually.
## This plugin automatically detects the most suitable options.
## Only set values here if:
## - The module asks you to
## - You want to disable the collection of some data
## - Your (Intel) CPU is not supported (yet) by the module
## - The module generates a lot of errors 'MSR offset 0x... read failed'
## In the last two cases, please open a bug report
#
# TCCActivationTemp "100"
# CoreCstates "392"
# PackageCstates "396"
# SystemManagementInterrupt true
# DigitalTemperatureSensor true
# PackageThermalManagement true
# RunningAveragePowerLimit "7"
#</Plugin>
#<Plugin unixsock>
# SocketFile "/var/run/collectd-unixsock"
# SocketGroup "collectd"
# SocketPerms "0660"
# DeleteSocket false
#</Plugin>
#<Plugin uuid>
# UUIDFile "/etc/uuid"
#</Plugin>
#<Plugin varnish>
# <Instance>
# CollectBackend true
# CollectBan false # Varnish 3 and above
# CollectCache true
# CollectConnections true
# CollectDirectorDNS false # Varnish 3 only
# CollectESI false
# CollectFetch false
# CollectHCB false
# CollectObjects false
# CollectPurge false # Varnish 2 only
# CollectSession false
# CollectSHM true
# CollectSMA false # Varnish 2 only
# CollectSMS false
# CollectSM false # Varnish 2 only
# CollectStruct false
# CollectTotals false
# CollectUptime false # Varnish 3 and above
# CollectdVCL false
# CollectVSM false # Varnish 4 only
# CollectWorkers false
# </Instance>
#
# <Instance "myinstance">
# CollectCache true
# </Instance>
#</Plugin>
#<Plugin virt>
# Connection "xen:///"
# RefreshInterval 60
# Domain "name"
# BlockDevice "name:device"
# InterfaceDevice "name:device"
# IgnoreSelected false
# HostnameFormat name
# InterfaceFormat name
# PluginInstanceFormat name
#</Plugin>
#<Plugin vmem>
# Verbose false
#</Plugin>
#<Plugin write_graphite>
# <Node "example">
# Host "localhost"
# Port "2003"
# Protocol "tcp"
# LogSendErrors true
# Prefix "collectd"
# Postfix "collectd"
# StoreRates true
# AlwaysAppendDS false
# EscapeCharacter "_"
# </Node>
#</Plugin>
#<Plugin write_http>
# <Node "example">
# URL "http://example.com/collectd-post"
# User "collectd"
# Password "secret"
# VerifyPeer true
# VerifyHost true
# CACert "/etc/ssl/ca.crt"
# CAPath "/etc/ssl/certs/"
# ClientKey "/etc/ssl/client.pem"
# ClientCert "/etc/ssl/client.crt"
# ClientKeyPass "secret"
# SSLVersion "TLSv1"
# Format "Command"
# StoreRates false
# BufferSize 4096
# LowSpeedLimit 0
# Timeout 0
# </Node>
#</Plugin>
#<Plugin write_kafka>
# Property "metadata.broker.list" "localhost:9092"
# <Topic "collectd">
# Format JSON
# </Topic>
#</Plugin>
#<Plugin write_riemann>
# <Node "example">
# Host "localhost"
# Port 5555
# Protocol TCP
# Batch true
# BatchMaxSize 8192
# StoreRates true
# AlwaysAppendDS false
# TTLFactor 2.0
# Notifications true
# CheckThresholds false
# EventServicePrefix ""
# </Node>
# Tag "foobar"
# Attribute "foo" "bar"
#</Plugin>
#<Plugin write_sensu>
# <Node "example">
# Host "localhost"
# Port 3030
# StoreRates true
# AlwaysAppendDS false
# Notifications true
# Metrics true
# EventServicePrefix ""
# MetricHandler "influx"
# MetricHandler "default"
# NotificationHandler "flapjack"
# NotificationHandler "howling_monkey"
# </Node>
# Tag "foobar"
# Attribute "foo" "bar"
#</Plugin>
#<Plugin write_tsdb>
# <Node>
# Host "localhost"
# Port "4242"
# HostTags "status=production"
# StoreRates false
# AlwaysAppendDS false
# </Node>
#</Plugin>
#<Plugin zookeeper>
# Host "localhost"
# Port "2181"
#</Plugin>
#!/usr/bin/env python
try:
import collectd
from collectd import Values
except:
from fakeCollectd import Values
import subprocess
import xml.etree.ElementTree as ET
def read(data=None):
    """Collect per-GPU metrics from ``nvidia-smi -q -x`` and dispatch them.

    The XML report is parsed and, for every ``<gpu>`` element, up to five
    values are dispatched under plugin ``cuda`` with plugin instance
    ``cuda-<gpu id attribute>``:

    * ``temperature``                 from ``temperature/gpu_temp``
    * ``gauge`` / ``mem_utilization`` from ``utilization/memory_util``
    * ``gauge`` / ``gpu_utilization`` from ``utilization/gpu_util``
    * ``gauge`` / ``power_state``     from ``power_readings/power_state``
    * ``gauge`` / ``power_draw``      from ``power_readings/power_draw``

    Each metric is best-effort: a missing element or an unparsable value
    (for example ``N/A``) skips that one metric and the rest still go out.

    :param data: unused; accepted so the function matches the callback
        signature used when registering with collectd.
    :raises: whatever ``subprocess.check_output`` or ``ET.fromstring`` raise
        when ``nvidia-smi`` cannot be run or its output is not valid XML.
    """
    vl = Values(type='gauge')
    vl.plugin = 'cuda'
    out = subprocess.check_output(['nvidia-smi', '-q', '-x'])
    root = ET.fromstring(out)

    # (dispatch type, type_instance or None, XML path below <gpu>, converter
    # applied to the first whitespace-separated token of the element text).
    # NOTE(review): the 1e6 scaling of the utilization values is kept from
    # the original code; the reason for it is not visible in this file.
    metrics = (
        ('temperature', None, 'temperature/gpu_temp', float),
        ('gauge', 'mem_utilization', 'utilization/memory_util',
         lambda token: 1e6 * float(token)),
        ('gauge', 'gpu_utilization', 'utilization/gpu_util',
         lambda token: 1e6 * float(token)),
        # The first character of the token is dropped before int().
        ('gauge', 'power_state', 'power_readings/power_state',
         lambda token: int(token[1:])),
        ('gauge', 'power_draw', 'power_readings/power_draw', float),
    )

    for gpu in root.iter('gpu'):
        vl.plugin_instance = 'cuda-%s' % (gpu.attrib['id'])
        for value_type, instance, path, convert in metrics:
            try:
                value = convert(gpu.find(path).text.split()[0])
                kwargs = {'type': value_type, 'values': [value]}
                # Only pass type_instance when the metric has one, so the
                # temperature dispatch keeps the exact call shape it had.
                if instance is not None:
                    kwargs['type_instance'] = instance
                vl.dispatch(**kwargs)
            except Exception:
                # Best-effort per metric; unlike a bare ``except`` this lets
                # KeyboardInterrupt / SystemExit propagate.
                continue
def readtest():
    """Run a single read() cycle; used when the script runs outside collectd."""
    read()


try:
    collectd.register_read(read)
except NameError:
    # The name `collectd` is only bound when `import collectd` at the top of
    # the file succeeded. Outside the daemon it is undefined, so do one
    # manual read instead. The original fallback called readtest() without
    # ever defining it in this file, which itself raised NameError.
    readtest()
#!/usr/bin/env python
try:
import collectd
from collectd import Values
except:
from fakeCollectd import Values
# File parsed by get_values() when no explicit path is given.
STATS = '/proc/self/mountstats'

# Names paired, in order, with the integers on a "\tbytes:\t" line.
BYTESFIELDS = ['nread', 'nwrite', 'dread', 'dwrite',
               'nfsread', 'nfswrite', 'pageread', 'pagewrite']
BYTESSTR = '\tbytes:\t'

# Names paired, in order, with the integers on an "\tevents:\t" line.
EVENTSSTR = '\tevents:\t'
EVENTSFIELDS = [
    "inode_revalidate", "dnode_revalidate", "data_invalidate",
    "attribute_invalidate", "vfs_open", "vfs_lookup", "vfs_access",
    "vfs_update_page", "vfs_read_page", "vfs_read_pages", "vfs_write_page",
    "vfs_write_pages", "vfs_getdents", "vfs_setattr", "vfs_flush",
    "vfs_fsync", "vfs_lock", "vfs_file_release", "congestion_wait",
    "truncation", "write_extension", "silly_rename", "short_read",
    "short_write", "jukebox_delay", "pnfs_read", "pnfs_write",
]


def get_values(stats_path=None):
    """Parse per-mount byte and event counters for ``fstype nfs4`` mounts.

    :param stats_path: file to parse; defaults to ``STATS``. Optional so the
        parser can be pointed at a captured file.
    :returns: dict mapping the device string of each nfs4 mount to a list of
        ``(field_name, int_value)`` tuples, the ``BYTESFIELDS`` pairs first,
        followed by the ``EVENTSFIELDS`` pairs seen for that mount.
    :raises: ``IOError``/``OSError`` if the file cannot be opened and
        ``ValueError`` if a counter is not an integer.

    The device key is whatever sits between ``device `` and ``mounted on``
    on the mount header line, which includes the trailing space. That is
    kept unchanged so existing metric names stay stable.
    """
    path = STATS if stats_path is None else stats_path
    rv = {}
    device = None
    ev = []
    with open(path, 'r') as f:
        for l in f.read().splitlines():
            if 'mounted on' in l:
                # A new mount entry starts here. Forget the previous mount so
                # that counters of non-nfs4 mounts are not stored under the
                # last nfs4 device (or under None).
                if 'fstype nfs4' in l:
                    device = l.split('mounted on')[0][len('device '):]
                else:
                    device = None
                ev = []
            elif device is None:
                continue
            elif EVENTSSTR in l:
                # list(...) keeps this working on Python 3, where map/zip
                # return iterators that cannot be concatenated with "+".
                ev = list(zip(EVENTSFIELDS,
                              map(int, l[len(EVENTSSTR):].split())))
            elif BYTESSTR in l:
                bv = list(zip(BYTESFIELDS,
                              map(int, l[len(BYTESSTR):].split())))
                rv[device] = bv + ev
    return rv
def read(data=None):
    """Dispatch every counter returned by get_values() as a gauge.

    Values go out under plugin ``mountstats``; the mount's device string is
    the plugin instance and the counter name is the type instance.

    :param data: unused; present to match the collectd read-callback shape.
    """
    dispatcher = Values(type='gauge')
    dispatcher.plugin = 'mountstats'
    per_mount = get_values()
    for mount_name, counters in per_mount.items():
        dispatcher.plugin_instance = mount_name
        for field_name, number in counters:
            dispatcher.dispatch(type='gauge',
                                type_instance=field_name,
                                values=[number])
def readtest():
    """Run a single read() cycle; used when the script runs outside collectd."""
    read()


try:
    # Register read() with an interval argument of 60.
    collectd.register_read(read, 60)
except NameError:
    # `collectd` is only bound when `import collectd` at the top of the file
    # succeeded. Catching just NameError keeps the stand-alone fallback while
    # letting a genuine registration failure inside the daemon surface,
    # instead of being masked by one silent read().
    readtest()
---
domain: testdomain.massive.org.au
---
# default variables for config_repos
repopath: centos
\ No newline at end of file
---
# Point yum at the site proxy when `http_proxy` is set.
# `regexp` makes the task replace an existing proxy= line instead of
# appending a second one when the proxy value changes. The OS guard keeps
# the task from failing on hosts that have no /etc/yum.conf (the other yum
# tasks in this file are already limited to the RedHat family).
- name: use http_proxy if defined
  lineinfile:
    path: /etc/yum.conf
    regexp: '^proxy='
    line: "proxy=http://{{ http_proxy }}"
  become: True
  when: http_proxy is defined and ansible_os_family == 'RedHat'
# Make sure the yum-utils package is present on RedHat-family hosts.
# (Task name, including its spelling, is kept as-is so existing references
# to it keep working.)
- name: instal yum-utils package
  become: true
  package:
    state: present
    name: yum-utils
  when: ansible_os_family == 'RedHat'
# Record the currently enabled repo ids in `repolistenabled` (from cached
# metadata, -C). Read-only, so it also runs in check mode and never reports
# a change.
# NOTE(review): the pipeline keeps every output line up to its first '/', so
# any header/summary lines yum prints are in stdout_lines too — confirm the
# consumers tolerate that.
- name: get enabled repos
  #shell: yum repolist | grep -v "repo id" | grep -v "Loaded plugins" | head -n -1 | cut -f 1 -d '/' | sed -s 's/\!//'
  #shell: yum repolist all | grep enabled | cut -f 1 -d '/' | sed -s 's/\!//'
  shell: yum repolist enabled -C | cut -f 1 -d '/' | sed -s 's/\!//'
  register: repolistenabled
  check_mode: no
  changed_when: False
  when: ansible_os_family == 'RedHat'
  # `args: warn: False` dropped: the `warn` argument no longer exists for
  # command/shell in ansible-core >= 2.14 and makes the task fail there; on
  # older releases its absence only allows an advisory warning.
#- name: get disabled repos
# shell: yum repolist all | grep disabled | cut -f 1 -d '/' | sed -s 's/\!//'
# when: ansible_os_family == 'RedHat'
# register: repolistdisabled
# check_mode: no
# changed_when: False
# args:
# warn: False
# Enable every repo listed in `yumenablerepo` that is not enabled yet.
# `difference` (wanted minus enabled) replaces `symmetric_difference`, which
# also selected repos that are enabled but NOT wanted and so reported a
# change on every run. `default([])` keeps the loop expression valid when
# "get enabled repos" was skipped (non-RedHat hosts), because the loop is
# templated before `when` is evaluated.
- name: enable wanted repos
  shell: yum-config-manager --enable "{{ item }}"
  #with_items: "{{ repolistenabled.stdout_lines|difference(yumenablerepo) }}"
  with_items: "{{ yumenablerepo | difference(repolistenabled.stdout_lines | default([])) }}"
  become: true
  become_user: root
  #ignore_errors: true
  when: ansible_os_family == 'RedHat'
  register: repoenable
# Disable every currently enabled repo that is not listed in `yumenablerepo`.
# `default([])` keeps the loop expression valid when "get enabled repos" was
# skipped (non-RedHat hosts), because the loop is templated before `when`
# is evaluated.
- name: disable unwanted repos
  shell: yum-config-manager --disable "{{ item }}"
  with_items: "{{ repolistenabled.stdout_lines | default([]) | difference(yumenablerepo) }}"
  become: true
  become_user: root
  #ignore_errors: true
  when: ansible_os_family == 'RedHat'
  register: repodisable
# Use mate DE on systems that have moved to gnome3, since there is no gpu acceleration by default on NeCTAR openstack
# Trusty (Ubuntu 14.04 LTS) needs repos added. Wheezy (Debian Stable) gets mate from backports, Utopic (Ubuntu 14.10) Jessie (Debian testing) and Sid (Debian unstable) get it by default
# Add the ubuntu-mate-dev "ppa" PPA; only runs on the trusty release.
- name: add repos apt
  become: true
  when: ansible_distribution_release == 'trusty'
  shell: add-apt-repository -y ppa:ubuntu-mate-dev/ppa
# Add the ubuntu-mate-dev "trusty-mate" PPA; only runs on the trusty release.
- name: add repos apt
  become: true
  when: ansible_distribution_release == 'trusty'
  shell: add-apt-repository -y ppa:ubuntu-mate-dev/trusty-mate
# Refresh the apt package index on Debian-family hosts.
- name: apt-get update
  become: true
  apt:
    update_cache: True
  when: ansible_os_family == "Debian"
# Rebuild the yum caches, but only when one of the enable/disable tasks
# above reported a change. Runs even in check mode, asynchronously with a
# 600 second limit, polled every 5 seconds.
- name: force refresh of the repository cache
  shell: |
    yum clean metadata
    yum clean all
    yum updateinfo
    yum makecache
  become: true
  async: 600
  poll: 5
  check_mode: no
  when: ansible_os_family == 'RedHat' and ( repoenable.changed or repodisable.changed)
  # `args: warn: False` dropped: the `warn` argument no longer exists for
  # command/shell in ansible-core >= 2.14 and makes the task fail there; on
  # older releases its absence only allows an advisory warning.
# Place this file in your /etc/yum.repos.d/ directory
# Defines three GlusterFS repositories that follow the upstream LATEST
# release: arch-specific and noarch binaries (enabled) and sources (disabled).
# skip_if_unavailable=1 is set on each repository.
# NOTE(review): every section uses gpgcheck=0 together with a plain-http
# baseurl, so packages from these repos are installed without signature
# verification — confirm this is intended.
[glusterfs-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/$basearch/
enabled=1
skip_if_unavailable=1
gpgcheck=0
[glusterfs-noarch-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/noarch
enabled=1
skip_if_unavailable=1
gpgcheck=0
# Source RPMs; disabled by default (enabled=0).
[glusterfs-source-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes. - Source
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/SRPMS
enabled=0
skip_if_unavailable=1
gpgcheck=0
# Place this file in your /etc/yum.repos.d/ directory
# Defines three GlusterFS repositories that follow the upstream LATEST
# release: arch-specific and noarch binaries (enabled) and sources (disabled).
# skip_if_unavailable=1 is set on each repository.
# NOTE(review): every section uses gpgcheck=0 together with a plain-http
# baseurl, so packages from these repos are installed without signature
# verification — confirm this is intended.
[glusterfs-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/$basearch/
enabled=1
skip_if_unavailable=1
gpgcheck=0
[glusterfs-noarch-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes.
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/noarch
enabled=1
skip_if_unavailable=1
gpgcheck=0
# Source RPMs; disabled by default (enabled=0).
[glusterfs-source-epel]
name=GlusterFS is a clustered file-system capable of scaling to several petabytes. - Source
baseurl=http://download.gluster.org/pub/gluster/glusterfs/LATEST/EPEL.repo/epel-$releasever/SRPMS
enabled=0
skip_if_unavailable=1
gpgcheck=0
# Place this file in your /etc/yum.repos.d/ directory
# Four CentOS trees (os, updates, extras, centosplus) served from the
# `consistency0` mirror. Each section now carries its own description; they
# were all labelled "base repository" before, which made them
# indistinguishable in yum output.
# NOTE(review): sslverify=false disables TLS certificate verification for
# these https URLs — confirm this is intended.
[monashhpc_base]
name=MonashHPC base repository mirrored to control the update process
baseurl=https://consistency0/centos/$releasever/os/$basearch/
enabled=1
sslverify=false
# NOTE(review): the section id is spelled "udpates". It is left unchanged
# because the id is what other configuration (e.g. enable/disable lists)
# refers to; rename it only together with every reference.
[monashhpc_udpates]
name=MonashHPC updates repository mirrored to control the update process
baseurl=https://consistency0/centos/$releasever/updates/$basearch/
enabled=1
sslverify=false
[monashhpc_extras]
name=MonashHPC extras repository mirrored to control the update process
baseurl=https://consistency0/centos/$releasever/extras/$basearch/
enabled=1
sslverify=false
[monashhpc_centosplus]
name=MonashHPC centosplus repository mirrored to control the update process
baseurl=https://consistency0/centos/$releasever/centosplus/$basearch/
enabled=1
sslverify=false