Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
HPCasCode
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
hpc-team
HPCasCode
Commits
35af92cb
Commit
35af92cb
authored
7 years ago
by
Chris Hines
Browse files
Options
Downloads
Patches
Plain Diff
forgot to include the buddyinfo script for collectd
parent
6de8c8c9
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
roles/collectd/templates/buddyinfo.py.j2
+183
-0
183 additions, 0 deletions
roles/collectd/templates/buddyinfo.py.j2
with
183 additions
and
0 deletions
roles/collectd/templates/buddyinfo.py.j2
0 → 100644
+
183
−
0
View file @
35af92cb
#!/usr/bin/python
##########################################################################
# Copyright (c) 2015, Salesforce.com, Inc.
# All rights reserved.
#
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#
# Neither the name of Salesforce.com nor the names of its
# contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##########################################################################
"""
**buddyinfo.py**
Linux uses a buddy allocator for memory management. Pages
are allocated in each NUMA node and in zones within each
node. Within each zone, pages are allocated as contiguous
groups whose size doubles with each order (1, 2, 4, 8, ...
pages), where one page is 4K. The number of free pages in
each bucket is exposed through /proc/buddyinfo.
When this number drops below a threshold in any bucket,
kswapd (the slow path for finding free pages) kicks in. It
then scans for free pages at all order levels until
all of them rise above the minimum limit. This process can take
a long time and may cause issues for GC latencies.
Typical contents of /proc/buddyinfo:
- Node 0, zone Normal 1490 4026 12224 8508 4493 1929 849 301 101 45 5257
- Node 1, zone DMA 1 1 1 1 1 0 1 0 1 1 3
- Node 1, zone DMA32 15 3 2 5 8 7 4 4 7 8 681
- Node 1, zone Normal 6061 13681 20887 15188 9097 4546 1948 731 273 125 3976
Here are the fields interpretation in each row:
1. NUMA node (such as 0 or 1)
2. Zone name (Normal, DMA32, DMA, etc.)
3. Col. 3 to end: page order, i.e. buckets of contiguous memory sizes: 4K, 8K, 16K, 32K, 64K, 128K, 256K, 512K, 1024K, 2048K, and 4096K (the sample rows above have 11 such columns)
"""
import
collectd
import
platform
import
os
import
socket
import
time
import
re
# Operating system name; the collectd callbacks at the bottom of this
# file are registered only when this equals 'Linux'.
os_name = platform.system()

# procfs file the per-bucket counters are read from.
BUDDY_FNAME = '/proc/buddyinfo'

# Plugin and type names stamped on every dispatched collectd value.
METRIC_PLUGIN = 'buddyinfo'
METRIC_TYPE = 'gauge'

buddy_fields = [
    'numa_node',
    'zone_name',
    'bucket_free_pages',
]
buddy_metrics = [
    'bucket_free_pages_per_sec',
    'total_free_pages_per_sec',
    'pct_fragment_per_sec',
]

# Bucket metric names ('free_pages_4K', 'free_pages_8K', ...), and the
# NUMA nodes / zones seen so far; all three are filled by
# init_stats_cache().
white_list = []
node_list = []
zone_list = []

# Both dicts are keyed by (node, zone, 'val') -> list of count strings
# and (node, zone, 'ts') -> time.time() of the reading.
stats_cache = {}
stats_current = {}

# One /proc/buddyinfo row: "Node <n>, zone <name> <count> <count> ...".
re_buddyinfo = re.compile(
    r'^\s*Node\s+(?P<node>\d+)'
    r',\s+zone\s+(?P<zone>\S+)\s+(?P<pages>.*)$'
)
def init_stats_cache():
    """Prime the module-level caches from BUDDY_FNAME.

    For every row that matches ``re_buddyinfo`` this records the node in
    ``node_list`` and the zone in ``zone_list`` (each only once) and stores
    the row in ``stats_cache`` under ``(node, zone, 'val')`` (list of count
    strings) and ``(node, zone, 'ts')`` (``time.time()`` of the reading).
    Rows that do not match are reported through ``collectd.error`` and
    skipped.

    ``white_list`` is then rebuilt with one name per bucket column,
    ``'free_pages_4K'``, ``'free_pages_8K'``, ... The list is replaced in
    place rather than appended to, so calling this function again does not
    accumulate duplicate bucket names.

    If BUDDY_FNAME does not exist, an info message is logged and nothing
    else happens.
    """
    if not os.path.exists(BUDDY_FNAME):
        collectd.info('buddyinfo: init_stats_cache: path: %s does not exist'
                      % (BUDDY_FNAME))
        return

    num_buckets = 0
    # The with-statement closes the file; no explicit close() is needed.
    with open(BUDDY_FNAME) as f:
        for line in f:
            match = re_buddyinfo.search(line)
            if not match:
                collectd.error('buddyinfo: unknown line pattern: %s' % (line))
                continue
            node = match.group('node')
            zone = match.group('zone')
            free_pages = match.group('pages').strip().split()
            # The bucket count of the last matching row wins.
            num_buckets = len(free_pages)
            if node not in node_list:
                node_list.append(node)
            if zone not in zone_list:
                zone_list.append(zone)
            stats_cache[(node, zone, 'val')] = free_pages
            stats_cache[(node, zone, 'ts')] = time.time()

    # Bucket i is labelled with 4 * 2**i kilobytes: 4K, 8K, 16K, ...
    # Slice assignment mutates the existing list object in place.
    white_list[:] = ['free_pages_' + str(4 * 2 ** i) + 'K'
                     for i in range(num_buckets)]

    collectd.info('buddyinfo: node_list : %s' % (node_list))
    collectd.info('buddyinfo: zone_list : %s' % (zone_list))
    collectd.info('buddyinfo: white_list: %s' % (white_list))
def collect_buddyinfo():
    """Read BUDDY_FNAME and dispatch one collectd value per bucket.

    Every row matching ``re_buddyinfo`` is stored in ``stats_current``
    under ``(node, zone, 'val')`` (list of count strings) and
    ``(node, zone, 'ts')`` (``time.time()`` of the reading). For each
    bucket name in ``white_list`` a value is dispatched with
    ``plugin=METRIC_PLUGIN``, ``plugin_instance=<node>``,
    ``type=METRIC_TYPE`` and
    ``type_instance='node_<node>_zone_<zone>.<bucket name>'``.

    Rows that do not match the pattern are skipped silently. If a row has
    fewer columns than ``white_list`` has names, only the columns that are
    present are dispatched (no IndexError). If BUDDY_FNAME does not exist,
    an error is logged and nothing is dispatched.
    """
    if not os.path.exists(BUDDY_FNAME):
        collectd.error('buddyinfo: procfs path: %s does not exist'
                       % (BUDDY_FNAME))
        return

    # The with-statement closes the file; no explicit close() is needed.
    with open(BUDDY_FNAME) as f:
        for line in f:
            match = re_buddyinfo.search(line)
            if not match:
                continue
            node = match.group('node')
            zone = match.group('zone')
            free_pages = match.group('pages').strip().split()
            stats_current[(node, zone, 'val')] = free_pages
            stats_current[(node, zone, 'ts')] = time.time()

            # One Values object per row, reused for each of its buckets.
            metric = collectd.Values()
            metric.plugin = METRIC_PLUGIN
            metric.plugin_instance = node
            metric.type = METRIC_TYPE
            prefix = 'node_' + node + '_zone_' + zone + '.'
            # zip() stops at the shorter sequence, so a short row cannot
            # index past the end of free_pages.
            for bucket_name, count in zip(white_list, free_pages):
                metric.type_instance = prefix + bucket_name
                # NOTE(review): count is the string read from the file,
                # passed through unchanged as before - confirm collectd
                # coerces it to a number for the gauge type.
                metric.values = [count]
                metric.dispatch()
def swap_current_cache():
    """Replace the module-level ``stats_cache`` with a copy of ``stats_current``.

    The ``global`` declaration is required: without it the assignment
    below would only bind a function-local name and the module-level
    cache would never change.
    """
    global stats_cache
    # Shallow copy, so later key updates to stats_current do not leak
    # into the cached snapshot.
    stats_cache = stats_current.copy()
def configer(ObjConfiguration):
    """Config callback: log that the plugin is being configured.

    ``ObjConfiguration`` is accepted but not inspected.
    """
    message = 'buddyinfo plugin: configuring host'
    collectd.info(message)
def initer():
    """Init callback: log the white list, prime the caches, log the result."""
    before = 'buddyinfo initer: white list: %s' % (white_list)
    collectd.info(before)

    init_stats_cache()

    after = 'buddyinfo init: stats_cache: %s' % (stats_cache)
    collectd.info(after)
def reader(input_data=None):
    """Read callback: collect and dispatch the counters, then swap the cache.

    ``input_data`` is accepted but not used.
    """
    # Order matters: the swap copies what the collection just stored.
    for step in (collect_buddyinfo, swap_current_cache):
        step()
def writer(metric, data=None):
    """Write callback: emit a debug line for each value in ``metric.values``.

    Each line has the form ``'<plugin> (<type>): <value as %f>'``.
    ``data`` is accepted but not used.
    """
    for value in metric.values:
        fields = (metric.plugin, metric.type, value)
        collectd.debug('%s (%s): %f' % fields)
def shutdown():
    """Shutdown callback: log that the plugin is stopping."""
    message = 'buddyinfo plugin shutting down'
    collectd.info(message)
#== Callbacks ==#
# The plugin is fed from /proc/buddyinfo, so the callbacks are registered
# only on Linux; on any other platform a warning is logged instead.
if os_name != 'Linux':
    collectd.warning('buddyinfo plugin currently works for Linux only')
else:
    collectd.register_config(configer)
    collectd.register_init(initer)
    collectd.register_read(reader)
    collectd.register_write(writer)
    collectd.register_shutdown(shutdown)
This diff is collapsed.
Click to expand it.
Chris Hines
@chines
mentioned in commit
1e77381a
·
4 years ago
mentioned in commit
1e77381a
mentioned in commit 1e77381af15b449159af5f6346ba3fe3d0e68902
Toggle commit list
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment