Add `sum`, `livemin`, and `livemax` multiprocess modes for `Gauge`s (… · christopherime/client_python@5a5261d · GitHub
[go: up one dir, main page]

Skip to content

Commit 5a5261d

Browse files
authored
Add sum, livemin, and livemax multiprocess modes for Gauges (prometheus#794)
* add sum, livemin, and livemax multiprocessing modes
* apply suggestions from @nkov
* call it a label instead of a tag

Signed-off-by: Josh Karpel <josh.karpel@gmail.com>
1 parent 5c18ede commit 5a5261d

File tree

4 files changed

+55
-16
lines changed

4 files changed

+55
-16
lines changed

README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -609,14 +609,17 @@ def child_exit(server, worker):
609609

610610
**4. Metrics tuning (Gauge)**:
611611

612-
When `Gauge` metrics are used, additional tuning needs to be performed.
612+
When `Gauge`s are used in multiprocess applications,
613+
you must decide how to handle the metrics reported by each process.
613614
Gauges have several modes they can run in, which can be selected with the `multiprocess_mode` parameter.
614615

615-
- 'all': Default. Return a timeseries per process alive or dead.
616-
- 'liveall': Return a timeseries per process that is still alive.
617-
- 'livesum': Return a single timeseries that is the sum of the values of alive processes.
618-
- 'max': Return a single timeseries that is the maximum of the values of all processes, alive or dead.
619-
- 'min': Return a single timeseries that is the minimum of the values of all processes, alive or dead.
616+
- 'all': Default. Return a timeseries per process (alive or dead), labelled by the process's `pid` (the label is added internally).
617+
- 'min': Return a single timeseries that is the minimum of the values of all processes (alive or dead).
618+
- 'max': Return a single timeseries that is the maximum of the values of all processes (alive or dead).
619+
- 'sum': Return a single timeseries that is the sum of the values of all processes (alive or dead).
620+
621+
Prepend 'live' to the mode name to return the same result while only considering living processes
622+
(e.g., 'liveall', 'livesum', 'livemax', 'livemin').
620623

621624
```python
622625
from prometheus_client import Gauge

prometheus_client/metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ def f():
346346
d.set_function(lambda: len(my_dict))
347347
"""
348348
_type = 'gauge'
349-
_MULTIPROC_MODES = frozenset(('min', 'max', 'livesum', 'liveall', 'all'))
349+
_MULTIPROC_MODES = frozenset(('all', 'liveall', 'min', 'livemin', 'max', 'livemax', 'sum', 'livesum'))
350350

351351
def __init__(self,
352352
name: str,

prometheus_client/multiprocess.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import warnings
66

7+
from .metrics import Gauge
78
from .metrics_core import Metric
89
from .mmap_dict import MmapedDict
910
from .samples import Sample
@@ -63,8 +64,8 @@ def _parse_key(key):
6364
try:
6465
file_values = MmapedDict.read_all_values_from_file(f)
6566
except FileNotFoundError:
66-
if typ == 'gauge' and parts[1] in ('liveall', 'livesum'):
67-
# Those files can disappear between the glob of collect
67+
if typ == 'gauge' and parts[1].startswith('live'):
68+
# Files for 'live*' gauges can be deleted between the glob of collect
6869
# and now (via a mark_process_dead call) so don't fail if
6970
# the file is missing
7071
continue
@@ -96,15 +97,15 @@ def _accumulate_metrics(metrics, accumulate):
9697
name, labels, value, timestamp, exemplar = s
9798
if metric.type == 'gauge':
9899
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
99-
if metric._multiprocess_mode == 'min':
100+
if metric._multiprocess_mode in ('min', 'livemin'):
100101
current = samples_setdefault(without_pid_key, value)
101102
if value < current:
102103
samples[without_pid_key] = value
103-
elif metric._multiprocess_mode == 'max':
104+
elif metric._multiprocess_mode in ('max', 'livemax'):
104105
current = samples_setdefault(without_pid_key, value)
105106
if value > current:
106107
samples[without_pid_key] = value
107-
elif metric._multiprocess_mode == 'livesum':
108+
elif metric._multiprocess_mode in ('sum', 'livesum'):
108109
samples[without_pid_key] += value
109110
else: # all/liveall
110111
samples[(name, labels)] = value
@@ -152,11 +153,13 @@ def collect(self):
152153
return self.merge(files, accumulate=True)
153154

154155

156+
_LIVE_GAUGE_MULTIPROCESS_MODES = {m for m in Gauge._MULTIPROC_MODES if m.startswith('live')}
157+
158+
155159
def mark_process_dead(pid, path=None):
156160
"""Do bookkeeping for when one process dies in a multi-process setup."""
157161
if path is None:
158162
path = os.environ.get('PROMETHEUS_MULTIPROC_DIR', os.environ.get('prometheus_multiproc_dir'))
159-
for f in glob.glob(os.path.join(path, f'gauge_livesum_{pid}.db')):
160-
os.remove(f)
161-
for f in glob.glob(os.path.join(path, f'gauge_liveall_{pid}.db')):
162-
os.remove(f)
163+
for mode in _LIVE_GAUGE_MULTIPROCESS_MODES:
164+
for f in glob.glob(os.path.join(path, f'gauge_{mode}_{pid}.db')):
165+
os.remove(f)

tests/test_multiprocess.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,17 @@ def test_gauge_min(self):
132132
g2.set(2)
133133
self.assertEqual(1, self.registry.get_sample_value('g'))
134134

135+
def test_gauge_livemin(self):
136+
g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livemin')
137+
values.ValueClass = MultiProcessValue(lambda: 456)
138+
g2 = Gauge('g', 'help', registry=None, multiprocess_mode='livemin')
139+
self.assertEqual(0, self.registry.get_sample_value('g'))
140+
g1.set(1)
141+
g2.set(2)
142+
self.assertEqual(1, self.registry.get_sample_value('g'))
143+
mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR'])
144+
self.assertEqual(2, self.registry.get_sample_value('g'))
145+
135146
def test_gauge_max(self):
136147
g1 = Gauge('g', 'help', registry=None, multiprocess_mode='max')
137148
values.ValueClass = MultiProcessValue(lambda: 456)
@@ -141,6 +152,28 @@ def test_gauge_max(self):
141152
g2.set(2)
142153
self.assertEqual(2, self.registry.get_sample_value('g'))
143154

155+
def test_gauge_livemax(self):
156+
g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livemax')
157+
values.ValueClass = MultiProcessValue(lambda: 456)
158+
g2 = Gauge('g', 'help', registry=None, multiprocess_mode='livemax')
159+
self.assertEqual(0, self.registry.get_sample_value('g'))
160+
g1.set(2)
161+
g2.set(1)
162+
self.assertEqual(2, self.registry.get_sample_value('g'))
163+
mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR'])
164+
self.assertEqual(1, self.registry.get_sample_value('g'))
165+
166+
def test_gauge_sum(self):
167+
g1 = Gauge('g', 'help', registry=None, multiprocess_mode='sum')
168+
values.ValueClass = MultiProcessValue(lambda: 456)
169+
g2 = Gauge('g', 'help', registry=None, multiprocess_mode='sum')
170+
self.assertEqual(0, self.registry.get_sample_value('g'))
171+
g1.set(1)
172+
g2.set(2)
173+
self.assertEqual(3, self.registry.get_sample_value('g'))
174+
mark_process_dead(123, os.environ['PROMETHEUS_MULTIPROC_DIR'])
175+
self.assertEqual(3, self.registry.get_sample_value('g'))
176+
144177
def test_gauge_livesum(self):
145178
g1 = Gauge('g', 'help', registry=None, multiprocess_mode='livesum')
146179
values.ValueClass = MultiProcessValue(lambda: 456)

0 commit comments

Comments
 (0)
0