UTF-8 Content Negotiation · prometheus/client_python@769f7fb · GitHub
[go: up one dir, main page]

Skip to content

Commit 769f7fb

Browse files
committed
UTF-8 Content Negotiation
Signed-off-by: Owen Williams <owen.williams@grafana.com>
1 parent 23ab826 commit 769f7fb

13 files changed

+611
-106
lines changed

prometheus_client/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
process_collector, registry,
66
)
77
from .exposition import (
8-
CONTENT_TYPE_LATEST, delete_from_gateway, generate_latest,
8+
CONTENT_TYPE_PLAIN, delete_from_gateway, generate_latest,
99
instance_ip_grouping_key, make_asgi_app, make_wsgi_app, MetricsHandler,
1010
push_to_gateway, pushadd_to_gateway, start_http_server, start_wsgi_server,
1111
write_to_textfile,
@@ -32,7 +32,7 @@
3232
'Enum',
3333
'enable_created_metrics',
3434
'disable_created_metrics',
35-
'CONTENT_TYPE_LATEST',
35+
'CONTENT_TYPE_PLAIN',
3636
'generate_latest',
3737
'MetricsHandler',
3838
'make_wsgi_app',

prometheus_client/exposition.py

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from .validation import _is_valid_legacy_metric_name
2424

2525
__all__ = (
26-
'CONTENT_TYPE_LATEST',
26+
'CONTENT_TYPE_PLAIN',
2727
'delete_from_gateway',
2828
'generate_latest',
2929
'instance_ip_grouping_key',
@@ -37,7 +37,7 @@
3737
'write_to_textfile',
3838
)
3939

40-
CONTENT_TYPE_LATEST = 'text/plain; version=0.0.4; charset=utf-8'
40+
CONTENT_TYPE_PLAIN = 'text/plain; version=0.0.4; charset=utf-8'
4141
"""Content type of the latest text format"""
4242

4343

@@ -245,29 +245,38 @@ class TmpServer(ThreadingWSGIServer):
245245
start_http_server = start_wsgi_server
246246

247247

248-
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
249-
"""Returns the metrics from the registry in latest text format as a string."""
248+
def generate_latest(registry: CollectorRegistry = REGISTRY, escaping: str = openmetrics.UNDERSCORES) -> bytes:
249+
"""
250+
Generates the exposition format using the basic Prometheus text format.
251+
252+
Params:
253+
registry: CollectorRegistry to export data from.
254+
escaping: Escaping scheme used for metric and label names.
255+
256+
Returns: UTF-8 encoded string containing the metrics in text format.
257+
"""
250258

251259
def sample_line(samples):
252260
if samples.labels:
253261
labelstr = '{0}'.format(','.join(
262+
# Label values always support UTF-8
254263
['{}="{}"'.format(
255-
openmetrics.escape_label_name(k), openmetrics._escape(v))
264+
openmetrics.escape_label_name(k, escaping), openmetrics._escape(v, openmetrics.ALLOWUTF8, False))
256265
for k, v in sorted(samples.labels.items())]))
257266
else:
258267
labelstr = ''
259268
timestamp = ''
260269
if samples.timestamp is not None:
261270
# Convert to milliseconds.
262271
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
263-
if _is_valid_legacy_metric_name(samples.name):
272+
if escaping != openmetrics.ALLOWUTF8 or _is_valid_legacy_metric_name(samples.name):
264273
if labelstr:
265274
labelstr = '{{{0}}}'.format(labelstr)
266-
return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
275+
return f'{openmetrics.escape_metric_name(samples.name, escaping)}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
267276
maybe_comma = ''
268277
if labelstr:
269278
maybe_comma = ','
270-
return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
279+
return f'{{{openmetrics.escape_metric_name(samples.name, escaping)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
271280

272281
output = []
273282
for metric in registry.collect():
@@ -290,8 +299,8 @@ def sample_line(samples):
290299
mtype = 'untyped'
291300

292301
output.append('# HELP {} {}\n'.format(
293-
openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
294-
output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n')
302+
openmetrics.escape_metric_name(mname, escaping), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
303+
output.append(f'# TYPE {openmetrics.escape_metric_name(mname, escaping)} {mtype}\n')
295304

296305
om_samples: Dict[str, List[str]] = {}
297306
for s in metric.samples:
@@ -307,21 +316,69 @@ def sample_line(samples):
307316
raise
308317

309318
for suffix, lines in sorted(om_samples.items()):
310-
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix),
319+
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix, escaping),
311320
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
312-
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n')
321+
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix, escaping)} gauge\n')
313322
output.extend(lines)
314323
return ''.join(output).encode('utf-8')
315324

316325

317326
def choose_encoder(accept_header: str) -> Tuple[Callable[[CollectorRegistry], bytes], str]:
    """Pick an exposition encoder and content type for an Accept header.

    Returns a ``(encoder, content_type)`` pair. The OpenMetrics encoder is
    chosen for the first ``application/openmetrics-text`` entry whose
    ``version`` parameter is exactly ``1.0.0``; the escaping scheme that
    entry asks for is bound into the encoder and echoed in the content type.
    Any other header (including an empty or missing one) yields the plain
    Prometheus text encoder.
    """
    # Python client library accepts a much narrower range of content-types than
    # Prometheus does -- UTF-8 is only supported on OpenMetrics v1.0.0.
    for candidate in (accept_header or '').split(','):
        params = candidate.split(';')
        if params[0].strip() != 'application/openmetrics-text':
            continue
        # Only return an escaping header if we have a good version and
        # mimetype.
        if _get_version(params) != '1.0.0':
            continue
        escaping = _get_escaping(params)
        content_type = openmetrics.CONTENT_TYPE_LATEST + '; escaping=' + str(escaping)
        return (openmetrics.generate_latest_fn(escaping), content_type)
    return generate_latest, CONTENT_TYPE_PLAIN
342+
343+
344+
def _get_version(accept_header: List[str]) -> str:
345+
"""Return the version tag from the Accept header.
346+
347+
If no escaping scheme is specified, returns empty string."""
348+
349+
for tok in accept_header:
350+
if '=' not in tok:
351+
continue
352+
key, value = tok.strip().split('=', 1)
353+
if key == 'version':
354+
return value
355+
return ""
356+
357+
358+
def _get_escaping(accept_header: List[str]) -> str:
    """Return the escaping scheme from one Accept header entry.

    Params:
        accept_header: The ``;``-separated tokens of a single media range.

    Returns: The value of the first ``escaping`` parameter when it is one of
    the allowed scheme strings. If no escaping scheme is specified or the
    scheme is not one of the allowed strings, defaults to UNDERSCORES.
    """
    allowed = (
        openmetrics.ALLOWUTF8,
        openmetrics.UNDERSCORES,
        openmetrics.DOTS,
        openmetrics.VALUES,
    )
    for tok in accept_header:
        key, sep, value = tok.partition('=')
        # Media-type parameter names are case-insensitive.
        if not sep or key.strip().lower() != 'escaping':
            continue
        # A parameter value may be sent as a quoted string.
        value = value.strip().strip('"')
        # The first escaping parameter decides; an unknown scheme falls back
        # to the default instead of looking at later parameters.
        return value if value in allowed else openmetrics.UNDERSCORES
    return openmetrics.UNDERSCORES
381+
325382

326383
def gzip_accepted(accept_encoding_header: str) -> bool:
327384
accept_encoding_header = accept_encoding_header or ''
@@ -369,15 +426,15 @@ def factory(cls, registry: CollectorRegistry) -> type:
369426
return MyMetricsHandler
370427

371428

372-
def write_to_textfile(path: str, registry: CollectorRegistry) -> None:
429+
def write_to_textfile(path: str, registry: CollectorRegistry, escaping: str = openmetrics.ALLOWUTF8) -> None:
373430
"""Write metrics to the given path.
374431
375432
This is intended for use with the Node exporter textfile collector.
376433
The path must end in .prom for the textfile collector to process it."""
377434
tmppath = f'{path}.{os.getpid()}.{threading.current_thread().ident}'
378435
try:
379436
with open(tmppath, 'wb') as f:
380-
f.write(generate_latest(registry))
437+
f.write(generate_latest(registry, escaping))
381438

382439
# rename(2) is atomic but fails on Windows if the destination file exists
383440
if os.name == 'nt':
@@ -645,7 +702,7 @@ def _use_gateway(
645702

646703
handler(
647704
url=url, method=method, timeout=timeout,
648-
headers=[('Content-Type', CONTENT_TYPE_LATEST)], data=data,
705+
headers=[('Content-Type', CONTENT_TYPE_PLAIN)], data=data,
649706
)()
650707

651708

prometheus_client/openmetrics/exposition.py

Lines changed: 121 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#!/usr/bin/env python
2-
1+
from io import StringIO
2+
from sys import maxunicode
33

44
from ..utils import floatToGoString
55
from ..validation import (
@@ -8,6 +8,13 @@
88

99
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
1010
"""Content type of the latest OpenMetrics text format"""
11+
ESCAPING_HEADER_TAG = 'escaping'
12+
13+
14+
ALLOWUTF8 = 'allow-utf-8'
15+
UNDERSCORES = 'underscores'
16+
DOTS = 'dots'
17+
VALUES = 'values'
1118

1219

1320
def _is_valid_exemplar_metric(metric, sample):
@@ -20,30 +27,35 @@ def _is_valid_exemplar_metric(metric, sample):
2027
return False
2128

2229

23-
def generate_latest(registry):
30+
def generate_latest_fn(escaping=ALLOWUTF8):
    """Build a one-argument encoder with the escaping scheme already bound.

    The returned callable takes a registry and returns whatever
    ``generate_latest(registry, escaping)`` returns.
    """
    def _encode(registry):
        return generate_latest(registry, escaping)

    return _encode
32+
33+
34+
def generate_latest(registry, escaping):
2435
'''Returns the metrics from the registry in latest text format as a string.'''
2536
output = []
2637
for metric in registry.collect():
2738
try:
2839
mname = metric.name
2940
output.append('# HELP {} {}\n'.format(
30-
escape_metric_name(mname), _escape(metric.documentation)))
31-
output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n')
41+
escape_metric_name(mname, escaping), _escape(metric.documentation, ALLOWUTF8, False)))
42+
output.append(f'# TYPE {escape_metric_name(mname, escaping)} {metric.type}\n')
3243
if metric.unit:
33-
output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n')
44+
output.append(f'# UNIT {escape_metric_name(mname, escaping)} {metric.unit}\n')
3445
for s in metric.samples:
35-
if not _is_valid_legacy_metric_name(s.name):
36-
labelstr = escape_metric_name(s.name)
46+
if escaping == ALLOWUTF8 and not _is_valid_legacy_metric_name(s.name):
47+
labelstr = escape_metric_name(s.name, escaping)
3748
if s.labels:
3849
labelstr += ', '
3950
else:
4051
labelstr = ''
4152

4253
if s.labels:
4354
items = sorted(s.labels.items())
55+
# Label values always support UTF-8
4456
labelstr += ','.join(
4557
['{}="{}"'.format(
46-
escape_label_name(k), _escape(v))
58+
escape_label_name(k, escaping), _escape(v, ALLOWUTF8, False))
4759
for k, v in items])
4860
if labelstr:
4961
labelstr = "{" + labelstr + "}"
@@ -71,9 +83,9 @@ def generate_latest(registry):
7183
timestamp = ''
7284
if s.timestamp is not None:
7385
timestamp = f' {s.timestamp}'
74-
if _is_valid_legacy_metric_name(s.name):
86+
if (escaping != ALLOWUTF8) or _is_valid_legacy_metric_name(s.name):
7587
output.append('{}{} {}{}{}\n'.format(
76-
s.name,
88+
_escape(s.name, escaping, False),
7789
labelstr,
7890
floatToGoString(s.value),
7991
timestamp,
@@ -94,24 +106,114 @@ def generate_latest(registry):
94106
return ''.join(output).encode('utf-8')
95107

96108

97-
def escape_metric_name(s: str) -> str:
109+
def escape_metric_name(s: str, escaping: str) -> str:
    """Escape a metric name according to the requested escaping scheme.

    - ALLOWUTF8: backslash-escape the name and wrap it in double quotes when
      it does not conform to the legacy Prometheus character set.
    - UNDERSCORES / VALUES: legacy-valid names pass through unchanged, other
      names are rewritten by ``_escape``.
    - DOTS: the name is always rewritten by ``_escape``.

    Empty names and unrecognised schemes are returned unchanged.
    """
    if len(s) == 0:
        return s
    if escaping == ALLOWUTF8:
        if _is_valid_legacy_metric_name(s):
            return _escape(s, escaping, False)
        return '"{}"'.format(_escape(s, escaping, False))
    if escaping == DOTS:
        return _escape(s, escaping, False)
    if escaping in (UNDERSCORES, VALUES):
        return s if _is_valid_legacy_metric_name(s) else _escape(s, escaping, False)
    return s
104130

105131

106-
def escape_label_name(s: str) -> str:
132+
def escape_label_name(s: str, escaping: str) -> str:
    """Escape a label name according to the requested escaping scheme.

    - ALLOWUTF8: backslash-escape the name and wrap it in double quotes when
      it does not conform to the legacy Prometheus label character set.
    - UNDERSCORES / VALUES: legacy-valid names pass through unchanged, other
      names are rewritten by ``_escape``.
    - DOTS: the name is always rewritten by ``_escape``.

    Empty names and unrecognised schemes are returned unchanged.
    """
    if len(s) == 0:
        return s
    if escaping == ALLOWUTF8:
        if _is_valid_legacy_labelname(s):
            return _escape(s, escaping, True)
        return '"{}"'.format(_escape(s, escaping, True))
    if escaping == DOTS:
        return _escape(s, escaping, True)
    if escaping in (UNDERSCORES, VALUES):
        return s if _is_valid_legacy_labelname(s) else _escape(s, escaping, True)
    return s
113153

114154

115-
def _escape(s: str) -> str:
155+
def _escape(s: str, escaping: str, is_labelname: bool) -> str:
    """Rewrite ``s`` according to ``escaping``.

    - ALLOWUTF8: backslash-escape backslash, newline and double-quote.
    - UNDERSCORES: replace every character that is not a legacy-valid rune
      with ``_``.
    - DOTS: ``_`` becomes ``__``, ``.`` becomes ``_dot_``, legacy-valid runes
      are kept, anything else becomes ``__``.
    - VALUES: prefix with ``U__``; ``_`` becomes ``__``, legacy-valid runes
      are kept, surrogate code points become ``_FFFD_``, and anything else
      becomes ``_<hex code point>_``.

    ``is_labelname`` is passed to the legacy-rune check. Unrecognised schemes
    return ``s`` unchanged.
    """
    if escaping == ALLOWUTF8:
        return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')

    if escaping == UNDERSCORES:
        return ''.join(
            ch if _is_valid_legacy_rune(ch, pos, is_labelname) else '_'
            for pos, ch in enumerate(s)
        )

    if escaping == DOTS:
        pieces = []
        for pos, ch in enumerate(s):
            if ch == '_':
                pieces.append('__')
            elif ch == '.':
                pieces.append('_dot_')
            elif _is_valid_legacy_rune(ch, pos, is_labelname):
                pieces.append(ch)
            else:
                pieces.append('__')
        return ''.join(pieces)

    if escaping == VALUES:
        pieces = ["U__"]
        for pos, ch in enumerate(s):
            if ch == '_':
                pieces.append("__")
            elif _is_valid_legacy_rune(ch, pos, is_labelname):
                pieces.append(ch)
            elif not _is_valid_utf8(ch):
                pieces.append("_FFFD_")
            else:
                # Hex code point wrapped in underscores.
                pieces.append('_' + format(ord(ch), 'x') + '_')
        return ''.join(pieces)

    return s
195+
196+
197+
def _is_valid_legacy_rune(b: str, i: int, is_labelname: bool) -> bool:
198+
if len(b) != 1:
199+
raise ValueError("Input 'b' must be a single character.")
200+
if (
201+
('a' <= b <= 'z')
202+
or ('A' <= b <= 'Z')
203+
or (b == '_')
204+
or ('0' <= b <= '9' and i > 0)
205+
):
206+
return True
207+
return not is_labelname and b == ':'
208+
209+
210+
_SURROGATE_MIN = 0xD800
211+
_SURROGATE_MAX = 0xDFFF
212+
213+
214+
def _is_valid_utf8(s: str) -> bool:
215+
if 0 <= ord(s) < _SURROGATE_MIN:
216+
return True
217+
if _SURROGATE_MAX < ord(s) <= maxunicode:
218+
return True
219+
return False

0 commit comments

Comments
 (0)
0