From e043607f170542550c920a69d88aa59baf71aad8 Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Mon, 6 Jan 2025 17:57:48 +0100 Subject: [PATCH 1/7] Add test for nh with more spans Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 280 +++++++++--------------- prometheus_client/samples.py | 4 +- tests/openmetrics/test_parser.py | 12 + 3 files changed, 112 insertions(+), 184 deletions(-) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index 39a44dc2..b8b8e3b1 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -5,9 +5,14 @@ import math import re -from ..metrics_core import Metric, METRIC_LABEL_NAME_RE +from ..metrics_core import Metric +from ..parser import ( + _last_unquoted_char, _next_unquoted_char, _parse_value, _split_quoted, + _unquote_unescape, parse_labels, +) from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp from ..utils import floatToGoString +from ..validation import _is_valid_legacy_metric_name, _validate_metric_name def text_string_to_metric_families(text): @@ -73,16 +78,6 @@ def _unescape_help(text): return ''.join(result) -def _parse_value(value): - value = ''.join(value) - if value != value.strip() or '_' in value: - raise ValueError(f"Invalid value: {value!r}") - try: - return int(value) - except ValueError: - return float(value) - - def _parse_timestamp(timestamp): timestamp = ''.join(timestamp) if not timestamp: @@ -113,165 +108,31 @@ def _is_character_escaped(s, charpos): return num_bslashes % 2 == 1 -def _parse_labels_with_state_machine(text): - # The { has already been parsed. - state = 'startoflabelname' - labelname = [] - labelvalue = [] - labels = {} - labels_len = 0 - - for char in text: - if state == 'startoflabelname': - if char == '}': - state = 'endoflabels' - else: - state = 'labelname' - labelname.append(char) - elif state == 'labelname': - if char == '=': - state = 'labelvaluequote' - else: - labelname.append(char) - elif state == 'labelvaluequote': - if char == '"': - state = 'labelvalue' - else: - raise ValueError("Invalid line: " + text) - elif state == 'labelvalue': - if char == '\\': - state = 'labelvalueslash' - elif char == '"': - ln = ''.join(labelname) - if not METRIC_LABEL_NAME_RE.match(ln): - raise ValueError("Invalid line, bad label name: " + text) - if ln in labels: - raise ValueError("Invalid line, duplicate label name: " + text) - labels[ln] = ''.join(labelvalue) - labelname = [] - labelvalue = [] - state = 'endoflabelvalue' - else: - labelvalue.append(char) - elif state == 'endoflabelvalue': - if char == ',': - state = 'labelname' - elif char == '}': - state = 'endoflabels' - else: - raise ValueError("Invalid line: " + text) - elif state == 'labelvalueslash': - state = 'labelvalue' - if char == '\\': - labelvalue.append('\\') - elif char == 'n': - labelvalue.append('\n') - elif char == '"': - labelvalue.append('"') - else: - labelvalue.append('\\' + char) - elif state == 'endoflabels': - if char == ' ': - break - else: - raise ValueError("Invalid line: " + text) - labels_len += 1 - return labels, labels_len - - -def _parse_labels(text): - labels = {} - - # Raise error if we don't have valid labels - if text and "=" not in text: - raise ValueError - - # Copy original labels - sub_labels = text - try: - # Process one label at a time - while sub_labels: - # The label name is before the equal - value_start = sub_labels.index("=") - label_name = sub_labels[:value_start] - sub_labels = sub_labels[value_start + 1:] - - # Check for missing quotes - if not sub_labels or sub_labels[0] != '"': - raise ValueError - - # The first quote is guaranteed to be after the equal - value_substr = sub_labels[1:] - - # Check for extra commas - if not label_name or label_name[0] == ',': - raise ValueError - if not value_substr or value_substr[-1] == ',': - raise ValueError - - # Find the last unescaped quote - i = 0 - while i < len(value_substr): - i = value_substr.index('"', i) - if not _is_character_escaped(value_substr[:i], i): - break - i += 1 - - # The label value is between the first and last quote - quote_end = i + 1 - label_value = sub_labels[1:quote_end] - # Replace escaping if needed - if "\\" in label_value: - label_value = _replace_escaping(label_value) - if not METRIC_LABEL_NAME_RE.match(label_name): - raise ValueError("invalid line, bad label name: " + text) - if label_name in labels: - raise ValueError("invalid line, duplicate label name: " + text) - labels[label_name] = label_value - - # Remove the processed label from the sub-slice for next iteration - sub_labels = sub_labels[quote_end + 1:] - if sub_labels.startswith(","): - next_comma = 1 - else: - next_comma = 0 - sub_labels = sub_labels[next_comma:] - - # Check for missing commas - if sub_labels and next_comma == 0: - raise ValueError - - return labels - - except ValueError: - raise ValueError("Invalid labels: " + text) - - def _parse_sample(text): separator = " # " # Detect the labels in the text - label_start = text.find("{") + label_start = _next_unquoted_char(text, '{') if label_start == -1 or separator in text[:label_start]: # We don't have labels, but there could be an exemplar. - name_end = text.index(" ") + name_end = _next_unquoted_char(text, ' ') name = text[:name_end] + if not _is_valid_legacy_metric_name(name): + raise ValueError("invalid metric name:" + text) # Parse the remaining text after the name remaining_text = text[name_end + 1:] value, timestamp, exemplar = _parse_remaining_text(remaining_text) return Sample(name, {}, value, timestamp, exemplar) - # The name is before the labels name = text[:label_start] - if separator not in text: - # Line doesn't contain an exemplar - # We can use `rindex` to find `label_end` - label_end = text.rindex("}") - label = text[label_start + 1:label_end] - labels = _parse_labels(label) - else: - # Line potentially contains an exemplar - # Fallback to parsing labels with a state machine - labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:]) - label_end = labels_len + len(name) + label_end = _next_unquoted_char(text, '}') + labels = parse_labels(text[label_start + 1:label_end], True) + if not name: + # Name might be in the labels + if '__name__' not in labels: + raise ValueError + name = labels['__name__'] + del labels['__name__'] + elif '__name__' in labels: + raise ValueError("metric name specified more than once") # Parsing labels succeeded, continue parsing the remaining text remaining_text = text[label_end + 2:] value, timestamp, exemplar = _parse_remaining_text(remaining_text) @@ -294,7 +155,12 @@ def _parse_remaining_text(text): text = split_text[1] it = iter(text) + in_quotes = False for char in it: + if char == '"': + in_quotes = not in_quotes + if in_quotes: + continue if state == 'timestamp': if char == '#' and not timestamp: state = 'exemplarspace' @@ -314,8 +180,9 @@ def _parse_remaining_text(text): raise ValueError("Invalid line: " + text) elif state == 'exemplarstartoflabels': if char == '{': - label_start, label_end = text.index("{"), text.rindex("}") - exemplar_labels = _parse_labels(text[label_start + 1:label_end]) + label_start = _next_unquoted_char(text, '{') + label_end = _last_unquoted_char(text, '}') + exemplar_labels = parse_labels(text[label_start + 1:label_end], True) state = 'exemplarparsedlabels' else: raise ValueError("Invalid line: " + text) @@ -365,40 +232,82 @@ def _parse_remaining_text(text): def _parse_nh_sample(text, suffixes): - labels_start = text.find("{") - # check if it's a native histogram with labels - re_nh_without_labels = re.compile(r'^[^{} ]+ {[^{}]+}$') - re_nh_with_labels = re.compile(r'[^{} ]+{[^{}]+} {[^{}]+}$') - if re_nh_with_labels.match(text): - nh_value_start = text.rindex("{") - labels_end = nh_value_start - 2 + """Determines if the line has a native histogram sample, and parses it if so.""" + labels_start = _next_unquoted_char(text, '{') + labels_end = -1 + + # Finding a native histogram sample requires careful parsing of + # possibly-quoted text, which can appear in metric names, label names, and + # values. + # + # First, we need to determine if there are metric labels. Find the space + # between the metric definition and the rest of the line. Look for unquoted + # space or {. + i = 0 + has_metric_labels = False + i = _next_unquoted_char(text, ' {') + if i == -1: + return + + # If the first unquoted char was a {, then that is the metric labels (which + # could contain a UTF-8 metric name). + if text[i] == '{': + has_metric_labels = True + # Consume the labels -- jump ahead to the close bracket. + labels_end = i = _next_unquoted_char(text, '}', i) + if labels_end == -1: + raise ValueError + + # If there is no subsequent unquoted {, then it's definitely not a nh. + nh_value_start = _next_unquoted_char(text, '{', i + 1) + if nh_value_start == -1: + return + + # Edge case: if there is an unquoted # between the metric definition and the {, + # then this is actually an exemplar + exemplar = _next_unquoted_char(text, '#', i + 1) + if exemplar != -1 and exemplar < nh_value_start: + return + + nh_value_end = _next_unquoted_char(text, '}', nh_value_start) + if nh_value_end == -1: + raise ValueError + + if has_metric_labels: labelstext = text[labels_start + 1:labels_end] - labels = _parse_labels(labelstext) + labels = parse_labels(labelstext, True) name_end = labels_start name = text[:name_end] if name.endswith(suffixes): - raise ValueError("the sample name of a native histogram with labels should have no suffixes", name) + raise ValueError("the sample name of a native histogram with labels should have no suffixes", name) + if not name: + # Name might be in the labels + if '__name__' not in labels: + raise ValueError + name = labels['__name__'] + del labels['__name__'] + # Edge case: the only "label" is the name definition. + if not labels: + labels = None + nh_value = text[nh_value_start:] nat_hist_value = _parse_nh_struct(nh_value) return Sample(name, labels, None, None, None, nat_hist_value) # check if it's a native histogram - if re_nh_without_labels.match(text): - nh_value_start = labels_start + else: nh_value = text[nh_value_start:] name_end = nh_value_start - 1 name = text[:name_end] if name.endswith(suffixes): raise ValueError("the sample name of a native histogram should have no suffixes", name) + # Not possible for UTF-8 name here, that would have been caught as having a labelset. nat_hist_value = _parse_nh_struct(nh_value) return Sample(name, None, None, None, None, nat_hist_value) - else: - # it's not a native histogram - return def _parse_nh_struct(text): pattern = r'(\w+):\s*([^,}]+)' - + #(Vesari)TODO: change here re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+,\d+:\d+)\]') re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]') @@ -576,6 +485,7 @@ def build_metric(name, documentation, typ, unit, samples): raise ValueError("Units not allowed for this metric type: " + name) if typ in ['histogram', 'gaugehistogram']: _check_histogram(samples, name) + _validate_metric_name(name) metric = Metric(name, documentation, typ, unit) # TODO: check labelvalues are valid utf8 metric.samples = samples @@ -596,16 +506,19 @@ def build_metric(name, documentation, typ, unit, samples): if line == '# EOF': eof = True elif line.startswith('#'): - parts = line.split(' ', 3) + parts = _split_quoted(line, ' ', 3) if len(parts) < 4: raise ValueError("Invalid line: " + line) - if parts[2] == name and samples: + candidate_name, quoted = _unquote_unescape(parts[2]) + if not quoted and not _is_valid_legacy_metric_name(candidate_name): + raise ValueError + if candidate_name == name and samples: raise ValueError("Received metadata after samples: " + line) - if parts[2] != name: + if candidate_name != name: if name is not None: yield build_metric(name, documentation, typ, unit, samples) # New metric - name = parts[2] + name = candidate_name unit = None typ = None documentation = None @@ -614,7 +527,7 @@ def build_metric(name, documentation, typ, unit, samples): group_timestamp = None group_timestamp_samples = set() samples = [] - allowed_names = [parts[2]] + allowed_names = [candidate_name] if parts[1] == 'HELP': if documentation is not None: @@ -649,7 +562,10 @@ def build_metric(name, documentation, typ, unit, samples): if name is not None: yield build_metric(name, documentation, typ, unit, samples) # Start an unknown metric. - name = sample.name + candidate_name, quoted = _unquote_unescape(sample.name) + if not quoted and not _is_valid_legacy_metric_name(candidate_name): + raise ValueError + name = candidate_name documentation = None unit = None typ = 'unknown' diff --git a/prometheus_client/samples.py b/prometheus_client/samples.py index b57a5d48..0fb55323 100644 --- a/prometheus_client/samples.py +++ b/prometheus_client/samples.py @@ -47,8 +47,8 @@ class NativeHistogram(NamedTuple): schema: int zero_threshold: float zero_count: float - pos_spans: Optional[Tuple[BucketSpan, BucketSpan]] = None - neg_spans: Optional[Tuple[BucketSpan, BucketSpan]] = None + pos_spans: Optional[Sequence[BucketSpan]] = None + neg_spans: Optional[Sequence[BucketSpan]] = None pos_deltas: Optional[Sequence[int]] = None neg_deltas: Optional[Sequence[int]] = None diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py index dc5e9916..a176216f 100644 --- a/tests/openmetrics/test_parser.py +++ b/tests/openmetrics/test_parser.py @@ -188,6 +188,18 @@ def test_native_histogram(self): hfm.add_sample("nativehistogram", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) self.assertEqual([hfm], families) + def test_native_histogram_longer_span(self): + families = text_string_to_metric_families("""# TYPE nhsp histogram +# HELP nhsp Is a basic example of a native histogram with three spans +nhsp {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2,1:1],positive_deltas:[2,1,-3,3]} +# EOF +""") + families = list(families) + + hfm = HistogramMetricFamily("nhsp", "Is a basic example of a native histogram with three spans") + hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), None, (2, 1, -3, 3), None)) + self.assertEqual([hfm], families) + def test_native_histogram_with_labels(self): families = text_string_to_metric_families("""# TYPE hist_w_labels histogram # HELP hist_w_labels Is a basic example of a native histogram with labels From ac736fb64dec55698fb9705a74aa879f98fdceb5 Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Tue, 7 Jan 2025 17:48:31 +0100 Subject: [PATCH 2/7] Allow for span arrays to be of whatever length and for delta lists to be None Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 86 +++++++++++++++---------- tests/openmetrics/test_parser.py | 4 +- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index b8b8e3b1..dd88bb75 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -307,12 +307,17 @@ def _parse_nh_sample(text, suffixes): def _parse_nh_struct(text): pattern = r'(\w+):\s*([^,}]+)' - #(Vesari)TODO: change here - re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+,\d+:\d+)\]') + re_spans = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+(,\d+:\d+)*)\]') re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]') items = dict(re.findall(pattern, text)) - spans = dict(re_spans.findall(text)) + matches = re_spans.findall(text) + spans = {} + for match in matches: + key = match[0] + value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')] + spans[key] = value + deltas = dict(re_deltas.findall(text)) count_value = int(items['count']) @@ -321,37 +326,49 @@ def _parse_nh_struct(text): zero_threshold = float(items['zero_threshold']) zero_count = int(items['zero_count']) + pos_spans_text = spans['positive_spans'] + pos_spans = [] + for start, end in pos_spans_text: + pos_spans.append(BucketSpan(start, end)) + pos_spans_tuple = tuple(pos_spans) + + neg_spans_text = spans['negative_spans'] + neg_spans = [] + for start, end in neg_spans_text: + neg_spans.append(BucketSpan(start, end)) + neg_spans_tuple = tuple(neg_spans) + try: - pos_spans_text = spans['positive_spans'] - elems = pos_spans_text.split(',') - arg1 = [int(x) for x in elems[0].split(':')] - arg2 = [int(x) for x in elems[1].split(':')] - pos_spans = (BucketSpan(arg1[0], arg1[1]), BucketSpan(arg2[0], arg2[1])) - except KeyError: - pos_spans = None - - try: - neg_spans_text = spans['negative_spans'] - elems = neg_spans_text.split(',') - arg1 = [int(x) for x in elems[0].split(':')] - arg2 = [int(x) for x in elems[1].split(':')] - neg_spans = (BucketSpan(arg1[0], arg1[1]), BucketSpan(arg2[0], arg2[1])) - except KeyError: - neg_spans = None - - try: - pos_deltas_text = deltas['positive_deltas'] - elems = pos_deltas_text.split(',') - pos_deltas = tuple([int(x) for x in elems]) - except KeyError: - pos_deltas = None - + pos_deltas_text = deltas.get('positive_deltas') + if pos_deltas_text is not None and pos_deltas_text.strip(): + elems = pos_deltas_text.split(',') + pos_deltas = tuple(int(x.strip()) for x in elems) + else: + pos_deltas = None + except (KeyError, ValueError): + pos_deltas = None + try: - neg_deltas_text = deltas['negative_deltas'] - elems = neg_deltas_text.split(',') - neg_deltas = tuple([int(x) for x in elems]) - except KeyError: - neg_deltas = None + neg_deltas_text = deltas.get('negative_deltas') + if neg_deltas_text is not None and neg_deltas_text.strip(): + elems = neg_deltas_text.split(',') + neg_deltas = tuple(int(x.strip()) for x in elems) + else: + neg_deltas = None + except (KeyError, ValueError): + neg_deltas = None + + print(NativeHistogram( # debugging lines + count_value=count_value, + sum_value=sum_value, + schema=schema, + zero_threshold=zero_threshold, + zero_count=zero_count, + pos_spans=pos_spans_tuple, + neg_spans=neg_spans_tuple, + pos_deltas=pos_deltas, + neg_deltas=neg_deltas + )) return NativeHistogram( count_value=count_value, @@ -359,11 +376,12 @@ def _parse_nh_struct(text): schema=schema, zero_threshold=zero_threshold, zero_count=zero_count, - pos_spans=pos_spans, - neg_spans=neg_spans, + pos_spans=pos_spans_tuple, + neg_spans=neg_spans_tuple, pos_deltas=pos_deltas, neg_deltas=neg_deltas ) + def _group_for_sample(sample, name, typ): diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py index 53733d8c..c3e88502 100644 --- a/tests/openmetrics/test_parser.py +++ b/tests/openmetrics/test_parser.py @@ -245,13 +245,13 @@ def test_native_histogram_utf8_stress(self): def test_native_histogram_longer_span(self): families = text_string_to_metric_families("""# TYPE nhsp histogram # HELP nhsp Is a basic example of a native histogram with three spans -nhsp {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2,1:1],positive_deltas:[2,1,-3,3]} +nhsp {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2,1:1],negative_spans:[0:2,1:2,1:1]} # EOF """) families = list(families) hfm = HistogramMetricFamily("nhsp", "Is a basic example of a native histogram with three spans") - hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), None, (2, 1, -3, 3), None)) + hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), None, None)) self.assertEqual([hfm], families) def test_native_histogram_with_labels(self): From 7618ee351f3f568b1dc4ab5657f10181b4d870aa Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Wed, 8 Jan 2025 10:23:39 +0100 Subject: [PATCH 3/7] Allow for spans to be None, condense spans and deltas composition Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 76 ++++++++++--------------- tests/openmetrics/test_parser.py | 6 +- 2 files changed, 33 insertions(+), 49 deletions(-) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index dd88bb75..463efe4b 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -326,62 +326,46 @@ def _parse_nh_struct(text): zero_threshold = float(items['zero_threshold']) zero_count = int(items['zero_count']) - pos_spans_text = spans['positive_spans'] - pos_spans = [] - for start, end in pos_spans_text: - pos_spans.append(BucketSpan(start, end)) - pos_spans_tuple = tuple(pos_spans) - - neg_spans_text = spans['negative_spans'] - neg_spans = [] - for start, end in neg_spans_text: - neg_spans.append(BucketSpan(start, end)) - neg_spans_tuple = tuple(neg_spans) - - try: - pos_deltas_text = deltas.get('positive_deltas') - if pos_deltas_text is not None and pos_deltas_text.strip(): - elems = pos_deltas_text.split(',') - pos_deltas = tuple(int(x.strip()) for x in elems) - else: - pos_deltas = None - except (KeyError, ValueError): - pos_deltas = None - - try: - neg_deltas_text = deltas.get('negative_deltas') - if neg_deltas_text is not None and neg_deltas_text.strip(): - elems = neg_deltas_text.split(',') - neg_deltas = tuple(int(x.strip()) for x in elems) - else: - neg_deltas = None - except (KeyError, ValueError): - neg_deltas = None - - print(NativeHistogram( # debugging lines - count_value=count_value, - sum_value=sum_value, - schema=schema, - zero_threshold=zero_threshold, - zero_count=zero_count, - pos_spans=pos_spans_tuple, - neg_spans=neg_spans_tuple, - pos_deltas=pos_deltas, - neg_deltas=neg_deltas - )) - + pos_spans = _compose_spans(spans, 'positive_spans') + neg_spans = _compose_spans(spans, 'negative_spans') + pos_deltas = _compose_deltas(deltas, 'positive_deltas') + neg_deltas = _compose_deltas(deltas, 'negative_deltas') + return NativeHistogram( count_value=count_value, sum_value=sum_value, schema=schema, zero_threshold=zero_threshold, zero_count=zero_count, - pos_spans=pos_spans_tuple, - neg_spans=neg_spans_tuple, + pos_spans=pos_spans, + neg_spans=neg_spans, pos_deltas=pos_deltas, neg_deltas=neg_deltas ) +def _compose_spans(spans, spans_name): + try: + pos_spans_text = spans[spans_name] + pos_spans = [] + for start, end in pos_spans_text: + pos_spans.append(BucketSpan(start, end)) + pos_spans_tuple = tuple(pos_spans) + return pos_spans_tuple + except KeyError: + pos_spans_tuple = None + return pos_spans_tuple + +def _compose_deltas(deltas, deltas_name): + try: + pos_deltas_text = deltas.get(deltas_name) + if pos_deltas_text is not None and pos_deltas_text.strip(): + elems = pos_deltas_text.split(',') + pos_deltas = tuple(int(x.strip()) for x in elems) + return pos_deltas + else: + pos_deltas = None + except (KeyError, ValueError): + return None def _group_for_sample(sample, name, typ): diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py index c3e88502..f812f085 100644 --- a/tests/openmetrics/test_parser.py +++ b/tests/openmetrics/test_parser.py @@ -242,16 +242,16 @@ def test_native_histogram_utf8_stress(self): hfm.add_sample("native{histogram", {'xx{} # {}': ' EOF # {}}}'}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) self.assertEqual([hfm], families) - def test_native_histogram_longer_span(self): + def test_native_histogram_three_pos_spans_no_neg_spans_or_deltas(self): families = text_string_to_metric_families("""# TYPE nhsp histogram # HELP nhsp Is a basic example of a native histogram with three spans -nhsp {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2,1:1],negative_spans:[0:2,1:2,1:1]} +nhsp {count:4,sum:6,schema:3,zero_threshold:2.938735877055719e-39,zero_count:1,positive_spans:[0:1,7:1,4:1],positive_deltas:[1,0,0]} # EOF """) families = list(families) hfm = HistogramMetricFamily("nhsp", "Is a basic example of a native histogram with three spans") - hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), (BucketSpan(0, 2), BucketSpan(1, 2), BucketSpan(1, 1)), None, None)) + hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(4, 6, 3, 2.938735877055719e-39, 1, (BucketSpan(0, 1), BucketSpan(7, 1), BucketSpan(4, 1)), None, (1,0,0), None)) self.assertEqual([hfm], families) def test_native_histogram_with_labels(self): From 7606fe74ca876a14fbc32cdbed9665e4da9c16bd Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Wed, 8 Jan 2025 10:34:02 +0100 Subject: [PATCH 4/7] Comply to linting Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 2 ++ prometheus_client/samples.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index 463efe4b..d72149bf 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -343,6 +343,7 @@ def _parse_nh_struct(text): neg_deltas=neg_deltas ) + def _compose_spans(spans, spans_name): try: pos_spans_text = spans[spans_name] @@ -355,6 +356,7 @@ def _compose_spans(spans, spans_name): pos_spans_tuple = None return pos_spans_tuple + def _compose_deltas(deltas, deltas_name): try: pos_deltas_text = deltas.get(deltas_name) diff --git a/prometheus_client/samples.py b/prometheus_client/samples.py index 0fb55323..16e03c04 100644 --- a/prometheus_client/samples.py +++ b/prometheus_client/samples.py @@ -1,4 +1,4 @@ -from typing import Dict, NamedTuple, Optional, Sequence, Tuple, Union +from typing import Dict, NamedTuple, Optional, Sequence, Union class Timestamp: From 4ce92f6c930d5990faa9fc1d5bff90a8ec4ceca8 Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Wed, 8 Jan 2025 11:26:41 +0100 Subject: [PATCH 5/7] Add missing white spaces Signed-off-by: Arianna Vespri --- tests/openmetrics/test_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py index f812f085..aeaa6ed6 100644 --- a/tests/openmetrics/test_parser.py +++ b/tests/openmetrics/test_parser.py @@ -251,7 +251,7 @@ def test_native_histogram_three_pos_spans_no_neg_spans_or_deltas(self): families = list(families) hfm = HistogramMetricFamily("nhsp", "Is a basic example of a native histogram with three spans") - hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(4, 6, 3, 2.938735877055719e-39, 1, (BucketSpan(0, 1), BucketSpan(7, 1), BucketSpan(4, 1)), None, (1,0,0), None)) + hfm.add_sample("nhsp", None, None, None, None, NativeHistogram(4, 6, 3, 2.938735877055719e-39, 1, (BucketSpan(0, 1), BucketSpan(7, 1), BucketSpan(4, 1)), None, (1, 0, 0), None)) self.assertEqual([hfm], families) def test_native_histogram_with_labels(self): From 3548f6f1a5c305c72de055f325a4f00df74227d1 Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Wed, 15 Jan 2025 14:39:14 +0100 Subject: [PATCH 6/7] Address style change requests Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 51 +++++++++++-------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index d72149bf..7a42c709 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -311,13 +311,7 @@ def _parse_nh_struct(text): re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]') items = dict(re.findall(pattern, text)) - matches = re_spans.findall(text) - spans = {} - for match in matches: - key = match[0] - value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')] - spans[key] = value - + span_matches = re_spans.findall(text) deltas = dict(re_deltas.findall(text)) count_value = int(items['count']) @@ -326,8 +320,8 @@ def _parse_nh_struct(text): zero_threshold = float(items['zero_threshold']) zero_count = int(items['zero_count']) - pos_spans = _compose_spans(spans, 'positive_spans') - neg_spans = _compose_spans(spans, 'negative_spans') + pos_spans = _compose_spans(span_matches, 'positive_spans') + neg_spans = _compose_spans(span_matches, 'negative_spans') pos_deltas = _compose_deltas(deltas, 'positive_deltas') neg_deltas = _compose_deltas(deltas, 'negative_deltas') @@ -344,30 +338,29 @@ def _parse_nh_struct(text): ) -def _compose_spans(spans, spans_name): - try: - pos_spans_text = spans[spans_name] - pos_spans = [] - for start, end in pos_spans_text: - pos_spans.append(BucketSpan(start, end)) - pos_spans_tuple = tuple(pos_spans) - return pos_spans_tuple - except KeyError: - pos_spans_tuple = None - return pos_spans_tuple +def _compose_spans(span_matches, spans_name): + spans = {} + for match in span_matches: + key = match[0] + value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')] + spans[key] = value + if spans_name not in spans: + return None + out_spans = [] + for start, end in spans[spans_name]: + out_spans.append(BucketSpan(start, end)) + out_spans_tuple = tuple(out_spans) + return out_spans_tuple def _compose_deltas(deltas, deltas_name): - try: - pos_deltas_text = deltas.get(deltas_name) - if pos_deltas_text is not None and pos_deltas_text.strip(): - elems = pos_deltas_text.split(',') - pos_deltas = tuple(int(x.strip()) for x in elems) - return pos_deltas - else: - pos_deltas = None - except (KeyError, ValueError): + if deltas_name not in deltas: return None + out_deltas = deltas.get(deltas_name) + if out_deltas is not None and out_deltas.strip(): + elems = out_deltas.split(',') + out_deltas_tuple = tuple(int(x.strip()) for x in elems) + return out_deltas_tuple def _group_for_sample(sample, name, typ): From d6be64affa50d56942a56df53b991105bc123d55 Mon Sep 17 00:00:00 2001 From: Arianna Vespri Date: Fri, 17 Jan 2025 09:20:02 +0100 Subject: [PATCH 7/7] Add comments to explain functions Signed-off-by: Arianna Vespri --- prometheus_client/openmetrics/parser.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index 7a42c709..d967e83b 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -339,26 +339,42 @@ def _parse_nh_struct(text): def _compose_spans(span_matches, spans_name): + """Takes a list of span matches (expected to be a list of tuples) and a string + (the expected span list name) and processes the list so that the values extracted + from the span matches can be used to compose a tuple of BucketSpan objects""" spans = {} for match in span_matches: + # Extract the key from the match (first element of the tuple). key = match[0] + # Extract the value from the match (second element of the tuple). + # Split the value string by commas to get individual pairs, + # split each pair by ':' to get start and end, and convert them to integers. value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')] + # Store the processed value in the spans dictionary with the key. spans[key] = value if spans_name not in spans: return None out_spans = [] + # Iterate over each start and end tuple in the list of tuples for the specified spans_name. for start, end in spans[spans_name]: + # Compose a BucketSpan object with the start and end values + # and append it to the out_spans list. out_spans.append(BucketSpan(start, end)) + # Convert to tuple out_spans_tuple = tuple(out_spans) return out_spans_tuple def _compose_deltas(deltas, deltas_name): + """Takes a list of deltas matches (a dictionary) and a string (the expected delta list name), + and processes its elements to compose a tuple of integers representing the deltas""" if deltas_name not in deltas: return None out_deltas = deltas.get(deltas_name) if out_deltas is not None and out_deltas.strip(): elems = out_deltas.split(',') + # Convert each element in the list elems to an integer + # after stripping whitespace and create a tuple from these integers. out_deltas_tuple = tuple(int(x.strip()) for x in elems) return out_deltas_tuple