3
3
from __future__ import unicode_literals
4
4
5
5
import math
6
+ import re
6
7
7
8
from ..metrics_core import Metric , METRIC_LABEL_NAME_RE
8
9
from ..samples import Exemplar , Sample , Timestamp
@@ -24,6 +25,24 @@ def text_string_to_metric_families(text):
24
25
yield metric_family
25
26
26
27
28
+ ESCAPE_SEQUENCES = {
29
+ '\\ \\ ' : '\\ ' ,
30
+ '\\ n' : '\n ' ,
31
+ '\\ "' : '"' ,
32
+ }
33
+
34
+
35
+ def _replace_escape_sequence (match ):
36
+ return ESCAPE_SEQUENCES [match .group (0 )]
37
+
38
+
39
+ ESCAPING_RE = re .compile (r'\\[\\n"]' )
40
+
41
+
42
+ def _replace_escaping (s ):
43
+ return ESCAPING_RE .sub (_replace_escape_sequence , s )
44
+
45
+
27
46
def _unescape_help (text ):
28
47
result = []
29
48
slash = False
@@ -83,14 +102,23 @@ def _parse_timestamp(timestamp):
83
102
return ts
84
103
85
104
86
- def _parse_labels (it , text ):
105
+ def _is_character_escaped (s , charpos ):
106
+ num_bslashes = 0
107
+ while (charpos >
67E6
num_bslashes and
108
+ s [charpos - 1 - num_bslashes ] == '\\ ' ):
109
+ num_bslashes += 1
110
+ return num_bslashes % 2 == 1
111
+
112
+
113
+ def _parse_labels_with_state_machine (text ):
87
114
# The { has already been parsed.
88
115
state = 'startoflabelname'
89
116
labelname = []
90
117
labelvalue = []
91
118
labels = {}
119
+ labels_len = 0
92
120
93
- for char in it :
121
+ for char in text :
94
122
if state == 'startoflabelname' :
95
123
if char == '}' :
96
124
state = 'endoflabels'
@@ -141,37 +169,123 @@ def _parse_labels(it, text):
141
169
break
142
170
else :
143
171
raise ValueError ("Invalid line: " + text )
144
- return labels
172
+ labels_len += 1
173
+ return labels , labels_len
174
+
175
+
176
def _parse_labels(text):
    """Parse the inside of a label set into a dict of name -> value.

    ``text`` is the label section without the surrounding braces, for
    example ``a="1",b="two"``.  Labels are consumed one at a time from the
    left: the name is everything before the first ``=``, the value is the
    double-quoted string that follows, and labels are separated by a single
    comma.  Escape sequences in a value are decoded with
    ``_replace_escaping``.  An empty ``text`` yields an empty dict.

    Raises:
        ValueError: with the message ``"Invalid labels: " + text`` when the
            text has no ``=``, a value is not quoted or not terminated by an
            unescaped quote, a label name is empty, or commas are missing,
            doubled or trailing.
    """
    labels = {}

    # Non-empty text without any "=" cannot hold a label.
    if text and "=" not in text:
        raise ValueError("Invalid labels: " + text)

    # Remaining, not yet consumed, part of the label text.
    sub_labels = text
    try:
        # Process one label at a time
        while sub_labels:
            # The label name is before the equal
            value_start = sub_labels.index("=")
            label_name = sub_labels[:value_start]
            sub_labels = sub_labels[value_start + 1:]

            # Check for missing quotes
            if not sub_labels or sub_labels[0] != '"':
                raise ValueError

            # The first quote is guaranteed to be after the equal
            value_substr = sub_labels[1:]

            # Check for extra commas
            if not label_name or label_name[0] == ',':
                raise ValueError
            if not value_substr or value_substr[-1] == ',':
                raise ValueError

            # Find the first unescaped quote: it terminates the value.
            i = 0
            while i < len(value_substr):
                # str.index raises ValueError when no quote is left.
                i = value_substr.index('"', i)
                if not _is_character_escaped(value_substr, i):
                    break
                i += 1
            else:
                # Every quote up to the end of the text was escaped, so the
                # value is never closed.
                raise ValueError

            # The label value is in between the opening and closing quote
            quote_end = i + 1
            label_value = sub_labels[1:quote_end]
            # Replace escaping if needed
            if "\\" in label_value:
                label_value = _replace_escaping(label_value)
            labels[label_name] = label_value

            # Drop the processed label, then require a comma before any
            # further label.
            sub_labels = sub_labels[quote_end + 1:]
            if sub_labels.startswith(","):
                sub_labels = sub_labels[1:]
            elif sub_labels:
                raise ValueError

        return labels

    except ValueError:
        raise ValueError("Invalid labels: " + text)
145
238
146
239
147
240
def _parse_sample(text):
    """Parse one sample line into a ``Sample``.

    The line is split into the metric name, an optional ``{...}`` label set
    and the remaining text, which ``_parse_remaining_text`` turns into the
    value, timestamp and exemplar.

    Lines without the `` # `` exemplar separator take a fast path: the label
    set ends at the last ``}`` of the line and is handed to
    ``_parse_labels``.  Lines that may carry an exemplar can contain a second
    pair of braces, so their labels are parsed character by character with
    ``_parse_labels_with_state_machine``, which also reports how many
    characters it consumed.

    Raises:
        ValueError: when the line has no space after the name, no closing
            brace for its label set, or no space between the closing brace
            and the value; also whatever the label and value parsers raise.
    """
    # Detect the labels in the text
    label_start = text.find("{")
    if label_start == -1:
        # We don't have labels: the name runs up to the first space.
        name, space, remaining_text = text.partition(" ")
        if not space:
            raise ValueError("Invalid line: " + text)
        value, timestamp, exemplar = _parse_remaining_text(remaining_text)
        return Sample(name, {}, value, timestamp, exemplar)

    # The name is before the labels
    name = text[:label_start]
    if " # " not in text:
        # Line doesn't contain an exemplar, so the last "}" closes the
        # label set.
        label_end = text.rfind("}")
        if label_end < label_start:
            raise ValueError("Invalid line: " + text)
        labels = _parse_labels(text[label_start + 1:label_end])
        # Exactly one space must separate the label set from the value;
        # without this check the character after "}" would be dropped
        # silently by the slice below.
        if text[label_end + 1:label_end + 2] != " ":
            raise ValueError("Invalid line: " + text)
    else:
        # Line potentially contains an exemplar
        # Fallback to parsing labels with a state machine
        labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:])
        # labels_len counts the characters after "{" up to and including
        # "}", so this is the index of the closing brace in ``text``.
        label_end = labels_len + len(name)

    # Skip the closing brace and the following space.
    remaining_text = text[label_end + 2:]
    value, timestamp, exemplar = _parse_remaining_text(remaining_text)
    return Sample(name, labels, value, timestamp, exemplar)
269
+
270
+
271
+ def _parse_remaining_text (text ):
272
+ split_text = text .split (" " , 1 )
273
+ val = _parse_value (split_text [0 ])
274
+ if len (split_text ) == 1 :
275
+ # We don't have timestamp or exemplar
276
+ return val , None , None
277
+
150
278
timestamp = []
151
- labels = {}
152
279
exemplar_value = []
153
280
exemplar_timestamp = []
154
281
exemplar_labels = None
155
282
156
- state = 'name'
283
+ state = 'timestamp'
284
+ text = split_text [1 ]
157
285
158
286
it = iter (text )
159
287
for char in it :
160
- if state == 'name' :
161
- if char == '{' :
162
- labels = _parse_labels (it , text )
163
- # Space has already been parsed.
164
- state = 'value'
165
- elif char == ' ' :
166
- state = 'value'
167
- else :
168
- name .append (char )
169
- elif state == 'value' :
170
- if char == ' ' :
171
- state = 'timestamp'
172
- else :
173
- value .append (char )
174
- elif state == 'timestamp' :
288
+ if state == 'timestamp' :
175
289
if char == '#' and not timestamp :
176
290
state = 'exemplarspace'
177
291
elif char == ' ' :
@@ -190,13 +304,23 @@ def _parse_sample(text):
190
304
raise ValueError ("Invalid line: " + text )
191
305
elif state == 'exemplarstartoflabels' :
192
306
if char == '{' :
193
- exemplar_labels = _parse_labels (it , text )
194
- # Space has already been parsed.
307
+ label_start , label_end = text .index ("{" ), text .rindex ("}" )
308
+ exemplar_labels = _parse_labels (text [label_start + 1 :label_end ])
309
+ state = 'exemplarparsedlabels'
310
+ else :
311
+ raise ValueError ("Invalid line: " + text )
312
+ elif state == 'exemplarparsedlabels' :
313
+ if char == '}' :
314
+ state = 'exemplarvaluespace'
315
+ elif state == 'exemplarvaluespace' :
316
+ if char == ' ' :
195
317
state = 'exemplarvalue'
196
318
else :
197
319
raise ValueError ("Invalid line: " + text )
198
320
elif state == 'exemplarvalue' :
199
- if char == ' ' :
321
+ if char == ' ' and not exemplar_value :
322
+ raise ValueError ("Invalid line: " + text )
323
+ elif char == ' ' :
200
324
state = 'exemplartimestamp'
201
325
else :
202
326
exemplar_value .append (char )
@@ -212,13 +336,9 @@ def _parse_sample(text):
212
336
raise ValueError ("Invalid line: " + text )
213
337
214
338
# Incomplete exemplar.
215
- if state in ['exemplarhash' , 'exemplarspace' , 'exemplarstartoflabels' ]:
339
+ if state in ['exemplarhash' , 'exemplarspace' , 'exemplarstartoflabels' , 'exemplarparsedlabels' ]:
216
340
raise ValueError ("Invalid line: " + text )
217
341
218
- if not value :
219
- raise ValueError ("Invalid line: " + text )
220
- value = '' .join (value )
221
- val = _parse_value (value )
222
342
ts = _parse_timestamp (timestamp )
223
343
exemplar = None
224
344
if exemplar_labels is not None :
@@ -231,7 +351,7 @@ def _parse_sample(text):
231
351
_parse_timestamp (exemplar_timestamp ),
232
352
)
233
353
234
- return Sample ( '' . join ( name ), labels , val , ts , exemplar )
354
+ return val , ts , exemplar
235
355
236
356
237
357
def _group_for_sample (sample , name , typ ):
0 commit comments