fix(utils): strip_string() checks text length counting bytes not char… · chdsbd/sentry-python@e2674d4 · GitHub
[go: up one dir, main page]

Skip to content

Commit e2674d4

Browse files
authored
fix(utils): strip_string() checks text length counting bytes not chars (getsentry#1711)
The truncation and the indexes in the AnnotatedValues are now computed by number of bytes and not by number of characters. Fixes getsentryGH-1691
1 parent 1240743 commit e2674d4

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

sentry_sdk/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ def strip_string(value, max_length=None):
841841
# This is intentionally not just the default such that one can patch `MAX_STRING_LENGTH` and affect `strip_string`.
842842
max_length = MAX_STRING_LENGTH
843843

844-
length = len(value)
844+
length = len(value.encode("utf-8"))
845845

846846
if length > max_length:
847847
return AnnotatedValue(

tests/utils/test_general.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
iter_event_stacktraces,
1616
to_base64,
1717
from_base64,
18+
strip_string,
19+
AnnotatedValue,
1820
)
1921
from sentry_sdk._compat import text_type, string_types
2022

@@ -217,3 +219,22 @@ def test_failed_base64_conversion(input):
217219
# failures
218220
if type(input) not in string_types:
219221
assert to_base64(input) is None
222+
223+
224+
def test_strip_string():
225+
# If value is None returns None.
226+
assert strip_string(None) is None
227+
228+
# If max_length is not passed, returns the full text (up to 1024 bytes).
229+
text_1024_long = "a" * 1024
230+
assert strip_string(text_1024_long).count("a") == 1024
231+
232+
# If value exceeds the max_length, returns an AnnotatedValue.
233+
text_1025_long = "a" * 1025
234+
stripped_text = strip_string(text_1025_long)
235+
assert isinstance(stripped_text, AnnotatedValue)
236+
assert stripped_text.value.count("a") == 1021 # + '...' is 1024
237+
238+
# If text has unicode characters, it counts bytes and not number of characters.
239+
text_with_unicode_character = "éê"
240+
assert strip_string(text_with_unicode_character, max_length=2).value == "é..."

0 commit comments

Comments
 (0)
0