8000 gh-111495: improve test coverage of codecs C API by picnixz · Pull Request #126030 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-111495: improve test coverage of codecs C API #126030

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 1, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
8000 Loading
Diff view
Diff view
145 changes: 122 additions & 23 deletions Lib/test/test_capi/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,69 @@ def test_codec_stream_writer(self):
codec_stream_writer(NULL, stream, 'strict')


class UnsafeUnicodeEncodeError(UnicodeEncodeError):
def __init__(self, encoding, message, start, end, reason):
self.may_crash = (end - start) < 0 or (end - start) >= len(message)
super().__init__(encoding, message, start, end, reason)


class UnsafeUnicodeDecodeError(UnicodeDecodeError):
def __init__(self, encoding, message, start, end, reason):
# the case end - start >= len(message) does not crash
self.may_crash = (end - start) < 0
super().__init__(encoding, message, start, end, reason)


class UnsafeUnicodeTranslateError(UnicodeTranslateError):
def __init__(self, message, start, end, reason):
# <= 0 because PyCodec_ReplaceErrors tries to check the Unicode kind
# of a 0-length result (which is by convention PyUnicode_1BYTE_KIND
# and not PyUnicode_2BYTE_KIND as it currently expects)
self.may_crash = (end - start) <= 0 or (end - start) >= len(message)
super().__init__(message, start, end, reason)


class CAPICodecErrors(unittest.TestCase):
@classmethod
def _generate_exceptions(cls, atomic_literal, factory, objlens):
for objlen in objlens:
obj = atomic_literal * objlen
m = 2 * max(2, objlen)
for start in range(-m, m):
for end in range(-m, m):
yield factory(obj, start, end)

@classmethod
def generate_encode_errors(cls, objlen, *objlens):
def factory(obj, start, end):
return UnsafeUnicodeEncodeError('utf-8', obj, start, end, 'reason')
return tuple(cls._generate_exceptions('0', factory, [objlen, *objlens]))

@classmethod
def generate_decode_errors(cls, objlen, *objlens):
def factory(obj, start, end):
return UnsafeUnicodeDecodeError('utf-8', obj, start, end, 'reason')
return tuple(cls._generate_exceptions(b'0', factory, [objlen, *objlens]))

@classmethod
def generate_translate_errors(cls, objlen, *objlens):
def factory(obj, start, end):
return UnsafeUnicodeTranslateError(obj, start, end, 'reason')
return tuple(cls._generate_exceptions('0', factory, [objlen, *objlens]))

@classmethod
def setUpClass(cls):
cls.unicode_encode_errors = cls.generate_encode_errors(0, 1, 5)
cls.unicode_decode_errors = cls.generate_decode_errors(0, 1, 5)
cls.unicode_translate_errors = cls.generate_translate_errors(0, 1, 5)
cls.all_unicode_errors = (
cls.unicode_encode_errors
+ cls.unicode_decode_errors
+ cls.unicode_translate_errors
)
cls.bad_unicode_errors = (
ValueError(),
)

def test_codec_register_error(self):
# for cleaning up between tests
Expand Down Expand Up @@ -780,33 +842,70 @@ def test_codec_lookup_error(self):
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'unknown')

def test_codec_error_handlers(self):
exceptions = [
# A UnicodeError with an empty message currently crashes:
# See: https://github.com/python/cpython/issues/123378
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
]

strict_handler = _testcapi.codec_strict_errors
def test_codec_strict_errors_handler(self):
handler = _testcapi.codec_strict_errors
for exc in self.all_unicode_errors + self.bad_unicode_errors:
with self.subTest(handler=handler, exc=exc):
self.assertRaises(type(exc), handler, exc)

def test_codec_ignore_errors_handler(self):
handler = _testcapi.codec_ignore_errors
all_exceptions = self.all_unicode_errors
bad_exceptions = self.bad_unicode_errors
self.do_test_codec_errors_handler(handler, all_exceptions, bad_exceptions)

def test_codec_replace_errors_handler(self):
handler = _testcapi.codec_replace_errors
all_exceptions = self.all_unicode_errors
bad_exceptions = self.bad_unicode_errors
self.do_test_codec_errors_handler(handler, all_exceptions, bad_exceptions)

def test_codec_xmlcharrefreplace_errors_handler(self):
handler = _testcapi.codec_xmlcharrefreplace_errors
exceptions = self.unicode_encode_errors
bad_exceptions = (
self.bad_unicode_errors
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
)
self.do_test_codec_errors_handler(handler, exceptions, bad_exceptions)

def test_codec_backslashreplace_errors_handler(self):
handler = _testcapi.codec_backslashreplace_errors
exceptions = self.all_unicode_errors
bad_exceptions = self.bad_unicode_errors
self.do_test_codec_errors_handler(handler, exceptions, bad_exceptions)

def test_codec_namereplace_errors_handler(self):
handler = _testlimitedcapi.codec_namereplace_errors
exceptions = self.unicode_encode_errors
bad_exceptions = (
self.bad_unicode_errors
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
)
self.do_test_codec_errors_handler(handler, exceptions, bad_exceptions)

def do_test_codec_errors_handler(self, handler, exceptions, bad_exceptions):
at_least_one = False
for exc in exceptions:
with self.subTest(handler=strict_handler, exc=exc):
self.assertRaises(UnicodeEncodeError, strict_handler, exc)

for handler in [
_testcapi.codec_ignore_errors,
_testcapi.codec_replace_errors,
_testcapi.codec_xmlcharrefreplace_errors,
_testlimitedcapi.codec_namereplace_errors,
]:
for exc in exceptions:
with self.subTest(handler=handler, exc=exc):
self.assertIsInstance(handler(exc), tuple)
# See https://github.com/python/cpython/issues/123378 and related
# discussion and issues for details.
if exc.may_crash:
continue

at_least_one = True
with self.subTest(handler=handler, exc=exc):
# test that the handler does not crash
self.assertIsInstance(handler(exc), tuple)

self.assertTrue(at_least_one, "all exceptions are crashing")

for bad_exc in bad_exceptions:
with self.subTest('bad type', handler=handler, exc=bad_exc):
self.assertRaises(TypeError, handler, bad_exc)


if __name__ == "__main__":
Expand Down
Loading
0