8000 [3.9] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 cod… · python/cpython@08b640e · GitHub
[go: up one dir, main page]

Skip to content
/ cpython Public

Commit 08b640e

Browse files
ambvmoriyama
andauthored
[3.9] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds (gh-111695) (gh-111780)
(cherry picked from commit c8faa35) Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
1 parent 43a6e4f commit 08b640e

File tree

3 files changed

+53
-3
lines changed

3 files changed

+53
-3
lines changed

Lib/test/test_codecencodings_iso2022.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
2424
(b'ab\x1BNdef', 'replace', 'abdef'),
2525
)
2626

27+
class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase):
28+
encoding = 'iso2022_jp_3'
29+
tstring = multibytecodec_support.load_teststring('iso2022_jp')
30+
codectests = COMMON_CODEC_TESTS + (
31+
(b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
32+
(b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ),
33+
(b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
34+
(b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
35+
(b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
36+
(b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
37+
('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'),
38+
('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'),
39+
('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'),
40+
('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
41+
('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
42+
(b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'),
43+
('ab\u4FF1def', 'replace', b'ab?def'),
44+
)
45+
xmlcharnametest = (
46+
'\xAB\u211C\xBB = \u2329\u1234\u232A',
47+
b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;'
48+
)
49+
50+
class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase):
51+
encoding = 'iso2022_jp_2004'
52+
tstring = multibytecodec_support.load_teststring('iso2022_jp')
53+
codectests = COMMON_CODEC_TESTS + (
54+
(b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
55+
(b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ),
56+
(b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
57+
(b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
58+
(b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
59+
(b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
60+
('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'),
61+
('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'),
62+
('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'),
63+
('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
64+
('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
65+
(b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'),
66+
('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'),
67+
)
68+
xmlcharnametest = (
69+
'\xAB\u211C\xBB = \u2329\u1234\u232A',
70+
b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;'
71+
)
72+
2773
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
2874
encoding = 'iso2022_kr'
2975
tstring = multibytecodec_support.load_teststring('iso2022_kr')
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds

Modules/cjkcodecs/_codecs_iso2022.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,9 @@ ENCODER(iso2022)
181181

182182
encoded = MAP_UNMAPPABLE;
183183
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
184+
Py_UCS4 buf[2] = {c, 0};
184185
Py_ssize_t length = 1;
185-
encoded = dsg->encoder(&c, &length);
186+
encoded = dsg->encoder(buf, &length);
186187
if (encoded == MAP_MULTIPLE_AVAIL) {
187188
/* this implementation won't work for pair
188189
* of non-bmp characters. */
@@ -191,9 +192,11 @@ ENCODER(iso2022)
191192
return MBERR_TOOFEW;
192193
length = -1;
193194
}
194-
else
195+
else {
196+
buf[1] = INCHAR2;
195197
length = 2;
196-
encoded = dsg->encoder(&c, &length);
198+
}
199+
encoded = dsg->encoder(buf, &length);
197200
if (encoded != MAP_UNMAPPABLE) {
198201
insize = length;
199202
break;

0 commit comments

Comments
 (0)
0