gh-113274: fix EUC-JP decoding of FULLWIDTH TILDE · python/cpython@1631b56 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1631b56

Browse files
committed
gh-113274: fix EUC-JP decoding of FULLWIDTH TILDE
1 parent fa9ba02 commit 1631b56

File tree

3 files changed

+60
-52
lines changed

3 files changed

+60
-52
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
In EUC-JP, b'\x8f\xa2\xb7' now decodes to ～ (FULLWIDTH TILDE) instead of ~
2+
(TILDE).

Modules/cjkcodecs/mappings_jp.h

Lines changed: 52 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -591,10 +591,10 @@ __jisx0208_decmap+6950,33,38},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
591591
};
592592

593593
static const ucs2_t __jisx0212_decmap[6179] = {
594-
728,711,184,729,733,175,731,730,126,900,901,U,U,U,U,U,U,U,U,161,166,191,U,U,U,
595-
U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,186,170,
596-
169,174,8482,164,8470,902,904,905,906,938,U,908,U,910,939,U,911,U,U,U,U,940,
597-
941,942,943,970,912,972,962,973,971,944,974,1026,1027,1028,1029,1030,1031,
594+
728,711,184,729,733,175,731,730,65374,900,901,U,U,U,U,U,U,U,U,161,166,191,U,U,
595+
U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,186,
596+
170,169,174,8482,164,8470,902,904,905,906,938,U,908,U,910,939,U,911,U,U,U,U,
597+
940,941,942,943,970,912,972,962,973,971,944,974,1026,1027,1028,1029,1030,1031,
598598
1032,1033,1034,1035,1036,1038,1039,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,
599599
U,U,U,U,U,U,U,U,U,U,U,U,U,U,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,
600600
1116,1118,1119,198,272,U,294,U,306,U,321,319,U,330,216,338,U,358,222,U,U,U,U,
@@ -1114,51 +1114,51 @@ __jisx0212_decmap+6018,33,126},{__jisx0212_decmap+6112,33,99},{0,0,0},{0,0,0},
11141114
};
11151115

11161116
static const DBCHAR __jisxcommon_encmap[22016] = {
1117-
8512,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41527,
1118-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41538,
1119-
8561,8562,41584,N,41539,8568,8495,41581,41580,N,8780,N,41582,41524,8555,8542,
1120-
N,N,8493,N,8825,N,41521,N,41579,N,N,N,N,41540,43554,43553,43556,43562,43555,
1121-
43561,43297,43566,43570,43569,43572,43571,43584,43583,43586,43585,N,43600,
1122-
43602,43601,43604,43608,43603,8543,43308,43619,43618,43621,43620,43634,43312,
1123-
43342,43810,43809,43812,43818,43811,43817,43329,43822,43826,43825,43828,43827,
1124-
43840,43839,43842,43841,43331,43856,43858,43857,43860,43864,43859,8544,43340,
1125-
43875,43874,43877,43876,43890,43344,43891,43559,43815,43557,43813,43560,43816,
1126-
43563,43819,43564,43820,43567,43823,43565,43821,43568,43824,43298,43330,43575,
1127-
43831,N,N,43574,43830,43576,43832,43573,43829,43578,43834,43579,43835,43581,
1128-
43837,43580,N,43582,43838,43300,43332,43591,43847,43589,43845,N,N,43590,43846,
1129-
43588,43333,43302,43334,43592,43848,43593,43849,43335,43594,43850,43596,43852,
1130-
43595,43851,43305,43337,43304,43336,43597,43853,43599,43855,43598,43854,43338,
1131-
43307,43339,43607,43863,N,N,43606,43862,43309,43341,43609,43865,43611,43867,
1132-
43610,43866,43612,43868,43613,43869,43615,43871,43614,43870,43617,43873,43616,
1133-
43872,43311,43343,43628,43884,43625,43881,43622,43878,43627,43883,43624,43880,
1134-
43626,43882,43633,43889,43636,43892,43635,43637,43893,43639,43895,43638,43894,
1135-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1136-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1137-
43558,43814,43587,43843,43605,43861,43623,43879,43632,43888,43629,43885,43631,
1138-
43887,43630,43886,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43833,41520,
1139-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41519,41522,41526,41525,N,41523,41528,41529,
1140-
42593,N,42594,42595,42596,N,42599,N,42601,42604,42614,9761,9762,9763,9764,
1141-
9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776,9777,N,9778,9779,
1142-
9780,9781,9782,9783,9784,42597,42602,42609,42610,42611,42612,42619,9793,9794,
1143-
9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806,9807,9808,9809,
1144-
42616,9810,9811,9812,9813,9814,9815,9816,42613,42618,42615,42617,42620,10023,
1145-
42818,42819,42820,42821,42822,42823,42824,42825,42826,42827,42828,N,42829,
1146-
42830,10017,10018,10019,10020,10021,10022,10024,10025,10026,10027,10028,10029,
1147-
10030,10031,10032,10033,10034,10035,10036,10037,10038,10039,10040,10041,10042,
1148-
10043,10044,10045,10046,10047,10048,10049,10065,10066,10067,10068,10069,10070,
1149-
10072,10073,10074,10075,10076,10077,10078,10079,10080,10081,10082,10083,10084,
1150-
10085,10086,10087,10088,10089,10090,10091,10092,10093,10094,10095,10096,10097,
1151-
N,10071,42866,42867,42868,42869,42870,42871,42872,42873,42874,42875,42876,N,
1152-
42877,42878,8510,N,N,N,N,8509,8514,N,8518,8519,N,N,8520,8521,N,N,8823,8824,N,
1153-
N,N,8517,8516,N,N,N,N,N,N,N,N,N,8819,N,8556,8557,N,N,N,N,N,N,N,8744,8558,N,N,
1154-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41585,N,N,N,N,N,N,N,N,N,N,N,41583,N,N,N,N,N,N,
1155-
N,N,8818,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1156-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1157-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8747,8748,8746,8749,N,N,
1158-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1159-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8781,N,8782,8783,N,8799,8784,N,N,N,
1160-
8800,8762,N,N,8763,N,N,N,N,N,N,8541,N,N,N,N,N,N,N,8805,N,N,8807,8551,N,8796,N,
1161-
N,N,N,N,N,8778,8779,8769,8768,8809,8810,N,N,N,N,N,N,N,8552,8808,N,N,N,N,N,N,N,
1117+
8512,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1118+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,41538,8561,
1119+
8562,41584,N,41539,8568,8495,41581,41580,N,8780,N,41582,41524,8555,8542,N,N,
1120+
8493,N,8825,N,41521,N,41579,N,N,N,N,41540,43554,43553,43556,43562,43555,43561,
1121+
43297,43566,43570,43569,43572,43571,43584,43583,43586,43585,N,43600,43602,
1122+
43601,43604,43608,43603,8543,43308,43619,43618,43621,43620,43634,43312,43342,
1123+
43810,43809,43812,43818,43811,43817,43329,43822,43826,43825,43828,43827,43840,
1124+
43839,43842,43841,43331,43856,43858,43857,43860,43864,43859,8544,43340,43875,
1125+
43874,43877,43876,43890,43344,43891,43559,43815,43557,43813,43560,43816,43563,
1126+
43819,43564,43820,43567,43823,43565,43821,43568,43824,43298,43330,43575,43831,
1127+
N,N,43574,43830,43576,43832,43573,43829,43578,43834,43579,43835,43581,43837,
1128+
43580,N,43582,43838,43300,43332,43591,43847,43589,43845,N,N,43590,43846,43588,
1129+
43333,43302,43334,43592,43848,43593,43849,43335,43594,43850,43596,43852,43595,
1130+
43851,43305,43337,43304,43336,43597,43853,43599,43855,43598,43854,43338,43307,
1131+
43339,43607,43863,N,N,43606,43862,43309,43341,43609,43865,43611,43867,43610,
1132+
43866,43612,43868,43613,43869,43615,43871,43614,43870,43617,43873,43616,43872,
1133+
43311,43343,43628,43884,43625,43881,43622,43878,43627,43883,43624,43880,43626,
1134+
43882,43633,43889,43636,43892,43635,43637,43893,43639,43895,43638,43894,N,N,N,
1135+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1136+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43558,
1137+
43814,43587,43843,43605,43861,43623,43879,43632,43888,43629,43885,43631,43887,
1138+
43630,43886,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,43833,41520,N,N,N,
1139+
N,N,N,N,N,N,N,N,N,N,N,N,N,41519,41522,41526,41525,N,41523,41528,41529,42593,N,
1140+
42594,42595,42596,N,42599,N,42601,42604,42614,9761,9762,9763,9764,9765,9766,
1141+
9767,9768,9769,9770,9771,9772,9773,9774,9775,9776,9777,N,9778,9779,9780,9781,
1142+
9782,9783,9784,42597,42602,42609,42610,42611,42612,42619,9793,9794,9795,9796,
1143+
9797,9798,9799,9800,9801,9802,9803,9804,9805,9806,9807,9808,9809,42616,9810,
1144+
9811,9812,9813,9814,9815,9816,42613,42618,42615,42617,42620,10023,42818,42819,
1145+
42820,42821,42822,42823,42824,42825,42826,42827,42828,N,42829,42830,10017,
1146+
10018,10019,10020,10021,10022,10024,10025,10026,10027,10028,10029,10030,10031,
1147+
10032,10033,10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,
1148+
10045,10046,10047,10048,10049,10065,10066,10067,10068,10069,10070,10072,10073,
1149+
10074,10075,10076,10077,10078,10079,10080,10081,10082,10083,10084,10085,10086,
1150+
10087,10088,10089,10090,10091,10092,10093,10094,10095,10096,10097,N,10071,
1151+
42866,42867,42868,42869,42870,42871,42872,42873,42874,42875,42876,N,42877,
1152+
42878,8510,N,N,N,N,8509,8514,N,8518,8519,N,N,8520,8521,N,N,8823,8824,N,N,N,
1153+
8517,8516,N,N,N,N,N,N,N,N,N,8819,N,8556,8557,N,N,N,N,N,N,N,8744,8558,N,N,N,N,
1154+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,41585,N,N,N,N,N,N,N,N,N,N,N,41583,N,N,N,N,N,N,N,N,
1155+
8818,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1156+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1157+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8747,8748,8746,8749,N,N,N,N,
1158+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
1159+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8781,N,8782,8783,N,8799,8784,N,N,N,8800,
1160+
8762,N,N,8763,N,N,N,N,N,N,8541,N,N,N,N,N,N,N,8805,N,N,8807,8551,N,8796,N,N,N,
1161+
N,N,N,8778,8779,8769,8768,8809,8810,N,N,N,N,N,N,N,8552,8808,N,N,N,N,N,N,N,
11621162
8806,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8802,N,N,N,N,N,N,N,N,N,N,N,N,N,
11631163
8546,8801,N,N,N,N,8549,8550,N,N,8803,8804,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
11641164
N,N,N,N,8766,8767,N,N,8764,8765,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
@@ -2360,11 +2360,11 @@ N,N,29557,29558,60749,60750,29560,N,29559,60751,60752,60753,60754,60755,29562,
23602360
9032,9033,9034,9035,9036,9037,9038,9039,9040,9041,9042,9043,9044,9045,9046,
23612361
9047,9048,9049,9050,8526,N,8527,8496,8498,8494,9057,9058,9059,9060,9061,9062,
23622362
9063,9064,9065,9066,9067,9068,9069,9070,9071,9072,9073,9074,9075,9076,9077,
2363-
9078,9079,9080,9081,9082,8528,8515,8529,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
2363+
9078,9079,9080,9081,9082,8528,8515,8529,41527,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
23642364
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
23652365
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
2366-
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,8497,
2367-
N,8559,
2366+
N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,
2367+
8497,N,8559,
23682368
};
23692369

23702370
static const struct unim_index jisxcommon_encmap[256] = {

Tools/unicode/genmap_japanese.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ def main():
8686
cp932decmap = loadmap(cp932file)
8787
jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)
8888

89+
# In JIS0212.TXT provided by Unicode, JIS X 0212 entry 0x2237 is mapped to
90+
# ~ (TILDE). However, EUC-JP already includes US-ASCII, so it is often
91+
# mapped to ～ (FULLWIDTH TILDE). See
92+
# https://encoding.spec.whatwg.org/index-jis0212.txt
93+
jisx0212decmap[34][55] = ord('~')
94+
8995
if jis3decmap[0x21][0x24] != 0xff0c:
9096
raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')
9197

0 commit comments

Comments
 (0)
0