Add the missing cyrillic "Yo" characters ('e' and 'E' with two dots) … · postgres/postgres@f4b7624 · GitHub
[go: up one dir, main page]

Skip to content

Commit f4b7624

Browse files
committed
Add the missing cyrillic "Yo" characters ('e' and 'E' with two dots) to the
ISO_8859-5 <-> MULE_INTERNAL conversion tables. This was discovered when trying to convert a string containing those characters from ISO_8859-5 to Windows-1251, because we use MULE_INTERNAL/KOI8R as an intermediate encoding between those two. While the missing "Yo" was just an omission in the conversion tables, there are a few other characters like the "Numero" sign ("No" as a single character) that exist in all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but not in KOI8R. Added comments about that. Patch by Sergey Burladyan. Back-patch to 7.4.
1 parent 470c6c1 commit f4b7624

File tree

1 file changed

+16
-5
lines changed

1 file changed

+16
-5
lines changed

src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.16 2008/01/01 19:45:53 momjian Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.17 2008/03/20 10:30:04 heikki Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -301,6 +301,12 @@ win866_to_win1251(PG_FUNCTION_ARGS)
301301
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
302302
Assert(len >= 0);
303303

304+
/*
305+
* Note: There are a few characters like the "Numero" sign that exist in
306+
* all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but
307+
* not in KOI8R. As we use MULE_INTERNAL/KOI8R as an intermediary, we
308+
* will fail to convert those characters.
309+
*/
304310
buf = palloc(len * ENCODING_GROWTH_RATE);
305311
win8662mic(src, buf, len);
306312
mic2win1251(buf, dest, strlen((char *) buf));
@@ -321,6 +327,7 @@ win1251_to_win866(PG_FUNCTION_ARGS)
321327
Assert(PG_GETARG_INT32(1) == PG_WIN866);
322328
Assert(len >= 0);
323329

330+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
324331
buf = palloc(len * ENCODING_GROWTH_RATE);
325332
win12512mic(src, buf, len);
326333
mic2win866(buf, dest, strlen((char *) buf));
@@ -381,6 +388,7 @@ iso_to_win1251(PG_FUNCTION_ARGS)
381388
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
382389
Assert(len >= 0);
383390

391+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
384392
buf = palloc(len * ENCODING_GROWTH_RATE);
385393
iso2mic(src, buf, len);
386394
mic2win1251(buf, dest, strlen((char *) buf));
@@ -401,6 +409,7 @@ win1251_to_iso(PG_FUNCTION_ARGS)
401409
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
402410
Assert(len >= 0);
403411

412+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
404413
buf = palloc(len * ENCODING_GROWTH_RATE);
405414
win12512mic(src, buf, len);
406415
mic2iso(buf, dest, strlen((char *) buf));
@@ -421,6 +430,7 @@ iso_to_win866(PG_FUNCTION_ARGS)
421430
Assert(PG_GETARG_INT32(1) == PG_WIN866);
422431
Assert(len >= 0);
423432

433+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
424434
buf = palloc(len * ENCODING_GROWTH_RATE);
425435
iso2mic(src, buf, len);
426436
mic2win866(buf, dest, strlen((char *) buf));
@@ -441,6 +451,7 @@ win866_to_iso(PG_FUNCTION_ARGS)
441451
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
442452
Assert(len >= 0);
443453

454+
/* Use mic/KOI8R as intermediary, see comment in win866_to_win1251() */
444455
buf = palloc(len * ENCODING_GROWTH_RATE);
445456
win8662mic(src, buf, len);
446457
mic2iso(buf, dest, strlen((char *) buf));
@@ -483,7 +494,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
483494
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
484495
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
485496
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
486-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
497+
0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
487498
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
488499
0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
489500
0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
@@ -493,7 +504,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
493504
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
494505
0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
495506
0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
496-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
507+
0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
497508
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
498509
};
499510

@@ -509,9 +520,9 @@ mic2iso(const unsigned char *mic, unsigned char *p, int len)
509520
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
510521
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
511522
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
523+
0x00, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00,
512524
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
513-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
514-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
525+
0x00, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, 0x00,
515526
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
516527
0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
517528
0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,

0 commit comments

Comments
 (0)
0