Back-patch addition of pg_wchar-to-multibyte conversion functionality. · markokr/postgres@f12960d · GitHub
[go: up one dir, main page]

Skip to content

Commit f12960d

Browse files
committed
Back-patch addition of pg_wchar-to-multibyte conversion functionality.
Back-patch of commits 72dd629, f6a05fd, and 60e9c22. This is needed to support fixing the regex prefix extraction bug in back branches.
1 parent 7c460f0 commit f12960d

File tree

3 files changed

+259
-49
lines changed

3 files changed

+259
-49
lines changed

src/backend/utils/mb/mbutils.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,28 @@ pg_encoding_mb2wchar_with_len(int encoding,
710710
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
711711
}
712712

713+
/* convert a wchar string to a multibyte string via the wchar2mb_with_len converter that pg_wchar_table holds for DatabaseEncoding->encoding; the input length is pg_wchar_strlen(from) and the converter's result is returned */
714+
int
715+
pg_wchar2mb(const pg_wchar *from, char *to)
716+
{
717+
return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, pg_wchar_strlen(from));
718+
}
719+
720+
/* convert a wchar string to a multibyte string with a caller-supplied length: len is passed unchanged to the converter for DatabaseEncoding->encoding, and the converter's result is returned */
721+
int
722+
pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
723+
{
724+
return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
725+
}
726+
727+
/* same, but uses the converter stored in pg_wchar_table[encoding] instead of the database encoding's. NOTE(review): the table rows commented PG_SJIS through PG_SHIFT_JIS_2004 hold 0 for wchar2mb_with_len, so passing one of those encodings would call through a null pointer -- confirm callers never do */
728+
int
729+
pg_encoding_wchar2mb_with_len(int encoding,
730+
const pg_wchar *from, char *to, int len)
731+
{
732+
return (*pg_wchar_table[encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
733+
}
734+
713735
/* returns the byte length of a multibyte character */
714736
int
715737
pg_mblen(const char *mbstr)

src/backend/utils/mb/wchar.c

Lines changed: 210 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
9999
*to |= *from++;
100100
len -= 2;
101101
}
102-
else
103-
/* must be ASCII */
102+
else /* must be ASCII */
104103
{
105104
*to = *from++;
106105
len--;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
339338
return len;
340339
}
341340

341+
/*
342+
* Convert a pg_wchar string to an EUC_* multibyte string. Each pg_wchar is written as 1 to 4 bytes, starting from its highest nonzero byte.
343+
* caller must allocate enough space for "to", including the trailing zero byte that is stored after the last converted character!
344+
* len: maximum number of pg_wchars to read from "from"; conversion also stops early at the first zero pg_wchar.
345+
* "from" is not necessarily null terminated. Returns the number of bytes written, not counting the trailing zero.
346+
*/
347+
static int
348+
pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
349+
{
350+
int cnt = 0;
351+
352+
while (len > 0 && *from)
353+
{
354+
unsigned char c;
355+
356+
if ((c = (*from >> 24))) /* bits 24-31 nonzero: emit all four bytes */
357+
{
358+
*to++ = c;
359+
*to++ = (*from >> 16) & 0xff;
360+
*to++ = (*from >> 8) & 0xff;
361+
*to++ = *from & 0xff;
362+
cnt += 4;
363+
}
364+
else if ((c = (*from >> 16))) /* highest nonzero byte is bits 16-23: three bytes */
365+
{
366+
*to++ = c;
367+
*to++ = (*from >> 8) & 0xff;
368+
*to++ = *from & 0xff;
369+
cnt += 3;
370+
}
371+
else if ((c = (*from >> 8))) /* highest nonzero byte is bits 8-15: two bytes */
372+
{
373+
*to++ = c;
374+
*to++ = *from & 0xff;
375+
cnt += 2;
376+
}
377+
else /* value fits in the low byte: one byte */
378+
{
379+
*to++ = *from;
380+
cnt++;
381+
}
382+
from++;
383+
len--;
384+
}
385+
*to = 0; /* terminate the output; this byte is not counted in cnt */
386+
return cnt;
387+
}
388+
389+
342390
/*
343391
* JOHAB
344392
*/
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453501
return utf8string;
454502
}
455503

504+
/*
505+
* Trivial conversion from pg_wchar to UTF-8: each pg_wchar is passed to unicode_to_utf8(), which writes its encoding at "to".
506+
* caller should allocate enough space for "to", including the trailing zero byte stored after the last converted character
507+
* len: maximum number of pg_wchars to read from "from"; conversion also stops early at the first zero pg_wchar.
508+
* "from" is not necessarily null terminated. Returns the number of bytes written, not counting the trailing zero.
509+
*/
510+
static int
511+
pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
512+
{
513+
int cnt = 0;
514+
515+
while (len > 0 && *from)
516+
{
517+
int char_len;
518+
519+
unicode_to_utf8(*from, to);
520+
char_len = pg_utf_mblen(to); /* byte length is read back from the bytes just written */
521+
cnt += char_len;
522+
to += char_len;
523+
from++;
524+
len--;
525+
}
526+
*to = 0; /* terminate the output; this byte is not counted in cnt */
527+
return cnt;
528+
}
456529

457530
/*
458531
* Return the byte length of a UTF8 character pointed to by s
@@ -719,6 +792,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
719792
return cnt;
720793
}
721794

795+
/*
796+
* convert a pg_wchar string to mule internal code. Bits 16-23 of each pg_wchar ("lb") select the output form: 2, 3 or 4 bytes, or a single byte when lb matches none of the tests.
797+
* caller should allocate enough space for "to", including the trailing zero byte stored after the last converted character
798+
* len: maximum number of pg_wchars to read from "from"; conversion also stops early at the first zero pg_wchar.
799+
* "from" is not necessarily null terminated. Returns the number of bytes written, not counting the trailing zero.
800+
*/
801+
static int
802+
pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
803+
{
804+
int cnt = 0;
805+
806+
while (len > 0 && *from)
807+
{
808+
unsigned char lb;
809+
810+
lb = (*from >> 16) & 0xff; /* candidate leading byte */
811+
if (IS_LC1(lb)) /* two bytes: lb, low byte */
812+
{
813+
*to++ = lb;
814+
*to++ = *from & 0xff;
815+
cnt += 2;
816+
}
817+
else if (IS_LC2(lb)) /* three bytes: lb, bits 8-15, low byte */
818+
{
819+
*to++ = lb;
820+
*to++ = (*from >> 8) & 0xff;
821+
*to++ = *from & 0xff;
822+
cnt += 3;
823+
}
824+
else if (IS_LCPRV1_A_RANGE(lb)) /* three bytes: LCPRV1_A prefix, lb, low byte */
825+
{
826+
*to++ = LCPRV1_A;
827+
*to++ = lb;
828+
*to++ = *from & 0xff;
829+
cnt += 3;
830+
}
831+
else if (IS_LCPRV1_B_RANGE(lb)) /* three bytes: LCPRV1_B prefix, lb, low byte */
832+
{
833+
*to++ = LCPRV1_B;
834+
*to++ = lb;
835+
*to++ = *from & 0xff;
836+
cnt += 3;
837+
}
838+
else if (IS_LCPRV2_A_RANGE(lb)) /* four bytes: LCPRV2_A prefix, lb, bits 8-15, low byte */
839+
{
840+
*to++ = LCPRV2_A;
841+
*to++ = lb;
842+
*to++ = (*from >> 8) & 0xff;
843+
*to++ = *from & 0xff;
844+
cnt += 4;
845+
}
846+
else if (IS_LCPRV2_B_RANGE(lb)) /* four bytes: LCPRV2_B prefix, lb, bits 8-15, low byte */
847+
{
848+
*to++ = LCPRV2_B;
849+
*to++ = lb;
850+
*to++ = (*from >> 8) & 0xff;
851+
*to++ = *from & 0xff;
852+
cnt += 4;
853+
}
854+
else /* lb matched no test above: emit only the low byte */
855+
{
856+
*to++ = *from & 0xff;
857+
cnt += 1;
858+
}
859+
from++;
860+
len--;
861+
}
862+
*to = 0; /* terminate the output; this byte is not counted in cnt */
863+
return cnt;
864+
}
865+
722866
int
723867
pg_mule_mblen(const unsigned char *s)
724868
{
@@ -774,6 +918,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
774918
return cnt;
775919
}
776920

921+
/*
922+
* Trivial conversion from pg_wchar to single byte encoding. Each pg_wchar is stored into one unsigned char, so this just ignores
923+
* high bits.
924+
* caller should allocate enough space for "to", including the trailing zero byte stored after the last converted character
925+
* len: maximum number of pg_wchars to read from "from"; conversion also stops early at the first zero pg_wchar.
926+
* "from" is not necessarily null terminated. Returns the number of bytes written, not counting the trailing zero.
927+
*/
928+
static int
929+
pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
930+
{
931+
int cnt = 0;
932+
933+
while (len > 0 && *from)
934+
{
935+
*to++ = *from++; /* the store into unsigned char keeps only the low byte */
936+
len--;
937+
cnt++;
938+
}
939+
*to = 0; /* terminate the output; this byte is not counted in cnt */
940+
return cnt;
941+
}
942+
777943
static int
778944
pg_latin1_mblen(const unsigned char *s)
779945
{
@@ -1550,48 +1716,48 @@ pg_eucjp_increment(unsigned char *charptr, int length)
15501716
*-------------------------------------------------------------------
15511717
*/
15521718
pg_wchar_tbl pg_wchar_table[] = {
1553-
{pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1554-
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1555-
{pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1556-
{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1557-
{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1558-
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1559-
{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1560-
{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1561-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1562-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1563-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1564-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1565-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1566-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1567-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1568-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1569-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1570-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1571-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1572-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1573-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1574-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1575-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1576-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1577-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1578-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1579-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1580-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1581-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1582-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1583-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1584-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1585-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1586-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1587-
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1588-
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1589-
{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1590-
{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1591-
{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1592-
{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1593-
{0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1594-
{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
1719+
{pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
1720+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
1721+
{pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
1722+
{pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
1723+
{pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
1724+
{pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
1725+
{pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
1726+
{pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
1727+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
1728+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
1729+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
1730+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
1731+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
1732+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
1733+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
1734+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
1735+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
1736+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
1737+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
1738+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
1739+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
1740+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
1741+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
1742+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
1743+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
1744+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
1745+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
1746+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
1747+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
1748+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
1749+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
1750+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
1751+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
1752+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
1753+
{pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
1754+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
1755+
{0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
1756+
{0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
1757+
{0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
1758+
{0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
1759+
{0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
1760+
{0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
15951761
};
15961762

15971763
/* returns the byte length of a word for mule internal code */

src/include/mb/pg_wchar.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,27 @@ typedef unsigned int pg_wchar;
4949
/*
5050
* Is a prefix byte for "private" single byte encodings?
5151
*/
52-
#define IS_LCPRV1(c) ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
52+
#define LCPRV1_A 0x9a
53+
#define LCPRV1_B 0x9b
54+
#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
55+
#define IS_LCPRV1_A_RANGE(c) \
56+
((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
57+
#define IS_LCPRV1_B_RANGE(c) \
58+
((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
5359
/*
5460
* Is a leading byte for "official" multibyte encodings?
5561
*/
5662
#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
5763
/*
5864
* Is a prefix byte for "private" multibyte encodings?
5965
*/
60-
#define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
66+
#define LCPRV2_A 0x9c
67+
#define LCPRV2_B 0x9d
68+
#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
69+
#define IS_LCPRV2_A_RANGE(c) \
70+
((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
71+
#define IS_LCPRV2_B_RANGE(c) \
72+
((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
6173

6274
/*----------------------------------------------------
6375
* leading characters
@@ -277,7 +289,11 @@ extern pg_enc2gettext pg_enc2gettext_tbl[];
277289
* pg_wchar stuff
278290
*/
279291
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
280-
pg_wchar *to,
292+
pg_wchar *to,
293+
int len);
294+
295+
typedef int (*wchar2mb_with_len_converter) (const pg_wchar *from,
296+
unsigned char *to,
281297
int len);
282298

283299
typedef int (*mblen_converter) (const unsigned char *mbstr);
@@ -290,8 +306,10 @@ typedef int (*mbverifier) (const unsigned char *mbstr, int len);
290306

291307
typedef struct
292308
{
293-
mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
294-
* string to a wchar */
309+
mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
310+
* string to a wchar */
311+
wchar2mb_with_len_converter wchar2mb_with_len; /* convert a wchar
312+
* string to a multibyte */
295313
mblen_converter mblen; /* get byte length of a char */
296314
mbdisplaylen_converter dsplen; /* get display width of a char */
297315
mbverifier mbverify; /* verify multibyte sequence */
@@ -372,6 +390,10 @@ extern int pg_mb2wchar(const char *from, pg_wchar *to);
372390
extern int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
373391
extern int pg_encoding_mb2wchar_with_len(int encoding,
374392
const char *from, pg_wchar *to, int len);
393+
extern int pg_wchar2mb(const pg_wchar *from, char *to);
394+
extern int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len);
395+
extern int pg_encoding_wchar2mb_with_len(int encoding,
396+
const pg_wchar *from, char *to, int len);
375397
extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
376398
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
377399
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);

0 commit comments

Comments
 (0)
0