@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
99
99
* to |= * from ++ ;
100
100
len -= 2 ;
101
101
}
102
A373
- else
103
- /* must be ASCII */
102
+ else /* must be ASCII */
104
103
{
105
104
* to = * from ++ ;
106
105
len -- ;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
339
338
return len ;
340
339
}
341
340
341
+ /*
342
+ * Convert pg_wchar to EUC_* encoding.
343
+ * caller must allocate enough space for "to", including a trailing zero!
344
+ * len: length of from.
345
+ * "from" not necessarily null terminated.
346
+ */
347
+ static int
348
+ pg_wchar2euc_with_len (const pg_wchar * from , unsigned char * to , int len )
349
+ {
350
+ int cnt = 0 ;
351
+
352
+ while (len > 0 && * from )
353
+ {
354
+ unsigned char c ;
355
+
356
+ if ((c = (* from >> 24 )))
357
+ {
358
+ * to ++ = c ;
359
+ * to ++ = (* from >> 16 ) & 0xff ;
360
+ * to ++ = (* from >> 8 ) & 0xff ;
361
+ * to ++ = * from & 0xff ;
362
+ cnt += 4 ;
363
+ }
364
+ else if ((c = (* from >> 16 )))
365
+ {
366
+ * to ++ = c ;
367
+ * to ++ = (* from >> 8 ) & 0xff ;
368
+ * to ++ = * from & 0xff ;
369
+ cnt += 3 ;
370
+ }
371
+ else if ((c = (* from >> 8 )))
372
+ {
373
+ * to ++ = c ;
374
+ * to ++ = * from & 0xff ;
375
+ cnt += 2 ;
376
+ }
377
+ else
378
+ {
379
+ * to ++ = * from ;
380
+ cnt ++ ;
381
+ }
382
+ from ++ ;
383
+ len -- ;
384
+ }
385
+ * to = 0 ;
386
+ return cnt ;
387
+ }
388
+
389
+
342
390
/*
343
391
* JOHAB
344
392
*/
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453
501
return utf8string ;
454
502
}
455
503
504
+ /*
505
+ * Trivial conversion from pg_wchar to UTF-8.
506
+ * caller should allocate enough space for "to"
507
+ * len: length of from.
508
+ * "from" not necessarily null terminated.
509
+ */
510
+ static int
511
+ pg_wchar2utf_with_len (const pg_wchar * from , unsigned char * to , int len )
512
+ {
513
+ int cnt = 0 ;
514
+
515
+ while (len > 0 && * from )
516
+ {
517
+ int char_len ;
518
+
519
+ unicode_to_utf8 (* from , to );
520
+ char_len = pg_utf_mblen (to );
521
+ cnt += char_len ;
522
+ to += char_len ;
523
+ from ++ ;
524
+ len -- ;
525
+ }
526
+ * to = 0 ;
527
+ return cnt ;
528
+ }
456
529
457
530
/*
458
531
* Return the byte length of a UTF8 character pointed to by s
@@ -719,6 +792,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
719
792
return cnt ;
720
793
}
721
794
795
+ /*
796
+ * convert pg_wchar to mule internal code
797
+ * caller should allocate enough space for "to"
798
+ * len: length of from.
799
+ * "from" not necessarily null terminated.
800
+ */
801
+ static int
802
+ pg_wchar2mule_with_len (const pg_wchar * from , unsigned char * to , int len )
803
+ {
804
+ int cnt = 0 ;
805
+
806
+ while (len > 0 && * from )
807
+ {
808
+ unsigned char lb ;
809
+
810
+ lb = (* from >> 16 ) & 0xff ;
811
+ if (IS_LC1 (lb ))
812
+ {
813
+ * to ++ = lb ;
814
+ * to ++ = * from & 0xff ;
815
+ cnt += 2 ;
816
+ }
817
+ else if (IS_LC2 (lb ))
818
+ {
819
+ * to ++ = lb ;
820
+ * to ++ = (* from >> 8 ) & 0xff ;
821
+ * to ++ = * from & 0xff ;
822
+ cnt += 3 ;
823
+ }
824
+ else if (IS_LCPRV1_A_RANGE (lb ))
825
+ {
826
+ * to ++ = LCPRV1_A ;
827
+ * to ++ = lb ;
828
+ * to ++ = * from & 0xff ;
829
+ cnt += 3 ;
830
+ }
831
+ else if (IS_LCPRV1_B_RANGE (lb ))
832
+ {
833
+ * to ++ = LCPRV1_B ;
834
+ * to ++ = lb ;
835
+ * to ++ = * from & 0xff ;
836
+ cnt += 3 ;
837
+ }
838
+ else if (IS_LCPRV2_A_RANGE (lb ))
839
+ {
840
+ * to ++ = LCPRV2_A ;
841
+ * to ++ = lb ;
842
+ * to ++ = (* from >> 8 ) & 0xff ;
843
+ * to ++ = * from & 0xff ;
844
+ cnt += 4 ;
845
+ }
846
+ else if (IS_LCPRV2_B_RANGE (lb ))
847
+ {
848
+ * to ++ = LCPRV2_B ;
849
+ * to ++ = lb ;
850
+ * to ++ = (* from >> 8 ) & 0xff ;
851
+ * to ++ = * from & 0xff ;
852
+ cnt += 4 ;
853
+ }
854
+ else
855
+ {
856
+ * to ++ = * from & 0xff ;
857
+ cnt += 1 ;
858
+ }
859
+ from ++ ;
860
+ len -- ;
861
+ }
862
+ * to = 0 ;
863
+ return cnt ;
864
+ }
865
+
722
866
int
723
867
pg_mule_mblen (const unsigned char * s )
724
868
{
@@ -774,6 +918,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
774
918
return cnt ;
775
919
}
776
920
921
+ /*
922
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
923
+ * high bits.
924
+ * caller should allocate enough space for "to"
925
+ * len: length of from.
926
+ * "from" not necessarily null terminated.
927
+ */
928
+ static int
929
+ pg_wchar2single_with_len (const pg_wchar * from , unsigned char * to , int len )
930
+ {
931
+ int cnt = 0 ;
932
+
933
+ while (len > 0 && * from )
934
+ {
935
+ * to ++ = * from ++ ;
936
+ len -- ;
937
+ cnt ++ ;
938
+ }
939
+ * to = 0 ;
940
+ return cnt ;
941
+ }
942
+
777
943
static int
778
944
pg_latin1_mblen (const unsigned char * s )
779
945
{
@@ -1550,48 +1716,48 @@ pg_eucjp_increment(unsigned char *charptr, int length)
1550
1716
*-------------------------------------------------------------------
1551
1717
*/
1552
1718
pg_wchar_tbl pg_wchar_table [] = {
1553
- {pg_ascii2wchar_with_len , pg_ascii_mblen , pg_ascii_dsplen , pg_ascii_verifier , 1 }, /* PG_SQL_ASCII */
1554
- {pg_eucjp2wchar_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JP */
1555
- {pg_euccn2wchar_with_len , pg_euccn_mblen , pg_euccn_dsplen , pg_euccn_verifier , 2 }, /* PG_EUC_CN */
1556
- {pg_euckr2wchar_with_len , pg_euckr_mblen , pg_euckr_dsplen , pg_euckr_verifier , 3 }, /* PG_EUC_KR */
1557
- {pg_euctw2wchar_with_len , pg_euctw_mblen , pg_euctw_dsplen , pg_euctw_verifier , 4 }, /* PG_EUC_TW */
1558
- {pg_eucjp2wchar_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JIS_2004 */
1559
- {pg_utf2wchar_with_len , pg_utf_mblen , pg_utf_dsplen , pg_utf8_verifier , 4 }, /* PG_UTF8 */
1560
- {pg_mule2wchar_with_len , pg_mule_mblen , pg_mule_dsplen , pg_mule_verifier , 4 }, /* PG_MULE_INTERNAL */
1561
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN1 */
1562
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN2 */
1563
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN3 */
1564
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN4 */
1565
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN5 */
1566
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN6 */
1567
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN7 */
1568
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN8 */
1569
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN9 */
1570
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN10 */
1571
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1256 */
1572
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1258 */
1573
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN866 */
1574
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN874 */
1575
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8R */
1576
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1251 */
1577
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1252 */
1578
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-5 */
1579
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-6 */
1580
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-7 */
1581
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-8 */
1582
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1250 */
1583
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1253 */
1584
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1254 */
1585
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1255 */
1586
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1257 */
1587
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8U */
1588
- {0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 }, /* PG_SJIS */
1589
- {0 , pg_big5_mblen , pg_big5_dsplen , pg_big5_verifier , 2 }, /* PG_BIG5 */
1590
- {0 , pg_gbk_mblen , pg_gbk_dsplen , pg_gbk_verifier , 2 }, /* PG_GBK */
1591
- {0 , pg_uhc_mblen , pg_uhc_dsplen , pg_uhc_verifier , 2 }, /* PG_UHC */
1592
- {0 , pg_gb18030_mblen , pg_gb18030_dsplen , pg_gb18030_verifier , 4 }, /* PG_GB18030 */
1593
- {0 , pg_johab_mblen , pg_johab_dsplen , pg_johab_verifier , 3 }, /* PG_JOHAB */
1594
- {0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 } /* PG_SHIFT_JIS_2004 */
1719
+ {pg_ascii2wchar_with_len , pg_wchar2single_with_len , pg_ascii_mblen , pg_ascii_dsplen , pg_ascii_verifier , 1 }, /* PG_SQL_ASCII */
1720
+ {pg_eucjp2wchar_with_len , pg_wchar2euc_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JP */
1721
+ {pg_euccn2wchar_with_len , pg_wchar2euc_with_len , pg_euccn_mblen , pg_euccn_dsplen , pg_euccn_verifier , 2 }, /* PG_EUC_CN */
1722
+ {pg_euckr2wchar_with_len , pg_wchar2euc_with_len , pg_euckr_mblen , pg_euckr_dsplen , pg_euckr_verifier , 3 }, /* PG_EUC_KR */
1723
+ {pg_euctw2wchar_with_len , pg_wchar2euc_with_len , pg_euctw_mblen , pg_euctw_dsplen , pg_euctw_verifier , 4 }, /* PG_EUC_TW */
1724
+ {pg_eucjp2wchar_with_len , pg_wchar2euc_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JIS_2004 */
1725
+ {pg_utf2wchar_with_len , pg_wchar2utf_with_len , pg_utf_mblen , pg_utf_dsplen , pg_utf8_verifier , 4 }, /* PG_UTF8 */
1726
+ {pg_mule2wchar_with_len , pg_wchar2mule_with_len , pg_mule_mblen , pg_mule_dsplen , pg_mule_verifier , 4 }, /* PG_MULE_INTERNAL */
1727
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN1 */
1728
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN2 */
1729
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN3 */
1730
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN4 */
1731
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN5 */
1732
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN6 */
1733
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN7 */
1734
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN8 */
1735
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN9 */
1736
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN10 */
1737
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1256 */
1738
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1258 */
1739
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN866 */
1740
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN874 */
1741
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8R */
1742
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1251 */
1743
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1252 */
1744
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-5 */
1745
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-6 */
1746
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-7 */
1747
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-8 */
1748
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1250 */
1749
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1253 */
1750
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1254 */
1751
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1255 */
1752
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1257 */
1753
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8U */
1754
+ {0 , 0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 }, /* PG_SJIS */
1755
+ {0 , 0 , pg_big5_mblen , pg_big5_dsplen , pg_big5_verifier , 2 }, /* PG_BIG5 */
1756
+ {0 , 0 , pg_gbk_mblen , pg_gbk_dsplen , pg_gbk_verifier , 2 }, /* PG_GBK */
1757
+ {0 , 0 , pg_uhc_mblen , pg_uhc_dsplen , pg_uhc_verifier , 2 }, /* PG_UHC */
1758
+ {0 , 0 , pg_gb18030_mblen , pg_gb18030_dsplen , pg_gb18030_verifier , 4 }, /* PG_GB18030 */
1759
+ {0 , 0 , pg_johab_mblen , pg_johab_dsplen , pg_johab_verifier , 3 }, /* PG_JOHAB */
1760
+ {0 , 0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 } /* PG_SHIFT_JIS_2004 */
1595
1761
};
1596
1762
1597
1763
/* returns the byte length of a word for mule internal code */
0 commit comments