Allow direct conversion between EUC_JP and SJIS to improve · postgrespro/postgres@e2d088d · GitHub
[go: up one dir, main page]

Skip to content
.hEHvLI{min-width:0;-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;}/*!sc*/ .bmcJak{min-width:0;}/*!sc*/ .fyKNMY[data-size="medium"]{color:var(--fgColor-default,var(--color-fg-default,#1F2328));}/*!sc*/ .gUkoLg{-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;}/*!sc*/ .gLSgdJ{font-weight:600;color:var(--fgColor-default,var(--color-fg-default,#1F2328));}/*!sc*/ .gLSgdJ:hover{color:var(--fgColor-default,var(--color-fg-default,#1F2328));}/*!sc*/ .irPhWZ{width:60px;}/*!sc*/ .dNbsEP{width:62px;}/*!sc*/ .kHfwUD{width:60px;height:22px;}/*!sc*/ .bHLmSv{position:absolute;inset:0 -2px;cursor:col-resize;background-color:transparent;-webkit-transition-delay:0.1s;transition-delay:0.1s;}/*!sc*/ .bHLmSv:hover{background-color:var(--bgColor-neutral-muted,var(--color-neutral-muted,rgba(175,184,193,0.2)));}/*!sc*/ .hqtbbn{bottom:0 !important;-webkit-clip:rect(1px,1px,1px,1px);clip:rect(1px,1px,1px,1px);-webkit-clip-path:inset(50%);clip-path:inset(50%);height:84px;position:absolute;width:320px;}/*!sc*/ data-styled.g1[id="Box-sc-g0xbh4-0"]{content:"hEHvLI,bmcJak,fyKNMY,gUkoLg,gLSgdJ,irPhWZ,dNbsEP,kHfwUD,bHLmSv,hqtbbn,"}/*!sc*/ .brGdpi{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;-webkit-clip:rect(0,0,0,0);clip:rect(0,0,0,0);white-space:nowrap;border-width:0;}/*!sc*/ data-styled.g2[id="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0"]{content:"brGdpi,"}/*!sc*/ .jjwhNb{position:relative;display:inline-block;display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ .jjwhNb::after{position:absolute;z-index:1000000;display:none;padding:0.5em 0.75em;font:normal normal 11px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI","Noto Sans",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI 
Emoji";-webkit-font-smoothing:subpixel-antialiased;color:var(--tooltip-fgColor,var(--fgColor-onEmphasis,var(--color-fg-on-emphasis,#ffffff)));text-align:center;-webkit-text-decoration:none;text-decoration:none;text-shadow:none;text-transform:none;-webkit-letter-spacing:normal;-moz-letter-spacing:normal;-ms-letter-spacing:normal;letter-spacing:normal;word-wrap:break-word;white-space:pre;pointer-events:none;content:attr(aria-label);background:var(--tooltip-bgColor,var(--bgColor-emphasis,var(--color-neutral-emphasis-plus,#24292f)));border-radius:6px;opacity:0;}/*!sc*/ @-webkit-keyframes tooltip-appear{from{opacity:0;}to{opacity:1;}}/*!sc*/ @keyframes tooltip-appear{from{opacity:0;}to{opacity:1;}}/*!sc*/ .jjwhNb:hover::after,.jjwhNb:active::after,.jjwhNb:focus::after,.jjwhNb:focus-within::after{display:inline-block;-webkit-text-decoration:none;text-decoration:none;-webkit-animation-name:tooltip-appear;animation-name:tooltip-appear;-webkit-animation-duration:0.1s;animation-duration:0.1s;-webkit-animation-fill-mode:forwards;animation-fill-mode:forwards;-webkit-animation-timing-function:ease-in;animation-timing-function:ease-in;-webkit-animation-delay:0s;animation-delay:0s;}/*!sc*/ .jjwhNb.tooltipped-no-delay:hover::after,.jjwhNb.tooltipped-no-delay:active::after,.jjwhNb.tooltipped-no-delay:focus::after,.jjwhNb.tooltipped-no-delay:focus-within::after{-webkit-animation-delay:0s;animation-delay:0s;}/*!sc*/ .jjwhNb.tooltipped-multiline:hover::after,.jjwhNb.tooltipped-multiline:active::after,.jjwhNb.tooltipped-multiline:focus::after,.jjwhNb.tooltipped-multiline:focus-within::after{display:table-cell;}/*!sc*/ .jjwhNb.tooltipped-s::after,.jjwhNb.tooltipped-se::after,.jjwhNb.tooltipped-sw::after{top:100%;right:50%;margin-top:6px;}/*!sc*/ .jjwhNb.tooltipped-se::after{right:auto;left:50%;margin-left:-16px;}/*!sc*/ .jjwhNb.tooltipped-sw::after{margin-right:-16px;}/*!sc*/ 
.jjwhNb.tooltipped-n::after,.jjwhNb.tooltipped-ne::after,.jjwhNb.tooltipped-nw::after{right:50%;bottom:100%;margin-bottom:6px;}/*!sc*/ .jjwhNb.tooltipped-ne::after{right:auto;left:50%;margin-left:-16px;}/*!sc*/ .jjwhNb.tooltipped-nw::after{margin-right:-16px;}/*!sc*/ .jjwhNb.tooltipped-s::after,.jjwhNb.tooltipped-n::after{-webkit-transform:translateX(50%);-ms-transform:translateX(50%);transform:translateX(50%);}/*!sc*/ .jjwhNb.tooltipped-w::after{right:100%;bottom:50%;margin-right:6px;-webkit-transform:translateY(50%);-ms-transform:translateY(50%);transform:translateY(50%);}/*!sc*/ .jjwhNb.tooltipped-e::after{bottom:50%;left:100%;margin-left:6px;-webkit-transform:translateY(50%);-ms-transform:translateY(50%);transform:translateY(50%);}/*!sc*/ .jjwhNb.tooltipped-multiline::after{width:-webkit-max-content;width:-moz-max-content;width:max-content;max-width:250px;word-wrap:break-word;white-space:pre-line;border-collapse:separate;}/*!sc*/ .jjwhNb.tooltipped-multiline.tooltipped-s::after,.jjwhNb.tooltipped-multiline.tooltipped-n::after{right:auto;left:50%;-webkit-transform:translateX(-50%);-ms-transform:translateX(-50%);transform:translateX(-50%);}/*!sc*/ .jjwhNb.tooltipped-multiline.tooltipped-w::after,.jjwhNb.tooltipped-multiline.tooltipped-e::after{right:100%;}/*!sc*/ .jjwhNb.tooltipped-align-right-2::after{right:0;margin-right:0;}/*!sc*/ .jjwhNb.tooltipped-align-left-2::after{left:0;margin-left:0;}/*!sc*/ data-styled.g5[id="Tooltip__TooltipBase-sc-17tf59c-0"]{content:"jjwhNb,"}/*!sc*/ .irithh{position:relative;overflow:hidden;-webkit-mask-image:radial-gradient(white,black);mask-image:radial-gradient(white,black);background-color:var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5)));border-radius:3px;display:block;height:1.2em;width:60px;}/*!sc*/ .irithh::after{-webkit-animation:crVFvv 1.5s infinite linear;animation:crVFvv 1.5s infinite 
linear;background:linear-gradient(90deg,transparent,var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5))),transparent);content:'';position:absolute;-webkit-transform:translateX(-100%);-ms-transform:translateX(-100%);transform:translateX(-100%);bottom:0;left:0;right:0;top:0;}/*!sc*/ .ihfxfT{position:relative;overflow:hidden;-webkit-mask-image:radial-gradient(white,black);mask-image:radial-gradient(white,black);background-color:var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5)));border-radius:3px;display:block;height:1.2em;width:62px;}/*!sc*/ .ihfxfT::after{-webkit-animation:crVFvv 1.5s infinite linear;animation:crVFvv 1.5s infinite linear;background:linear-gradient(90deg,transparent,var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5))),transparent);content:'';position:absolute;-webkit-transform:translateX(-100%);-ms-transform:translateX(-100%);transform:translateX(-100%);bottom:0;left:0;right:0;top:0;}/*!sc*/ .kRBfod{position:relative;overflow:hidden;-webkit-mask-image:radial-gradient(white,black);mask-image:radial-gradient(white,black);background-color:var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5)));border-radius:3px;display:block;height:1.2em;width:60px;height:22px;}/*!sc*/ .kRBfod::after{-webkit-animation:crVFvv 1.5s infinite linear;animation:crVFvv 1.5s infinite linear;background:linear-gradient(90deg,transparent,var(--bgColor-neutral-muted,var(--color-neutral-subtle,rgba(234,238,242,0.5))),transparent);content:'';position:absolute;-webkit-transform:translateX(-100%);-ms-transform:translateX(-100%);transform:translateX(-100%);bottom:0;left:0;right:0;top:0;}/*!sc*/ data-styled.g23[id="LoadingSkeleton-sc-695d630a-0"]{content:"irithh,ihfxfT,kRBfod,"}/*!sc*/ @-webkit-keyframes 
crVFvv{0%{-webkit-transform:translateX(-100%);-ms-transform:translateX(-100%);transform:translateX(-100%);}50%{-webkit-transform:translateX(100%);-ms-transform:translateX(100%);transform:translateX(100%);}100%{-webkit-transform:translateX(100%);-ms-transform:translateX(100%);transform:translateX(100%);}}/*!sc*/ @keyframes crVFvv{0%{-webkit-transform:translateX(-100%);-ms-transform:translateX(-100%);transform:translateX(-100%);}50%{-webkit-transform:translateX(100%);-ms-transform:translateX(100%);transform:translateX(100%);}100%{-webkit-transform:translateX(100%);-ms-transform:translateX(100%);transform:translateX(100%);}}/*!sc*/ data-styled.g46[id="sc-keyframes-crVFvv"]{content:"crVFvv,"}/*!sc*/

Commit e2d088d

Browse files
committed
Allow direct conversion between EUC_JP and SJIS to improve
performance. patches submitted by Atsushi Ogawa.
1 parent 1fa87fa commit e2d088d

File tree

1 file changed

+201
-11
lines changed

1 file changed

+201
-11
lines changed

src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

Lines changed: 201 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -58,23 +58,21 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
5858
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
5959
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
6060
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
61+
static void euc_jp2sjis(unsigned char *euc, unsigned char *p, int len);
62+
static void sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len);
6163

6264
Datum
6365
euc_jp_to_sjis(PG_FUNCTION_ARGS)
6466
{
6567
unsigned char *src = PG_GETARG_CSTRING(2);
6668
unsigned char *dest = PG_GETARG_CSTRING(3);
6769
int len = PG_GETARG_INT32(4);
68-
unsigned char *buf;
6970

7071
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
7172
Assert(PG_GETARG_INT32(1) == PG_SJIS);
7273
Assert(len >= 0);
7374

74-
buf = palloc(len * ENCODING_GROWTH_RATE);
75-
euc_jp2mic(src, buf, len);
76-
mic2sjis(buf, dest, strlen(buf));
77-
pfree(buf);
75+
euc_jp2sjis(src, dest, len);
7876

7977
PG_RETURN_VOID();
8078
}
@@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
8583
unsigned char *src = PG_GETARG_CSTRING(2);
8684
unsigned char *dest = PG_GETARG_CSTRING(3);
8785
int len = PG_GETARG_INT32(4);
88-
unsigned char *buf;
8986

9087
Assert(PG_GETARG_INT32(0) == PG_SJIS);
9188
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
9289
Assert(len >= 0);
9390

94-
buf = palloc(len * ENCODING_GROWTH_RATE);
95-
sjis2mic(src, buf, len);
96-
mic2euc_jp(buf, dest, strlen(buf));
97-
pfree(buf);
91+
sjis2euc_jp(src, dest, len);
9892

9993
PG_RETURN_VOID();
10094
}
@@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
454448
}
455449
*p = '\0';
456450
}
451+
452+
/*
453+
* EUC_JP -> SJIS
454+
*/
455+
static void
456+
euc_jp2sjis(unsigned char *euc, unsigned char *p, int len)
457+
{
458+
int c1,
459+
c2,
460+
k;
461+
unsigned char *euc_end = euc + len;
462+
463+
while (euc_end >= euc && (c1 = *euc++))
464+
{
465+
if(c1 < 0x80)
466+
{
467+
/* should be ASCII */
468+
*p++ = c1;
469+
}
470+
else if (c1 == SS2)
471+
{
472+
/* hankaku kana? */
473+
*p++ = *euc++;
474+
}
475+
else if (c1 == SS3)
476+
{
477+
/* JIS X0212 kanji? */
478+
c1 = *euc++;
479+
c2 = *euc++;
480+
k = c1 << 8 | c2;
481+
if (k >= 0xf5a1)
482+
{
483+
/* UDC2 */
484+
c1 -= 0x54;
485+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
486+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
487+
}
488+
else
489+
{
490+
int i, k2;
491+
492+
/* IBM kanji */
493+
for (i = 0;; i++)
494+
{
495+
k2 = ibmkanji[i].euc & 0xffff;
496+
if (k2 == 0xffff)
497+
{
498+
*p++ = PGSJISALTCODE >> 8;
499+
*p++ = PGSJISALTCODE & 0xff;
500+
break;
501+
}
502+
if (k2 == k)
503+
{
504+
k = ibmkanji[i].sjis;
505+
*p++ = k >> 8;
506+
*p++ = k & 0xff;
507+
break;
508+
}
509+
}
510+
}
511+
}
512+
else
513+
{
514+
/* JIS X0208 kanji? */
515+
c2 = *euc++;
516+
k = (c1 << 8) | (c2 & 0xff);
517+
if (k >= 0xf5a1)
518+
{
519+
/* UDC1 */
520+
c1 -= 0x54;
521+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
522+
}
523+
else
524+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
525+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
526+
}
527+
}
528+
*p = '\0';
529+
}
530+
531+
/*
532+
* SJIS ---> EUC_JP
533+
*/
534+
static void
535+
sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len)
536+
{
537+
int c1,
538+
c2,
539+
i,
540+
k,
541+
k2;
542+
unsigned char *sjis_end = sjis + len;
543+
544+
while (sjis_end >= sjis && (c1 = *sjis++))
545+
{
546+
if(c1 < 0x80)
547+
{
548+
/* should be ASCII */
549+
*p++ = c1;
550+
}
551+
else if (c1 >= 0xa1 && c1 <= 0xdf)
552+
{
553+
/* JIS X0201 (1 byte kana) */
554+
*p++ = SS2;
555+
*p++ = c1;
556+
}
557+
else
558+
{
559+
/*
560+
* JIS X0208, X0212, user defined extended characters
561+
*/
562+
c2 = *sjis++;
563+
k = (c1 << 8) + c2;
564+
if (k >= 0xed40 && k < 0xf040)
565+
{
566+
/* NEC selection IBM kanji */
567+
for (i = 0;; i++)
568+
{
569+
k2 = ibmkanji[i].nec;
570+
if (k2 == 0xffff)
571+
break;
572+
if (k2 == k)
573+
{
574+
k = ibmkanji[i].sjis;
575+
c1 = (k >> 8) & 0xff;
576+
c2 = k & 0xff;
577+
}
578+
}
579+
}
580+
581+
if (k < 0xeb3f)
582+
{
583+
/* JIS X0208 */
584+
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
585+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
586+
}
587+
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
588+
{
589+
/* NEC selection IBM kanji - Other undecided justice */
590+
*p++ = PGEUCALTCODE >> 8;
591+
*p++ = PGEUCALTCODE & 0xff;
592+
}
593+
else if (k >= 0xf040 && k < 0xf540)
594+
{
595+
/*
596+
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
597+
* 0x7e7e EUC 0xf5a1 - 0xfefe
598+
*/
599+
c1 -= 0x6f;
600+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
601+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
602+
}
603+
else if (k >= 0xf540 && k < 0xfa40)
604+
{
605+
/*
606+
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
607+
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
608+
*/
609+
*p++ = SS3;
610+
c1 -= 0x74;
611+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
612+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
613+
}
614+
else if (k >= 0xfa40)
615+
{
616+
/*
617+
* mapping IBM kanji to X0208 and X0212
618+
*
619+
*/
620+
for (i = 0;; i++)
621+
{
622+
k2 = ibmkanji[i].sjis;
623+
if (k2 == 0xffff)
624+
break;
625+
if (k2 == k)
626+
{
627+
k = ibmkanji[i].euc;
628+
if (k >= 0x8f0000)
629+
{
630+
*p++ = SS3;
631+
*p++ = 0x80 | ((k & 0xff00) >> 8);
632+
*p++ = 0x80 | (k & 0xff);
633+
}
634+
else
635+
{
636+
*p++ = 0x80 | (k >> 8);
637+
*p++ = 0x80 | (k & 0xff);
638+
}
639+
}
640+
}
641+
}
642+
}
643+
}
644+
*p = '\0';
645+
}
646+

0 commit comments

Comments
 (0)
0