Add mbverifystr() functions specific to each encoding. · postgrespro/postgres@b80e106 · GitHub
[go: up one dir, main page]

Skip to content

Commit b80e106

Browse files
committed
Add mbverifystr() functions specific to each encoding.
This makes the pg_verify_mbstr() function faster by allowing more efficient, encoding-specific implementations. All the implementations included in this commit are pretty naive: they just call the same encoding-specific verifychar functions that were used previously. Even so, that already gives a performance boost, because the tight character-at-a-time loop is simpler. Reviewed-by: John Naylor Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
1 parent a3367aa commit b80e106

File tree

9 files changed

+493
-101
lines changed
  • src
    • backend
      • commands
  • utils/mb
  • common
  • include/mb
  • 9 files changed

    +493
    -101
    lines changed

    src/backend/commands/extension.c

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control,
    682682
    src_encoding = control->encoding;
    683683

    684684
    /* make sure that source string is valid in the expected encoding */
    685-
    pg_verify_mbstr_len(src_encoding, src_str, len, false);
    685+
    (void) pg_verify_mbstr(src_encoding, src_str, len, false);
    686686

    687687
    /*
    688688
    * Convert the encoding to the database encoding. read_whole_file

    src/backend/utils/mb/conv.c

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
    653653
    continue;
    654654
    }
    655655

    656-
    l = pg_encoding_verifymb(encoding, (const char *) iso, len);
    656+
    l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
    657657
    if (l < 0)
    658658
    break;
    659659

    src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
    8787
    continue;
    8888
    }
    8989

    90-
    l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
    90+
    l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
    9191

    9292
    if (l < 0)
    9393
    report_invalid_encoding(PG_EUC_JIS_2004,
    @@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
    238238
    continue;
    239239
    }
    240240

    241-
    l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
    241+
    l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
    242242

    243243
    if (l < 0 || l > len)
    244244
    report_invalid_encoding(PG_SHIFT_JIS_2004,

    src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

    Lines changed: 5 additions & 5 deletions
    Original file line numberDiff line numberDiff line change
    @@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
    291291
    len--;
    292292
    continue;
    293293
    }
    294-
    l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
    294+
    l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
    295295
    if (l < 0)
    296296
    report_invalid_encoding(PG_MULE_INTERNAL,
    297297
    (const char *) mic, len);
    @@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
    381381
    len--;
    382382
    continue;
    383383
    }
    384-
    l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
    384+
    l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
    385385
    if (l < 0)
    386386
    report_invalid_encoding(PG_EUC_JP,
    387387
    (const char *) euc, len);
    @@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
    431431
    len--;
    432432
    continue;
    433433
    }
    434-
    l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
    434+
    l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
    435435
    if (l < 0)
    436436
    report_invalid_encoding(PG_MULE_INTERNAL,
    437437
    (const char *) mic, len);
    @@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
    485485
    len--;
    486486
    continue;
    487487
    }
    488-
    l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
    488+
    l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
    489489
    if (l < 0)
    490490
    report_invalid_encoding(PG_EUC_JP,
    491491
    (const char *) euc, len);
    @@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
    580580
    len--;
    581581
    continue;
    582582
    }
    583-
    l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len);
    583+
    l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
    584584
    if (l < 0)
    585585
    report_invalid_encoding(PG_SJIS,
    586586
    (const char *) sjis, len);

    src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
    7676
    c1 = *euc;
    7777
    if (IS_HIGHBIT_SET(c1))
    7878
    {
    79-
    l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
    79+
    l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
    8080
    if (l != 2)
    8181
    report_invalid_encoding(PG_EUC_KR,
    8282
    (const char *) euc, len);
    @@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
    122122
    len--;
    123123
    continue;
    124124
    }
    125-
    l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
    125+
    l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
    126126
    if (l < 0)
    127127
    report_invalid_encoding(PG_MULE_INTERNAL,
    128128
    (const char *) mic, len);

    src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c

    Lines changed: 4 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
    148148
    c1 = *euc;
    149149
    if (IS_HIGHBIT_SET(c1))
    150150
    {
    151-
    l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
    151+
    l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
    152152
    if (l < 0)
    153153
    report_invalid_encoding(PG_EUC_TW,
    154154
    (const char *) euc, len);
    @@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
    213213
    len--;
    214214
    continue;
    215215
    }
    216-
    l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
    216+
    l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
    217217
    if (l < 0)
    218218
    report_invalid_encoding(PG_MULE_INTERNAL,
    219219
    (const char *) mic, len);
    @@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
    272272
    len--;
    273273
    continue;
    274274
    }
    275-
    l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
    275+
    l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
    276276
    if (l < 0)
    277277
    report_invalid_encoding(PG_BIG5,
    278278
    (const char *) big5, len);
    @@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
    321321
    len--;
    322322
    continue;
    323323
    }
    324-
    l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
    324+
    l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
    325325
    if (l < 0)
    326326
    report_invalid_encoding(PG_MULE_INTERNAL,
    327327
    (const char *) mic, len);

    src/backend/utils/mb/mbutils.c

    Lines changed: 22 additions & 9 deletions
    Original file line numberDiff line numberDiff line change
    @@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
    519519
    /* make sure that source string is valid */
    520520
    len = VARSIZE_ANY_EXHDR(string);
    521521
    src_str = VARDATA_ANY(string);
    522-
    pg_verify_mbstr_len(src_encoding, src_str, len, false);
    522+
    (void) pg_verify_mbstr(src_encoding, src_str, len, false);
    523523

    524524
    /* perform conversion */
    525525
    dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
    @@ -1215,10 +1215,10 @@ static bool
    12151215
    pg_generic_charinc(unsigned char *charptr, int len)
    12161216
    {
    12171217
    unsigned char *lastbyte = charptr + len - 1;
    1218-
    mbverifier mbverify;
    1218+
    mbchar_verifier mbverify;
    12191219

    12201220
    /* We can just invoke the character verifier directly. */
    1221-
    mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
    1221+
    mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
    12221222

    12231223
    while (*lastbyte < (unsigned char) 255)
    12241224
    {
    @@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
    14451445
    bool
    14461446
    pg_verifymbstr(const char *mbstr, int len, bool noError)
    14471447
    {
    1448-
    return
    1449-
    pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
    1448+
    return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
    14501449
    }
    14511450

    14521451
    /*
    @@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
    14561455
    bool
    14571456
    pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
    14581457
    {
    1459-
    return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
    1458+
    int oklen;
    1459+
    1460+
    Assert(PG_VALID_ENCODING(encoding));
    1461+
    1462+
    oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
    1463+
    if (oklen != len)
    1464+
    {
    1465+
    if (noError)
    1466+
    return false;
    1467+
    report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
    1468+
    }
    1469+
    return true;
    14601470
    }
    14611471

    14621472
    /*
    @@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
    14691479
    * If OK, return length of string in the encoding.
    14701480
    * If a problem is found, return -1 when noError is
    14711481
    * true; when noError is false, ereport() a descriptive message.
    1482+
    *
    1483+
    * Note: We cannot use the faster encoding-specific mbverifystr() function
    1484+
    * here, because we need to count the number of characters in the string.
    14721485
    */
    14731486
    int
    14741487
    pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
    14751488
    {
    1476-
    mbverifier mbverify;
    1489+
    mbchar_verifier mbverifychar;
    14771490
    int mb_len;
    14781491

    14791492
    Assert(PG_VALID_ENCODING(encoding));
    @@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
    14931506
    }
    14941507

    14951508
    /* fetch function pointer just once */
    1496-
    mbverify = pg_wchar_table[encoding].mbverify;
    1509+
    mbverifychar = pg_wchar_table[encoding].mbverifychar;
    14971510

    14981511
    mb_len = 0;
    14991512

    @@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
    15161529
    report_invalid_encoding(encoding, mbstr, len);
    15171530
    }
    15181531

    1519-
    l = (*mbverify) ((const unsigned char *) mbstr, len);
    1532+
    l = (*mbverifychar) ((const unsigned char *) mbstr, len);
    15201533

    15211534
    if (l < 0)
    15221535
    {

    0 commit comments

    Comments
     (0)
    0