Per-column collation support · postgres/postgres@414c5a2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 414c5a2

Browse files
committed
Per-column collation support
This adds collation support for columns and domains, a COLLATE clause to override it per expression, and B-tree index support. Peter Eisentraut; reviewed by Pavel Stehule, Itagaki Takahiro, Robert Haas, Noah Misch.
1 parent 1703f0e commit 414c5a2
  • nodes
  • optimizer
  • parser
  • rewrite
  • tsearch
  • utils
  • bin
  • include
  • pl/plpgsql/src
  • port
  • test/regress
  • Some content is hidden

    Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

    156 files changed

    +4519
    -582
    lines changed

    config/c-library.m4

    Lines changed: 29 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -297,3 +297,32 @@ int main()
    297297
    ])dnl AC_CACHE_VAL
    298298
    AC_MSG_RESULT([$pgac_cv_printf_arg_control])
    299299
    ])# PGAC_FUNC_PRINTF_ARG_CONTROL
    300+
    301+
    302+
    # PGAC_TYPE_LOCALE_T
    303+
    # ------------------
    304+
    # Check for the locale_t type and find the right header file. Mac OS
    305+
    # X needs xlocale.h; standard is locale.h, but glibc also has an
    306+
    # xlocale.h file that we should not use.
    307+
    #
    308+
    AC_DEFUN([PGAC_TYPE_LOCALE_T],
    309+
    [AC_CACHE_CHECK([for locale_t], pgac_cv_type_locale_t,
    310+
    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
    311+
    [#include <locale.h>
    312+
    locale_t x;],
    313+
    [])],
    314+
    [pgac_cv_type_locale_t=yes],
    315+
    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
    316+
    [#include <xlocale.h>
    317+
    locale_t x;],
    318+
    [])],
    319+
    [pgac_cv_type_locale_t='yes (in xlocale.h)'],
    320+
    [pgac_cv_type_locale_t=no])])])
    321+
    if test "$pgac_cv_type_locale_t" != no; then
    322+
    AC_DEFINE(HAVE_LOCALE_T, 1,
    323+
    [Define to 1 if the system has the type `locale_t'.])
    324+
    fi
    325+
    if test "$pgac_cv_type_locale_t" = 'yes (in xlocale.h)'; then
    326+
    AC_DEFINE(LOCALE_T_IN_XLOCALE, 1,
    327+
    [Define to 1 if `locale_t' requires <xlocale.h>.])
    328+
    fi])])# PGAC_TYPE_LOCALE_T

    configure

    Lines changed: 108 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -16830,6 +16830,114 @@ _ACEOF
    1683016830
    fi
    1683116831

    1683216832

    16833+
    { $as_echo "$as_me:$LINENO: checking for locale_t" >&5
    16834+
    $as_echo_n "checking for locale_t... " >&6; }
    16835+
    if test "${pgac_cv_type_locale_t+set}" = set; then
    16836+
    $as_echo_n "(cached) " >&6
    16837+
    else
    16838+
    cat >conftest.$ac_ext <<_ACEOF
    16839+
    /* confdefs.h. */
    16840+
    _ACEOF
    16841+
    cat confdefs.h >>conftest.$ac_ext
    16842+
    cat >>conftest.$ac_ext <<_ACEOF
    16843+
    /* end confdefs.h. */
    16844+
    #include <locale.h>
    16845+
    locale_t x;
    16846+
    int
    16847+
    main ()
    16848+
    {
    16849+
    16850+
    ;
    16851+
    return 0;
    16852+
    }
    16853+
    _ACEOF
    16854+
    rm -f conftest.$ac_objext
    16855+
    if { (ac_try="$ac_compile"
    16856+
    case "(($ac_try" in
    16857+
    *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
    16858+
    *) ac_try_echo=$ac_try;;
    16859+
    esac
    16860+
    eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
    16861+
    $as_echo "$ac_try_echo") >&5
    16862+
    (eval "$ac_compile") 2>conftest.er1
    16863+
    ac_status=$?
    16864+
    grep -v '^ *+' conftest.er1 >conftest.err
    16865+
    rm -f conftest.er1
    16866+
    cat conftest.err >&5
    16867+
    $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
    16868+
    (exit $ac_status); } && {
    16869+
    test -z "$ac_c_werror_flag" ||
    16870+
    test ! -s conftest.err
    16871+
    } && test -s conftest.$ac_objext; then
    16872+
    pgac_cv_type_locale_t=yes
    16873+
    else
    16874+
    $as_echo "$as_me: failed program was:" >&5
    16875+
    sed 's/^/| /' conftest.$ac_ext >&5
    16876+
    16877+
    cat >conftest.$ac_ext <<_ACEOF
    16878+
    /* confdefs.h. */
    16879+
    _ACEOF
    16880+
    cat confdefs.h >>conftest.$ac_ext
    16881+
    cat >>conftest.$ac_ext <<_ACEOF
    16882+
    /* end confdefs.h. */
    16883+
    #include <xlocale.h>
    16884+
    locale_t x;
    16885+
    int
    16886+
    main ()
    16887+
    {
    16888+
    16889+
    ;
    16890+
    return 0;
    16891+
    }
    16892+
    _ACEOF
    16893+
    rm -f conftest.$ac_objext
    16894+
    if { (ac_try="$ac_compile"
    16895+
    case "(($ac_try" in
    16896+
    *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
    16897+
    *) ac_try_echo=$ac_try;;
    16898+
    esac
    16899+
    eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
    16900+
    $as_echo "$ac_try_echo") >&5
    16901+
    (eval "$ac_compile") 2>conftest.er1
    16902+
    ac_status=$?
    16903+
    grep -v '^ *+' conftest.er1 >conftest.err
    16904+
    rm -f conftest.er1
    16905+
    cat conftest.err >&5
    16906+
    $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
    16907+
    (exit $ac_status); } && {
    16908+
    test -z "$ac_c_werror_flag" ||
    16909+
    test ! -s conftest.err
    16910+
    } && test -s conftest.$ac_objext; then
    16911+
    pgac_cv_type_locale_t='yes (in xlocale.h)'
    16912+
    else
    16913+
    $as_echo "$as_me: failed program was:" >&5
    16914+
    sed 's/^/| /' conftest.$ac_ext >&5
    16915+
    16916+
    pgac_cv_type_locale_t=no
    16917+
    fi
    16918+
    16919+
    rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    16920+
    fi
    16921+
    16922+
    rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    16923+
    fi
    16924+
    { $as_echo "$as_me:$LINENO: result: $pgac_cv_type_locale_t" >&5
    16925+
    $as_echo "$pgac_cv_type_locale_t" >&6; }
    16926+
    if test "$pgac_cv_type_locale_t" != no; then
    16927+
    16928+
    cat >>confdefs.h <<\_ACEOF
    16929+
    #define HAVE_LOCALE_T 1
    16930+
    _ACEOF
    16931+
    16932+
    fi
    16933+
    if test "$pgac_cv_type_locale_t" = 'yes (in xlocale.h)'; then
    16934+
    16935+
    cat >>confdefs.h <<\_ACEOF
    16936+
    #define LOCALE_T_IN_XLOCALE 1
    16937+
    _ACEOF
    16938+
    16939+
    fi
    16940+
    1683316941
    { $as_echo "$as_me:$LINENO: checking for struct cmsgcred" >&5
    1683416942
    $as_echo_n "checking for struct cmsgcred... " >&6; }
    1683516943
    if test "${ac_cv_type_struct_cmsgcred+set}" = set; then

    configure.in

    Lines changed: 2 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -1118,6 +1118,8 @@ AC_TYPE_INTPTR_T
    11181118
    AC_TYPE_UINTPTR_T
    11191119
    AC_TYPE_LONG_LONG_INT
    11201120

    1121+
    PGAC_TYPE_LOCALE_T
    1122+
    11211123
    AC_CHECK_TYPES([struct cmsgcred, struct fcred, struct sockcred], [], [],
    11221124
    [#include <sys/param.h>
    11231125
    #include <sys/types.h>

    contrib/btree_gin/btree_gin.c

    Lines changed: 3 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -7,6 +7,7 @@
    77

    88
    #include "fmgr.h"
    99
    #include "access/skey.h"
    10+
    #include "catalog/pg_collation.h"
    1011
    #include "utils/builtins.h"
    1112
    #include "utils/bytea.h"
    1213
    #include "utils/cash.h"
    @@ -120,8 +121,9 @@ gin_compare_prefix_##type(PG_FUNCTION_ARGS) \
    120121
    int32 res, \
    121122
    cmp; \
    122123
    \
    123-
    cmp = DatumGetInt32(DirectFunctionCall2( \
    124+
    cmp = DatumGetInt32(DirectFunctionCall2WithCollation( \
    124125
    TypeInfo_##type.typecmp, \
    126+
    DEFAULT_COLLATION_OID, \
    125127
    (data->strategy == BTLessStrategyNumber || \
    126128
    data->strategy == BTLessEqualStrategyNumber) \
    127129
    ? data->datum : a, \

    contrib/btree_gist/btree_text.c

    Lines changed: 7 additions & 6 deletions
    Original file line numberDiff line numberDiff line change
    @@ -3,6 +3,7 @@
    33
    */
    44
    #include "btree_gist.h"
    55
    #include "btree_utils_var.h"
    6+
    #include "catalog/pg_collation.h"
    67
    #include "utils/builtins.h"
    78

    89
    /*
    @@ -32,37 +33,37 @@ Datum gbt_text_same(PG_FUNCTION_ARGS);
    3233
    static bool
    3334
    gbt_textgt(const void *a, const void *b)
    3435
    {
    35-
    return (DatumGetBool(DirectFunctionCall2(text_gt, PointerGetDatum(a), PointerGetDatum(b))));
    36+
    return (DatumGetBool(DirectFunctionCall2WithCollation(text_gt, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b))));
    3637
    }
    3738

    3839
    static bool
    3940
    gbt_textge(const void *a, const void *b)
    4041
    {
    41-
    return (DatumGetBool(DirectFunctionCall2(text_ge, PointerGetDatum(a), PointerGetDatum(b))));
    42+
    return (DatumGetBool(DirectFunctionCall2WithCollation(text_ge, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b))));
    4243
    }
    4344

    4445
    static bool
    4546
    gbt_texteq(const void *a, const void *b)
    4647
    {
    47-
    return (DatumGetBool(DirectFunctionCall2(texteq, PointerGetDatum(a), PointerGetDatum(b))));
    48+
    return (DatumGetBool(DirectFunctionCall2WithCollation(texteq, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b))));
    4849
    }
    4950

    5051
    static bool
    5152
    gbt_textle(const void *a, const void *b)
    5253
    {
    53-
    return (DatumGetBool(DirectFunctionCall2(text_le, PointerGetDatum(a), PointerGetDatum(b))));
    54+
    return (DatumGetBool(DirectFunctionCall2WithCollation(text_le, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b))));
    5455
    }
    5556

    5657
    static bool
    5758
    gbt_textlt(const void *a, const void *b)
    5859
    {
    59-
    return (DatumGetBool(DirectFunctionCall2(text_lt, PointerGetDatum(a), PointerGetDatum(b))));
    60+
    return (DatumGetBool(DirectFunctionCall2WithCollation(text_lt, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b))));
    6061
    }
    6162

    6263
    static int32
    6364
    gbt_textcmp(const bytea *a, const bytea *b)
    6465
    {
    65-
    return DatumGetInt32(DirectFunctionCall2(bttextcmp, PointerGetDatum(a), PointerGetDatum(b)));
    66+
    return DatumGetInt32(DirectFunctionCall2WithCollation(bttextcmp, DEFAULT_COLLATION_OID, PointerGetDatum(a), PointerGetDatum(b)));
    6667
    }
    6768

    6869
    static gbtree_vinfo tinfo =

    contrib/btree_gist/btree_utils_var.c

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -8,6 +8,7 @@
    88
    #include <float.h>
    99

    1010
    #include "btree_utils_var.h"
    11+
    #include "catalog/pg_collation.h"
    1112
    #include "utils/pg_locale.h"
    1213
    #include "utils/builtins.h"
    1314
    #include "utils/rel.h"
    @@ -156,7 +157,7 @@ gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinf
    156157

    157158
    if (tinfo->eml > 1)
    158159
    {
    159-
    out = (varstr_cmp(q, nlen, n, nlen) == 0);
    160+
    out = (varstr_cmp(q, nlen, n, nlen, DEFAULT_COLLATION_OID) == 0);
    160161
    }
    161162
    else
    162163
    {

    contrib/citext/citext.c

    Lines changed: 18 additions & 17 deletions
    Original file line numberDiff line numberDiff line change
    @@ -18,7 +18,7 @@ PG_MODULE_MAGIC;
    1818
    * ====================
    1919
    */
    2020

    21-
    static int32 citextcmp(text *left, text *right);
    21+
    static int32 citextcmp(text *left, text *right, Oid collid);
    2222
    extern Datum citext_cmp(PG_FUNCTION_ARGS);
    2323
    extern Datum citext_hash(PG_FUNCTION_ARGS);
    2424
    extern Datum citext_eq(PG_FUNCTION_ARGS);
    @@ -42,17 +42,18 @@ extern Datum citext_larger(PG_FUNCTION_ARGS);
    4242
    * Returns int32 negative, zero, or positive.
    4343
    */
    4444
    static int32
    45-
    citextcmp(text *left, text *right)
    45+
    citextcmp(text *left, text *right, Oid collid)
    4646
    {
    4747
    char *lcstr,
    4848
    *rcstr;
    4949
    int32 result;
    5050

    51-
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
    52-
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
    51+
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left), collid);
    52+
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right), collid);
    5353

    5454
    result = varstr_cmp(lcstr, strlen(lcstr),
    55-
    rcstr, strlen(rcstr));
    55+
    rcstr, strlen(rcstr),
    56+
    collid);
    5657

    5758
    pfree(lcstr);
    5859
    pfree(rcstr);
    @@ -75,7 +76,7 @@ citext_cmp(PG_FUNCTION_ARGS)
    7576
    text *right = PG_GETARG_TEXT_PP(1);
    7677
    int32 result;
    7778

    78-
    result = citextcmp(left, right);
    79+
    result = citextcmp(left, right, PG_GET_COLLATION());
    7980

    8081
    PG_FREE_IF_COPY(left, 0);
    8182
    PG_FREE_IF_COPY(right, 1);
    @@ -92,7 +93,7 @@ citext_hash(PG_FUNCTION_ARGS)
    9293
    char *str;
    9394
    Datum result;
    9495

    95-
    str = str_tolower(VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    96+
    str = str_tolower(VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt), PG_GET_COLLATION());
    9697
    result = hash_any((unsigned char *) str, strlen(str));
    9798
    pfree(str);
    9899

    @@ -121,8 +122,8 @@ citext_eq(PG_FUNCTION_ARGS)
    121122

    122123
    /* We can't compare lengths in advance of downcasing ... */
    123124

    124-
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
    125-
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
    125+
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left), PG_GET_COLLATION());
    126+
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right), PG_GET_COLLATION());
    126127

    127128
    /*
    128129
    * Since we only care about equality or not-equality, we can avoid all the
    @@ -151,8 +152,8 @@ citext_ne(PG_FUNCTION_ARGS)
    151152

    152153
    /* We can't compare lengths in advance of downcasing ... */
    153154

    154-
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left));
    155-
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right));
    155+
    lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left), PG_GET_COLLATION());
    156+
    rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right), PG_GET_COLLATION());
    156157

    157158
    /*
    158159
    * Since we only care about equality or not-equality, we can avoid all the
    @@ -177,7 +178,7 @@ citext_lt(PG_FUNCTION_ARGS)
    177178
    text *right = PG_GETARG_TEXT_PP(1);
    178179
    bool result;
    179180

    180-
    result = citextcmp(left, right) < 0;
    181+
    result = citextcmp(left, right, PG_GET_COLLATION()) < 0;
    181182

    182183
    PG_FREE_IF_COPY(left, 0);
    183184
    PG_FREE_IF_COPY(right, 1);
    @@ -194,7 +195,7 @@ citext_le(PG_FUNCTION_ARGS)
    194195
    text *right = PG_GETARG_TEXT_PP(1);
    195196
    bool result;
    196197

    197-
    result = citextcmp(left, right) <= 0;
    198+
    result = citextcmp(left, right, PG_GET_COLLATION()) <= 0;
    198199

    199200
    PG_FREE_IF_COPY(left, 0);
    200201
    PG_FREE_IF_COPY(right, 1);
    @@ -211,7 +212,7 @@ citext_gt(PG_FUNCTION_ARGS)
    211212
    text *right = PG_GETARG_TEXT_PP(1);
    212213
    bool result;
    213214

    214-
    result = citextcmp(left, right) > 0;
    215+
    result = citextcmp(left, right, PG_GET_COLLATION()) > 0;
    215216

    216217
    PG_FREE_IF_COPY(left, 0);
    217218
    PG_FREE_IF_COPY(right, 1);
    @@ -228,7 +229,7 @@ citext_ge(PG_FUNCTION_ARGS)
    228229
    text *right = PG_GETARG_TEXT_PP(1);
    229230
    bool result;
    230231

    231-
    result = citextcmp(left, right) >= 0;
    232+
    result = citextcmp(left, right, PG_GET_COLLATION()) >= 0;
    232233

    233234
    PG_FREE_IF_COPY(left, 0);
    234235
    PG_FREE_IF_COPY(right, 1);
    @@ -251,7 +252,7 @@ citext_smaller(PG_FUNCTION_ARGS)
    251252
    text *right = PG_GETARG_TEXT_PP(1);
    252253
    text *result;
    253254

    254-
    result = citextcmp(left, right) < 0 ? left : right;
    255+
    result = citextcmp(left, right, PG_GET_COLLATION()) < 0 ? left : right;
    255256
    PG_RETURN_TEXT_P(result);
    256257
    }
    257258

    @@ -264,6 +265,6 @@ citext_larger(PG_FUNCTION_ARGS)
    264265
    text *right = PG_GETARG_TEXT_PP(1);
    265266
    text *result;
    266267

    267-
    result = citextcmp(left, right) > 0 ? left : right;
    268+
    result = citextcmp(left, right, PG_GET_COLLATION()) > 0 ? left : right;
    268269
    PG_RETURN_TEXT_P(result);
    269270
    }

    contrib/citext/citext.sql.in

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -52,7 +52,8 @@ CREATE TYPE citext (
    5252
    STORAGE = extended,
    5353
    -- make it a non-preferred member of string type category
    5454
    CATEGORY = 'S',
    55-
    PREFERRED = false
    55+
    PREFERRED = false,
    56+
    COLLATABLE = true
    5657
    );
    5758

    5859
    --

    contrib/ltree/lquery_op.c

    Lines changed: 3 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -7,6 +7,7 @@
    77

    88
    #include <ctype.h>
    99

    10+
    #include "catalog/pg_collation.h"
    1011
    #include "utils/array.h"
    1112
    #include "utils/formatting.h"
    1213
    #include "ltree.h"
    @@ -90,8 +91,8 @@ bool
    9091
    int
    9192
    ltree_strncasecmp(const char *a, const char *b, size_t s)
    9293
    {
    93-
    char *al = str_tolower(a, s);
    94-
    char *bl = str_tolower(b, s);
    94+
    char *al = str_tolower(a, s, DEFAULT_COLLATION_OID);
    95+
    char *bl = str_tolower(b, s, DEFAULT_COLLATION_OID);
    9596
    int res;
    9697

    9798
    res = strncmp(al, bl, s);

    0 commit comments

    Comments
     (0)
    0