8000 Fix GiST index build for NaN values in geometric types. · postgrespro/postgres@1acf757 · GitHub
[go: up one dir, main page]

Skip to content
  • Commit 1acf757

    Browse files
    committed
    Fix GiST index build for NaN values in geometric types.
    GiST index build could go into an infinite loop when presented with boxes (or points, circles or polygons) containing NaN component values. This happened essentially because the code assumed that x == x is true for any "double" value x; but it's not true for NaNs. The looping behavior was not the only problem though: we also attempted to sort the items using simple double comparisons. Since NaNs violate the trichotomy law, qsort could (in principle at least) get arbitrarily confused and mess up the sorting of ordinary values as well as NaNs. And we based splitting choices on box size calculations that could produce NaNs, again resulting in undesirable behavior. To fix, replace all comparisons of doubles in this logic with float8_cmp_internal, which is NaN-aware and is careful to sort NaNs consistently, higher than any non-NaN. Also rearrange the box size calculation to not produce NaNs; instead it should produce an infinity for a box with NaN on one side and not-NaN on the other. I don't by any means claim that this solves all problems with NaNs in geometric values, but it should at least make GiST index insertion work reliably with such data. It's likely that the index search side of things still needs some work, and probably regular geometric operations too. But with this patch we're laying down a convention for how such cases ought to behave. Per bug #14238 from Guang-Dih Lei. Back-patch to 9.2; the code used before commit 7f3bd86 is quite different and doesn't lock up on my simple test case, nor on the submitter's dataset. Report: <20160708151747.1426.60150@wrigleys.postgresql.org> Discussion: <28685.1468246504@sss.pgh.pa.us>
    1 parent 00e0b67 commit 1acf757

    File tree

    3 files changed

    +92
    -67
    lines changed

    3 files changed

    +92
    -67
    lines changed

    src/backend/access/gist/gistproc.c

    Lines changed: 88 additions & 63 deletions
    Original file line numberDiff line numberDiff line change
    @@ -17,20 +17,31 @@
    1717
    */
    1818
    #include "postgres.h"
    1919

    20+
    #include <math.h>
    21+
    2022
    #include "access/gist.h"
    2123
    #include "access/stratnum.h"
    24+
    #include "utils/builtins.h"
    2225
    #include "utils/geo_decls.h"
    2326

    2427

    2528
    static bool gist_box_leaf_consistent(BOX *key, BOX *query,
    2629
    StrategyNumber strategy);
    27-
    static double size_box(BOX *box);
    2830
    static bool rtree_internal_consistent(BOX *key, BOX *query,
    2931
    StrategyNumber strategy);
    3032

    3133
    /* Minimum accepted ratio of split */
    3234
    #define LIMIT_RATIO 0.3
    3335

    36+
    /* Convenience macros for NaN-aware comparisons */
    37+
    #define FLOAT8_EQ(a,b) (float8_cmp_internal(a, b) == 0)
    38+
    #define FLOAT8_LT(a,b) (float8_cmp_internal(a, b) < 0)
    39+
    #define FLOAT8_LE(a,b) (float8_cmp_internal(a, b) <= 0)
    40+
    #define FLOAT8_GT(a,b) (float8_cmp_internal(a, b) > 0)
    41+
    #define FLOAT8_GE(a,b) (float8_cmp_internal(a, b) >= 0)
    42+
    #define FLOAT8_MAX(a,b) (FLOAT8_GT(a, b) ? (a) : (b))
    43+
    #define FLOAT8_MIN(a,b) (FLOAT8_LT(a, b) ? (a) : (b))
    44+
    3445

    3546
    /**************************************************
    3647
    * Box ops
    @@ -40,12 +51,53 @@ static bool rtree_internal_consistent(BOX *key, BOX *query,
    4051
    * Calculates union of two boxes, a and b. The result is stored in *n.
    4152
    */
    4253
    static void
    43-
    rt_box_union(BOX *n, BOX *a, BOX *b)
    54+
    rt_box_union(BOX *n, const BOX *a, const BOX *b)
    55+
    {
    56+
    n->high.x = FLOAT8_MAX(a->high.x, b->high.x);
    57+
    n->high.y = FLOAT8_MAX(a->high.y, b->high.y);
    58+
    n->low.x = FLOAT8_MIN(a->low.x, b->low.x);
    59+
    n->low.y = FLOAT8_MIN(a->low.y, b->low.y);
    60+
    }
    61+
    62+
    /*
    63+
    * Size of a BOX for penalty-calculation purposes.
    64+
    * The result can be +Infinity, but not NaN.
    65+
    */
    66+
    static double
    67+
    size_box(const BOX *box)
    68+
    {
    69+
    /*
    70+
    * Check for zero-width cases. Note that we define the size of a zero-
    71+
    * by-infinity box as zero. It's important to special-case this somehow,
    72+
    * as naively multiplying infinity by zero will produce NaN.
    73+
    *
    74+
    * The less-than cases should not happen, but if they do, say "zero".
    75+
    */
    76+
    if (FLOAT8_LE(box->high.x, box->low.x) ||
    77+
    FLOAT8_LE(box->high.y, box->low.y))
    78+
    return 0.0;
    79+
    80+
    /*
    81+
    * We treat NaN as larger than +Infinity, so any distance involving a NaN
    82+
    * and a non-NaN is infinite. Note the previous check eliminated the
    83+
    * possibility that the low fields are NaNs.
    84+
    */
    85+
    if (isnan(box->high.x) || isnan(box->high.y))
    86+
    return get_float8_infinity();
    87+
    return (box->high.x - box->low.x) * (box->high.y - box->low.y);
    88+
    }
    89+
    90+
    /*
    91+
    * Return amount by which the union of the two boxes is larger than
    92+
    * the original BOX's area. The result can be +Infinity, but not NaN.
    93+
    */
    94+
    static double
    95+
    box_penalty(const BOX *original, const BOX *new)
    4496
    {
    45-
    n->high.x = Max(a->high.x, b->high.x);
    46-
    n->high.y = Max(a->high.y, b->high.y);
    47-
    n->low.x = Min(a->low.x, b->low.x);
    48-
    n->low.y = Min(a->low.y, b->low.y);
    97+
    BOX unionbox;
    98+
    99+
    rt_box_union(&unionbox, original, new);
    100+
    return size_box(&unionbox) - size_box(original);
    49101
    }
    50102

    51103
    /*
    @@ -85,16 +137,19 @@ gist_box_consistent(PG_FUNCTION_ARGS)
    85137
    strategy));
    86138
    }
    87139

    140+
    /*
    141+
    * Increase BOX b to include addon.
    142+
    */
    88143
    static void
    89-
    adjustBox(BOX *b, BOX *addon)
    144+
    adjustBox(BOX *b, const BOX *addon)
    90145
    {
    91-
    if (b->high.x < addon->high.x)
    146+
    if (FLOAT8_LT(b->high.x, addon->high.x))
    92147
    b->high.x = addon->high.x;
    93-
    if (b->low.x > addon->low.x)
    148+
    if (FLOAT8_GT(b->low.x, addon->low.x))
    94149
    b->low.x = addon->low.x;
    95-
    if (b->high.y < addon->high.y)
    150+
    if (FLOAT8_LT(b->high.y, addon->high.y))
    96151
    b->high.y = addon->high.y;
    97-
    if (b->low.y > addon->low.y)
    152+
    if (FLOAT8_GT(b->low.y, addon->low.y))
    98153
    b->low.y = addon->low.y;
    99154
    }
    100155

    @@ -174,10 +229,8 @@ gist_box_penalty(PG_FUNCTION_ARGS)
    174229
    float *result = (float *) PG_GETARG_POINTER(2);
    175230
    BOX *origbox = DatumGetBoxP(origentry->key);
    176231
    BOX *newbox = DatumGetBoxP(newentry->key);
    177-
    BOX unionbox;
    178232

    179-
    rt_box_union(&unionbox, origbox, newbox);
    180-
    *result = (float) (size_box(&unionbox) - size_box(origbox));
    233+
    *result = (float) box_penalty(origbox, newbox);
    181234
    PG_RETURN_POINTER(result);
    182235
    }
    183236

    @@ -290,12 +343,7 @@ interval_cmp_lower(const void *i1, const void *i2)
    290343
    double lower1 = ((const SplitInterval *) i1)->lower,
    291344
    lower2 = ((const SplitInterval *) i2)->lower;
    292345

    293-
    if (lower1 < lower2)
    294-
    return -1;
    295-
    else if (lower1 > lower2)
    296-
    return 1;
    297-
    else
    298-
    return 0;
    346+
    return float8_cmp_internal(lower1, lower2);
    299347
    }
    300348

    301349
    /*
    @@ -307,16 +355,11 @@ interval_cmp_upper(const void *i1, const void *i2)
    307355
    double upper1 = ((const SplitInterval *) i1)->upper,
    308356
    upper2 = ((const SplitInterval *) i2)->upper;
    309357

    310-
    if (upper1 < upper2)
    311-
    return -1;
    312-
    else if (upper1 > upper2)
    313-
    return 1;
    314-
    else
    315-
    return 0;
    358+
    return float8_cmp_internal(upper1, upper2);
    316359
    }
    317360

    318361
    /*
    319-
    * Replace negative value with zero.
    362+
    * Replace negative (or NaN) value with zero.
    320363
    */
    321364
    static inline float
    322365
    non_negative(float val)
    @@ -435,25 +478,9 @@ g_box_consider_split(ConsiderSplitContext *context, int dimNum,
    435478
    }
    436479
    }
    437480

    438-
    /*
    439-
    * Return increase of original BOX area by new BOX area insertion.
    440-
    */
    441-
    static double
    442-
    box_penalty(BOX *original, BOX *new)
    443-
    {
    444-
    double union_width,
    445-
    union_height;
    446-
    447-
    union_width = Max(original->high.x, new->high.x) -
    448-
    Min(original->low.x, new->low.x);
    449-
    union_height = Max(original->high.y, new->high.y) -
    450-
    Min(original->low.y, new->low.y);
    451-
    return union_width * union_height - (original->high.x - original->low.x) *
    452-
    (original->high.y - original->low.y);
    453-
    }
    454-
    455481
    /*
    456482
    * Compare common entries by their deltas.
    483+
    * (We assume the deltas can't be NaN.)
    457484
    */
    458485
    static int
    459486
    common_entry_cmp(const void *i1, const void *i2)
    @@ -615,9 +642,11 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    615642
    /*
    616643
    * Find next lower bound of right group.
    617644
    */
    618-
    while (i1 < nentries && rightLower == intervalsLower[i1].lower)
    645+
    while (i1 < nentries &&
    646+
    FLOAT8_EQ(rightLower, intervalsLower[i1].lower))
    619647
    {
    620-
    leftUpper = Max(leftUpper, intervalsLower[i1].upper);
    648+
    if (FLOAT8_LT(leftUpper, intervalsLower[i1].upper))
    649+
    leftUpper = intervalsLower[i1].upper;
    621650
    i1++;
    622651
    }
    623652
    if (i1 >= nentries)
    @@ -628,7 +657,8 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    628657
    * Find count of intervals which anyway should be placed to the
    629658
    * left group.
    630659
    */
    631-
    while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
    660+
    while (i2 < nentries &&
    661+
    FLOAT8_LE(intervalsUpper[i2].upper, leftUpper))
    632662
    i2++;
    633663

    634664
    /*
    @@ -650,9 +680,10 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    650680
    /*
    651681
    * Find next upper bound of left group.
    652682
    */
    653-
    while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
    683+
    while (i2 >= 0 && FLOAT8_EQ(leftUpper, intervalsUpper[i2].upper))
    654684
    {
    655-
    rightLower = Min(rightLower, intervalsUpper[i2].lower);
    685+
    if (FLOAT8_GT(rightLower, intervalsUpper[i2].lower))
    686+
    rightLower = intervalsUpper[i2].lower;
    656687
    i2--;
    657688
    }
    658689
    if (i2 < 0)
    @@ -663,7 +694,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    663694
    * Find count of intervals which anyway should be placed to the
    664695
    * right group.
    665696
    */
    666-
    while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
    697+
    while (i1 >= 0 && FLOAT8_GE(intervalsLower[i1].lower, rightLower))
    667698
    i1--;
    668699

    669700
    /*
    @@ -751,10 +782,10 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    751782
    upper = box->high.y;
    752783
    }
    753784

    754-
    if (upper <= context.leftUpper)
    785+
    if (FLOAT8_LE(upper, context.leftUpper))
    755786
    {
    756787
    /* Fits to the left group */
    757-
    if (lower >= context.rightLower)
    788+
    if (FLOAT8_GE(lower, context.rightLower))
    758789
    {
    759790
    /* Fits also to the right group, so "common entry" */
    760791
    commonEntries[commonEntriesCount++].index = i;
    @@ -772,7 +803,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
    772803
    * entry didn't fit on the left group, it better fit in the right
    773804
    * group.
    774805
    */
    775-
    Assert(lower >= context.rightLower);
    806+
    Assert(FLOAT8_GE(lower, context.rightLower));
    776807

    777808
    /* Doesn't fit to the left group, so join to the right group */
    778809
    PLACE_RIGHT(box, i);
    @@ -856,8 +887,10 @@ gist_box_same(PG_FUNCTION_ARGS)
    856887
    bool *result = (bool *) PG_GETARG_POINTER(2);
    857888

    858889
    if (b1 && b2)
    859-
    *result = (b1->low.x == b2->low.x && b1->low.y == b2->low.y &&
    860-
    b1->high.x == b2->high.x && b1->high.y == b2->high.y);
    890+
    *result = (FLOAT8_EQ(b1->low.x, b2->low.x) &&
    891+
    FLOAT8_EQ(b1->low.y, b2->low.y) &&
    892+
    FLOAT8_EQ(b1->high.x, b2->high.x) &&
    893+
    FLOAT8_EQ(b1->high.y, b2->high.y));
    861894
    else
    862895
    *result = (b1 == NULL && b2 == NULL);
    863896
    PG_RETURN_POINTER(result);
    @@ -943,14 +976,6 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
    943976
    return retval;
    944977
    }
    945978

    946-
    static double
    947-
    size_box(BOX *box)
    948-
    {
    949-
    if (box->high.x <= box->low.x || box->high.y <= box->low.y)
    950-
    return 0.0;
    951-
    return (box->high.x - box->low.x) * (box->high.y - box->low.y);
    952-
    }
    953-
    954979
    /*****************************************
    955980
    * Common rtree functions (for boxes, polygons, and circles)
    956981
    *****************************************/

    src/backend/utils/adt/float.c

    Lines changed: 2 additions & 4 deletions
    Original file line numberDiff line numberDiff line change
    @@ -90,8 +90,6 @@ float8 degree_c_one_half = 0.5;
    9090
    float8 degree_c_one = 1.0;
    9191

    9292
    /* Local function prototypes */
    93-
    static int float4_cmp_internal(float4 a, float4 b);
    94-
    static int float8_cmp_internal(float8 a, float8 b);
    9593
    static double sind_q1(double x);
    9694
    static double cosd_q1(double x);
    9795
    static void init_degree_constants(void);
    @@ -936,7 +934,7 @@ float8div(PG_FUNCTION_ARGS)
    936934
    /*
    937935
    * float4{eq,ne,lt,le,gt,ge} - float4/float4 comparison operations
    938936
    */
    939-
    static int
    937+
    int
    940938
    float4_cmp_internal(float4 a, float4 b)
    941939
    {
    942940
    /*
    @@ -1050,7 +1048,7 @@ btfloat4sortsupport(PG_FUNCTION_ARGS)
    10501048
    /*
    10511049
    * float8{eq,ne,lt,le,gt,ge} - float8/float8 comparison operations
    10521050
    */
    1053-
    static int
    1051+
    int
    10541052
    float8_cmp_internal(float8 a, float8 b)
    10551053
    {
    10561054
    /*

    src/include/utils/builtins.h

    Lines changed: 2 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -346,6 +346,8 @@ extern int is_infinite(double val);
    346346
    extern double float8in_internal(char *num, char **endptr_p,
    347347
    const char *type_name, const char *orig_string);
    348348
    extern char *float8out_internal(double num);
    349+
    extern int float4_cmp_internal(float4 a, float4 b);
    350+
    extern int float8_cmp_internal(float8 a, float8 b);
    349351

    350352
    extern Datum float4in(PG_FUNCTION_ARGS);
    351353
    extern Datum float4out(PG_FUNCTION_ARGS);

    0 commit comments

    Comments
     (0)
    0