8000 Fix integer-overflow problems in interval comparison. · thatguystone/postgres@d68a2b2 · GitHub
[go: up one dir, main page]

Skip to content

Commit d68a2b2

Browse files
committed
Fix integer-overflow problems in interval comparison.
When using integer timestamps, the interval-comparison functions tried to compute the overall magnitude of an interval as an int64 number of microseconds. As reported by Frazer McLean, this overflows for intervals exceeding about 296000 years, which is bad since we nominally allow intervals many times larger than that. That results in wrong comparison results, and possibly in corrupted btree indexes for columns containing such large interval values. To fix, compute the magnitude as int128 instead. Although some compilers have native support for int128 calculations, many don't, so create our own support functions that can do 128-bit addition and multiplication if the compiler support isn't there. These support functions are designed with an eye to allowing the int128 code paths in numeric.c to be rewritten for use on all platforms, although this patch doesn't do that, or even provide all the int128 primitives that will be needed for it. Back-patch as far as 9.4. Earlier releases did not guard against overflow of interval val 8000 ues at all (commit 146604e fixed that), so it seems not very exciting to worry about overly-large intervals for them. Before 9.6, we did not assume that unreferenced "static inline" functions would not draw compiler warnings, so omit functions not directly referenced by timestamp.c, the only present consumer of int128.h. (We could have omitted these functions in HEAD too, but since they were written and debugged on the way to the present patch, and they look likely to be needed by numeric.c, let's keep them in HEAD.) I did not bother to try to prevent such warnings in a --disable-integer-datetimes build, though. Before 9.5, configure will never define HAVE_INT128, so the part of int128.h that exploits a native int128 implementation is dead code in the 9.4 branch. I didn't bother to remove it, thinking that keeping the file looking similar in different branches is more useful. In HEAD only, add a simple test harness for int128.h in src/tools/. In back branches, this does not change the float-timestamps code path. That's not subject to the same kind of overflow risk, since it computes the interval magnitude as float8. (No doubt, when this code was originally written, overflow was disregarded for exactly that reason.) There is a precision hazard instead :-(, but we'll avert our eyes from that question, since no complaints have been reported and that code's deprecated anyway. Kyotaro Horiguchi and Tom Lane Discussion: https://postgr.es/m/1490104629.422698.918452336.26FA96B7@webmail.messagingengine.com
1 parent 2843d5d commit d68a2b2

File tree

4 files changed

+376
-11
lines changed

4 files changed

+376
-11
lines changed

src/backend/utils/adt/timestamp.c

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "access/hash.h"
2525
#include "access/xact.h"
2626
#include "catalog/pg_type.h"
27+
#include "common/int128.h"
2728
#include "funcapi.h"
2829
#include "libpq/pqformat.h"
2930
#include "miscadmin.h"
@@ -2482,19 +2483,47 @@ timestamptz_cmp_timestamp(PG_FUNCTION_ARGS)
24822483
/*
24832484
* interval_relop - is interval1 relop interval2
24842485
*
2485-
* collate invalid interval at the end
2486+
* Interval comparison is based on converting interval values to a linear
2487+
* representation expressed in the units of the time field (microseconds,
2488+
* in the case of integer timestamps) with days assumed to be always 24 hours
2489+
* and months assumed to be always 30 days. To avoid overflow, we need a
2490+
* wider-than-int64 datatype for the linear representation, so use INT128
2491+
* with integer timestamps.
2492+
*
2493+
* In the float8 case, our problems are not with overflow but with precision;
2494+
* but it's been like that since day one, so live with it.
24862495
*/
2487-
static inline TimeOffset
2496+
#ifdef HAVE_INT64_TIMESTAMP
2497+
typedef INT128 IntervalOffset;
2498+
#else
2499+
typedef TimeOffset IntervalOffset;
2500+
#endif
2501+
2502+
static inline IntervalOffset
24882503
interval_cmp_value(const Interval *interval)
24892504
{
2490-
TimeOffset span;
2491-
2492-
span = interval->time;
2505+
IntervalOffset span;
24932506

24942507
#ifdef HAVE_INT64_TIMESTAMP
2495-
span += interval->month * INT64CONST(30) * USECS_PER_DAY;
2496-
span += interval->day * INT64CONST(24) * USECS_PER_HOUR;
2508+
int64 dayfraction;
2509+
int64 days;
2510+
2511+
/*
2512+
* Separate time field into days and dayfraction, then add the month and
2513+
* day fields to the days part. We cannot overflow int64 days here.
2514+
*/
2515+
dayfraction = interval->time % USECS_PER_DAY;
2516+
days = interval->time / USECS_PER_DAY;
2517+
days += interval->month * INT64CONST(30);
2518+
days += interval->day;
2519+
2520+
/* Widen dayfraction to 128 bits */
2521+
span = int64_to_int128(dayfraction);
2522+
2523+
/* Scale up days to microseconds, forming a 128-bit product */
2524+
int128_add_int64_mul_int64(&span, days, USECS_PER_DAY);
24972525
#else
2526+
span = interval->time;
24982527
span += interval->month * ((double) DAYS_PER_MONTH * SECS_PER_DAY);
24992528
span += interval->day * ((double) HOURS_PER_DAY * SECS_PER_HOUR);
25002529
#endif
@@ -2505,10 +2534,14 @@ interval_cmp_value(const Interval *interval)
25052534
static int
25062535
interval_cmp_internal(Interval *interval1, Interval *interval2)
25072536
{
2508-
TimeOffset span1 = interval_cmp_value(interval1);
2509-
TimeOffset span2 = interval_cmp_value(interval2);
2537+
IntervalOffset span1 = interval_cmp_value(interval1);
2538+
IntervalOffset span2 = interval_cmp_value(interval2);
25102539

2540+
#ifdef HAVE_INT64_TIMESTAMP
2541+
return int128_compare(span1, span2);
2542+
#else
25112543
return ((span1 < span2) ? -1 : (span1 > span2) ? 1 : 0);
2544+
#endif
25122545
}
25132546

25142547
Datum
@@ -2585,10 +2618,20 @@ Datum
25852618
interval_hash(PG_FUNCTION_ARGS)
25862619
{
25872620
Interval *interval = PG_GETARG_INTERVAL_P(0);
2588-
TimeOffset span = interval_cmp_value(interval);
2621+
IntervalOffset span = interval_cmp_value(interval);
25892622

25902623
#ifdef HAVE_INT64_TIMESTAMP
2591-
return DirectFunctionCall1(hashint8, Int64GetDatumFast(span));
2624+
int64 span64;
2625+
2626+
/*
2627+
* Use only the least significant 64 bits for hashing. The upper 64 bits
2628+
* seldom add any useful information, and besides we must do it like this
2629+
* for compatibility with hashes calculated before use of INT128 was
2630+
* introduced.
2631+
*/
2632+
span64 = int128_to_int64(span);
2633+
2634+
return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64));
25922635
#else
25932636
return DirectFunctionCall1(hashfloat8, Float8GetDatumFast(span));
25942637
#endif

src/include/common/int128.h

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* int128.h
4+
* Roll-our-own 128-bit integer arithmetic.
5+
*
6+
* We make use of the native int128 type if there is one, otherwise
7+
* implement things the hard way based on two int64 halves.
8+
*
9+
* See src/tools/testint128.c for a simple test harness for this file.
10+
*
11+
* Copyright (c) 2017, PostgreSQL Global Development Group
12+
*
13+
* src/include/common/int128.h
14+
*
15+
*-------------------------------------------------------------------------
16+
*/
17+
#ifndef INT128_H
18+
#define INT128_H
19+
20+
/*
21+
* For testing purposes, use of native int128 can be switched on/off by
22+
* predefining USE_NATIVE_INT128.
23+
*/
24+
#ifndef USE_NATIVE_INT128
25+
#ifdef HAVE_INT128
26+
#define USE_NATIVE_INT128 1
27+
#else
28+
#define USE_NATIVE_INT128 0
29+
#endif
30+
#endif
31+
32+
33+
#if USE_NATIVE_INT128
34+
35+
typedef int128 INT128;
36+
37+
/*
38+
* Add the 128-bit product of two int64 values into an INT128 variable.
39+
*
40+
* XXX with a stupid compiler, this could actually be less efficient than
41+
* the other implementation; maybe we should do it by hand always?
42+
*/
43+
static inline void
44+
int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y)
45+
{
46+
*i128 += (int128) x *(int128) y;
47+
}
48+
49+
/*
50+
* Compare two INT128 values, return -1, 0, or +1.
51+
*/
52+
static inline int
53+
int128_compare(INT128 x, INT128 y)
54+
{
55+
if (x < y)
56+
return -1;
57+
if (x > y)
58+
return 1;
59+
return 0;
60+
}
61+
62+
/*
63+
* Widen int64 to INT128.
64+
*/
65+
static inline INT128
66+
int64_to_int128(int64 v)
67+
{
68+
return (INT128) v;
69+
}
70+
71+
/*
72+
* Convert INT128 to int64 (losing any high-order bits).
73+
* This also works fine for casting down to uint64.
74+
*/
75+
static inline int64
76+
int128_to_int64(INT128 val)
77+
{
78+
return (int64) val;
79+
}
80+
81+
#else /* !USE_NATIVE_INT128 */
82+
83+
/*
84+
* We lay out the INT128 structure with the same content and byte ordering
85+
* that a native int128 type would (probably) have. This makes no difference
86+
* for ordinary use of INT128, but allows union'ing INT128 with int128 for
87+
* testing purposes.
88+
*/
89+
typedef struct
90+
{
91+
#ifdef WORDS_BIGENDIAN
92+
int64 hi; /* most significant 64 bits, including sign */
93+
uint64 lo; /* least significant 64 bits, without sign */
94+
#else
95+
uint64 lo; /* least significant 64 bits, without sign */
96+
int64 hi; /* most significant 64 bits, including sign */
97+
#endif
98+
} INT128;
99+
100+
/*
101+
* Add an unsigned int64 value into an INT128 variable.
102+
*/
103+
static inline void
104+
int128_add_uint64(INT128 *i128, uint64 v)
105+
{
106+
/*
107+
* First add the value to the .lo part, then check to see if a carry needs
108+
* to be propagated into the .hi part. A carry is needed if both inputs
109+
* have high bits set, or if just one input has high bit set while the new
110+
* .lo part doesn't. Remember that .lo part is unsigned; we cast to
111+
* signed here just as a cheap way to check the high bit.
112+
*/
113+
uint64 oldlo = i128->lo;
114+
115+
i128->lo += v;
116+
if (((int64) v < 0 && (int64) oldlo < 0) ||
117+
(((int64) v < 0 || (int64) oldlo < 0) && (int64) i128->lo >= 0))
118+
i128->hi++;
119+
}
120+
121+
/*
122+
* INT64_AU32 extracts the most significant 32 bits of int64 as int64, while
123+
* INT64_AL32 extracts the least significant 32 bits as uint64.
124+
*/
125+
#define INT64_AU32(i64) ((i64) >> 32)
126+
#define INT64_AL32(i64) ((i64) & UINT64CONST(0xFFFFFFFF))
127+
128+
/*
129+
* Add the 128-bit product of two int64 values into an INT128 variable.
130+
*/
131+
static inline void
132+
int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y)
133+
{
134+
/* INT64_AU32 must use arithmetic right shift */
135+
StaticAssertStmt(((int64) -1 >> 1) == (int64) -1,
136+
"arithmetic right shift is needed");
137+
138+
/*----------
139+
* Form the 128-bit product x * y using 64-bit arithmetic.
140+
* Considering each 64-bit input as having 32-bit high and low parts,
141+
* we can compute
142+
*
143+
* x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
144+
* = (x.hi * y.hi) << 64 +
145+
* (x.hi * y.lo) << 32 +
146+
* (x.lo * y.hi) << 32 +
147+
* x.lo * y.lo
148+
*
149+
* Each individual product is of 32-bit terms so it won't overflow when
150+
* computed in 64-bit arithmetic. Then we just have to shift it to the
151+
* correct position while adding into the 128-bit result. We must also
152+
* keep in mind that the "lo" parts must be treated as unsigned.
153+
*----------
154+
*/
155+
156+
/* No need to work hard if product must be zero */
157+
if (x != 0 && y != 0)
158+
{
159+
int64 x_u32 = INT64_AU32(x);
160+
uint64 x_l32 = INT64_AL32(x);
161+
int64 y_u32 = INT64_AU32(y);
162+
uint64 y_l32 = INT64_AL32(y);
163+
int64 tmp;
164+
165+
/* the first term */
166+
i128->hi += x_u32 * y_u32;
167+
168+
/* the second term: sign-extend it only if x is negative */
169+
tmp = x_u32 * y_l32;
170+
if (x < 0)
171+
i128->hi += INT64_AU32(tmp);
172+
else
173+
i128->hi += ((uint64) tmp) >> 32;
174+
int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32);
175+
176+
/* the third term: sign-extend it only if y is negative */
177+
tmp = x_l32 * y_u32;
178+
if (y < 0)
179+
i128->hi += INT64_AU32(tmp);
180+
else
181+
i128->hi += ((uint64) tmp) >> 32;
182+
int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32);
183+
184+
/* the fourth term: always unsigned */
185+
int128_add_uint64(i128, x_l32 * y_l32);
186+
}
187+
}
188+
189+
/*
190+
* Compare two INT128 values, return -1, 0, or +1.
191+
*/
192+
static inline int
193+
int128_compare(INT128 x, INT128 y)
194+
{
195+
if (x.hi < y.hi)
196+
return -1;
197+
if (x.hi > y.hi)
198+
return 1;
199+
if (x.lo < y.lo)
200+
return -1;
201+
if (x.lo > y.lo)
202+
return 1;
203+
return 0;
204+
}
205+
206+
/*
207+
* Widen int64 to INT128.
208+
*/
209+
static inline INT128
210+
int64_to_int128(int64 v)
211+
{
212+
INT128 val;
213+
214+
val.lo = (uint64) v;
215+
val.hi = (v < 0) ? -INT64CONST(1) : INT64CONST(0);
216+
return val;
217+
}
218+
219+
/*
220+
* Convert INT128 to int64 (losing any high-order bits).
221+
* This also works fine for casting down to uint64.
222+
*/
223+
static inline int64
224+
int128_to_int64(INT128 val)
225+
{
226+
return (int64) val.lo;
227+
}
228+
229+
#endif /* USE_NATIVE_INT128 */
230+
231+
#endif /* INT128_H */

0 commit comments

Comments
 (0)
0