8000 simplified math.sumPrecise() · bellard/quickjs@c3e5ae2 · GitHub
[go: up one dir, main page]

Skip to content

Commit c3e5ae2

Browse files
author
Fabrice Bellard
committed
simplified math.sumPrecise()
1 parent 0060876 commit c3e5ae2

File tree

1 file changed

+94
-95
lines changed

1 file changed

+94
-95
lines changed

quickjs.c

Lines changed: 94 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val,
4545845458
return JS_NewInt32(ctx, r);
4545945459
}
4546045460

45461-
/* we add one extra limb to avoid having to test for overflows during the sum */
45462-
#define SUM_PRECISE_ACC_LEN 34
45463-
4546445461
typedef enum {
45465-
SUM_PRECISE_STATE_MINUS_ZERO,
4546645462
SUM_PRECISE_STATE_FINITE,
4546745463
SUM_PRECISE_STATE_INFINITY,
4546845464
SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
4546945465
SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
4547045466
} SumPreciseStateEnum;
4547145467

45468+
#define SP_LIMB_BITS 56
45469+
#define SP_RND_BITS (SP_LIMB_BITS - 53)
45470+
/* we add one extra limb to avoid having to test for overflows during the sum */
45471+
#define SUM_PRECISE_ACC_LEN 39
45472+
45473+
#define SUM_PRECISE_COUNTER_INIT 250
45474+
4547245475
typedef struct {
45473-
uint64_t acc[SUM_PRECISE_ACC_LEN];
45474-
int n_limbs; /* acc is not necessarily normalized */
4547545476
SumPreciseStateEnum state;
45477+
uint32_t counter;
45478+
int n_limbs; /* 'acc' contains n_limbs and is not necessarily
45479+
acc[n_limb - 1] may be 0. 0 indicates minus zero
45480+
result when state = SUM_PRECISE_STATE_FINITE */
45481+
int64_t acc[SUM_PRECISE_ACC_LEN];
4547645482
} SumPreciseState;
4547745483

4547845484
static void sum_precise_init(SumPreciseState *s)
4547945485
{
45480-
s->state = SUM_PRECISE_STATE_MINUS_ZERO;
45481-
s->acc[0] = 0;
45482-
s->n_limbs = 1;
45486+
memset(s->acc, 0, sizeof(s->acc));
45487+
s->state = SUM_PRECISE_STATE_FINITE;
45488+
s->counter = SUM_PRECISE_COUNTER_INIT;
45489+
s->n_limbs = 0;
4548345490
}
4548445491

45485-
#define ADDC64(res, carry_out, op1, op2, carry_in) \
45486-
do { \
45487-
uint64_t __v, __a, __k, __k1; \
45488-
__v = (op1); \
45489-
__a = __v + (op2); \
45490-
__k1 = __a < __v; \
45491-
__k = (carry_in); \
45492-
__a = __a + __k; \
45493-
carry_out = (__a < __k) | __k1; \
45494-
res = __a; \
45495-
} while (0)
45492+
static void sum_precise_renorm(SumPreciseState *s)
45493+
{
45494+
int64_t v, carry;
45495+
int i;
45496+
45497+
carry = 0;
45498+
for(i = 0; i < s->n_limbs; i++) {
45499+
v = s->acc[i] + carry;
45500+
s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45501+
carry = v >> SP_LIMB_BITS;
45502+
}
45503+
/* we add a failsafe but it should be never reached in a
45504+
reasonnable amount of time */
45505+
if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
45506+
s->acc[s->n_limbs++] = carry;
45507+
}
4549645508

4549745509
static void sum_precise_add(SumPreciseState *s, double d)
4549845510
{
45499-
uint64_t a, m, a0, carry, acc_sign, a_sign;
45500-
int sgn, e, p, n, i;
45501-
unsigned shift;
45511+
uint64_t a, m, a0, a1;
45512+
int sgn, e, p;
45513+
unsigned int shift;
4550245514

4550345515
a = float64_as_uint64(d);
4550445516
sgn = a >> 63;
@@ -45521,78 +45533,50 @@ static void sum_precise_add(SumPreciseState *s, double d)
4552145533
} else if (e == 0) {
4552245534
if (likely(m == 0)) {
4552345535
/* zero */
45524-
if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn)
45525-
s->state = SUM_PRECISE_STATE_FINITE;
45536+
if (s->n_limbs == 0 && !sgn)
45537+
s->n_limbs = 1;
4552645538
} else {
4552745539
/* subnormal */
4552845540
p = 0;
4552945541
shift = 0;
4553045542
goto add;
4553145543
}
4553245544
} else {
45545+
/* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
45546+
avoid tests */
4553345547
m |= (uint64_t)1 << 52;
4553445548
shift = e - 1;
45535-
p = shift / 64;
45536-
/* 'p' is the position of a0 in acc */
45537-
shift %= 64;
45549+
/* 'p' is the position of a0 in acc. The division is normally
45550+
implementation as a multiplication by the compiler. */
45551+
p = shift / SP_LIMB_BITS;
45552+
shift %= SP_LIMB_BITS;
4553845553
add:
45539-
if (s->state >= SUM_PRECISE_STATE_INFINITY)
45540-
return;
45541-
s->state = SUM_PRECISE_STATE_FINITE;
45542-
n = s->n_limbs;
45543-
45544-
acc_sign = (int64_t)s->acc[n - 1] >> 63;
45545-
45546-
/* sign extend acc */
45547-
for(i = n; i <= p; i++)
45548-
s->acc[i] = acc_sign;
45549-
45550-
carry = sgn;
45551-
a_sign = -sgn;
45552-
a0 = m << shift;
45553-
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
45554-
if (shift >= 12) {
45555-
p++;
45556-
if (p >= n)
45557-
s->acc[p] = acc_sign;
45558-
a0 = m >> (64 - shift);
45559-
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
45560-
}
45561-
p++;
45562-
if (p >= n) {
45563-
n = p;
45554+
a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45555+
a1 = m >> (SP_LIMB_BITS - shift);
45556+
if (!sgn) {
45557+
s->acc[p] += a0;
45558+
s->acc[p + 1] += a1;
4556445559
} else {
45565-
/* carry */
45566-
for(i = p; i < n; i++) {
45567-
/* if 'a' positive: stop condition: carry = 0.
45568-
if 'a' negative: stop condition: carry = 1. */
45569-
if (carry == sgn)
45570-
goto done;
45571-
ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
45572-
}
45560+
s->acc[p] -= a0;
45561+
s->acc[p + 1] -= a1;
4557345562
}
45574-
45575-
/* extend the accumulator if needed */
45576-
a0 = carry + acc_sign + a_sign;
45577-
/* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */
45578-
if (a0 != ((int64_t)s->acc[n - 1] >> 63)) {
45579-
s->acc[n++] = a0;
45563+
s->n_limbs = max_int(s->n_limbs, p + 2);
45564+
45565+
if (unlikely(--s->counter == 0)) {
45566+
s->counter = SUM_PRECISE_COUNTER_INIT;
45567+
sum_precise_renorm(s);
4558045568
}
45581-
done:
45582-
s->n_limbs = n;
4558345569
}
4558445570
}
4558545571

4558645572
static double sum_precise_get_result(SumPreciseState *s)
4558745573
{
45588-
int n, shift, e, p, is_neg, i;
45589-
uint64_t m, addend, carry;
45574+
int n, shift, e, p, is_neg;
45575+
uint64_t m, addend;
4559045576

4559145577
if (s->state != SUM_PRECISE_STATE_FINITE) {
4559245578
switch(s->state) {
4559345579
default:
45594-
case SUM_PRECISE_STATE_MINUS_ZERO:
45595-
return -0.0;
4559645580
case SUM_PRECISE_STATE_INFINITY:
4559745581
return INFINITY;
4559845582
case SUM_PRECISE_STATE_MINUS_INFINITY:
@@ -45602,51 +45586,66 @@ static double sum_precise_get_result(SumPreciseState *s)
4560245586
}
4560345587
}
4560445588

45589+
sum_precise_renorm(s);
45590+
4560545591
/* extract the sign and absolute value */
45606-
n = s->n_limbs;
45607-
is_neg = s->acc[n - 1] >> 63;
45608-
if (is_neg) {
45609-
/* acc = -acc */
45610-
carry = 1;
45611-
for(i = 0; i < n; i++) {
45612-
ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
45613-
}
45614-
}
45615-
/* normalize */
45616-
while (n > 0 && s->acc[n - 1] == 0)
45617-
n--;
4561845592
#if 0
4561945593
{
45620-
printf("res=");
45621-
for(i = n - 1; i >= 0; i--)
45622-
printf(" %016lx", s->acc[i]);
45594+
int i;
45595+
printf("len=%d:", s->n_limbs);
45596+
for(i = s->n_limbs - 1; i >= 0; i--)
45597+
printf(" %014lx", s->acc[i]);
4562345598
printf("\n");
4562445599
}
4562545600
#endif
45601+
n = s->n_limbs;
45602+
/* minus zero result */
45603+
if (n == 0)
45604+
return -0.0;
45605+
45606+
/* normalize */
45607+
while (n > 0 && s->acc[n - 1] == 0)
45608+
n--;
4562645609
/* zero result. The spec tells it is always positive in the finite case */
4562745610
if (n == 0)
4562845611
return 0.0;
45612+
is_neg = (s->acc[n - 1] < 0);
45613+
if (is_neg) {
45614+
uint64_t v, carry;
45615+
int i;
45616+
/* negate */
45617+
/* XXX: do it only when needed */
45618+
carry = 1;
45619+
for(i = 0; i < n - 1; i++) {
45620+
v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
45621+
carry = v >> SP_LIMB_BITS;
45622+
s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
45623+
}
45624+
s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
45625+
while (n > 1 && s->acc[n - 1] == 0)
45626+
n--;
45627+
}
4562945628
/* subnormal case */
4563045629
if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
4563145630
return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
4563245631
/* normal case */
45633-
e = n * 64;
45632+
e = n * SP_LIMB_BITS;
4563445633
p = n - 1;
4563545634
m = s->acc[p];
45636-
shift = clz64(m);
45635+
shift = clz64(m) - (64 - SP_LIMB_BITS);
4563745636
e = e - shift - 52;
4563845637
if (shift != 0) {
4563945638
m <<= shift;
4564045639
if (p > 0) {
4564145640
int shift1;
4564245641
uint64_t nz;
4564345642
p--;
45644-
shift1 = 64 - shift;
45643+
shift1 = SP_LIMB_BITS - shift;
4564545644
nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
4564645645
m = m | (s->acc[p] >> shift1) | (nz != 0);
4564745646
}
4564845647
}
45649-
if ((m & ((1 << 10) - 1)) == 0) {
45648+
if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) {
4565045649
/* see if the LSB part is non zero for the final rounding */
4565145650
while (p > 0) {
4565245651
p--;
@@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s)
4565745656
}
4565845657
}
4565945658
/* rounding to nearest with ties to even */
45660-
addend = (1 << 10) - 1 + ((m >> 11) & 1);
45661-
m = (m + addend) >> 11;
45659+
addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1);
45660+
m = (m + addend) >> SP_RND_BITS;
4566245661
/* handle overflow in the rounding */
45663-
if (m == 0)
45662+
if (m == ((uint64_t)1 << 53))
4566445663
e++;
4566545664
if (unlikely(e >= 2047)) {
4566645665
/* infinity */

0 commit comments

Comments
 (0)
0