Merge pull request #4 from bashtage/port-more-changes · bashtage/numpy@c59585e · GitHub
[go: up one dir, main page]

Skip to content

Commit c59585e

Browse files
authored
Merge pull request #4 from bashtage/port-more-changes
MAINT: Sync with upstream changes
2 parents 564cffe + c06c339 commit c59585e

File tree

12 files changed

+2130
-2102
lines changed

12 files changed

+2130
-2102
lines changed

numpy/random/randomgen/generator.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2557,8 +2557,8 @@ cdef class RandomGenerator:
25572557
25582558
>>> def logist(x, loc, scale):
25592559
... return np.exp((loc-x)/scale)/(scale*(1+np.exp((loc-x)/scale))**2)
2560-
>>> plt.plot(bins, logist(bins, loc, scale)*count.max()/\
2561-
... logist(bins, loc, scale).max())
2560+
>>> lgst_val = logist(bins, loc, scale)
2561+
>>> plt.plot(bins, lgst_val * count.max() / lgst_val.max())
25622562
>>> plt.show()
25632563
25642564
"""

numpy/random/randomgen/mtrand.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,8 +2573,8 @@ cdef class RandomState:
25732573
25742574
>>> def logist(x, loc, scale):
25752575
... return np.exp((loc-x)/scale)/(scale*(1+np.exp((loc-x)/scale))**2)
2576-
>>> plt.plot(bins, logist(bins, loc, scale)*count.max(),
2577-
... logist(bins, loc, scale).max())
2576+
>>> lgst_val = logist(bins, loc, scale)
2577+
>>> plt.plot(bins, lgst_val * count.max() / lgst_val.max())
25782578
>>> plt.show()
25792579
25802580
"""

numpy/random/randomgen/src/xoroshiro128/xoroshiro128-benchmark.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,16 @@
1414

1515
#define N 1000000000
1616

17-
int main() {
17+
int main()
18+
{
1819
uint64_t count = 0, sum = 0;
1920
uint64_t seed = 0xDEADBEAF;
2021
s[0] = splitmix64_next(&seed);
2122
s[1] = splitmix64_next(&seed);
2223
int i;
2324
clock_t begin = clock();
24-
for (i = 0; i < N; i++) {
25+
for (i = 0; i < N; i++)
26+
{
2527
sum += next();
2628
count++;
2729
}

numpy/random/randomgen/src/xoroshiro128/xoroshiro128-test-data-gen.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,50 +21,61 @@
2121

2222
#define N 1000
2323

24-
int main() {
24+
int main()
25+
{
2526
uint64_t sum = 0;
2627
uint64_t state, seed = 0xDEADBEAF;
2728
state = seed;
2829
int i;
29-
for (i = 0; i < 2; i++) {
30+
for (i = 0; i < 2; i++)
31+
{
3032
s[i] = splitmix64_next(&state);
3133
}
3234
uint64_t store[N];
33-
for (i = 0; i < N; i++) {
35+
for (i = 0; i < N; i++)
36+
{
3437
store[i] = next();
3538
}
3639

3740
FILE *fp;
3841
fp = fopen("xoroshiro128-testset-1.csv", "w");
39-
if (fp == NULL) {
42+
if (fp == NULL)
43+
{
4044
printf("Couldn't open file\n");
4145
return -1;
4246
}
4347
fprintf(fp, "seed, 0x%" PRIx64 "\n", seed);
44-
for (i = 0; i < N; i++) {
48+
for (i = 0; i < N; i++)
49+
{
4550
fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]);
46-
if (i == 999) {
51+
if (i == 999)
52+
{
4753
printf("%d, 0x%" PRIx64 "\n", i, store[i]);
4854
}
4955
}
5056
fclose(fp);
5157

5258
seed = state = 0;
53-
for (i = 0; i < 2; i++) {
59+
for (i = 0; i < 2; i++)
60+
{
5461
s[i] = splitmix64_next(&state);
5562
}
56-
for (i = 0; i < N; i++) {
63+
for (i = 0; i < N; i++)
64+
{
5765
store[i] = next();
5866
}
5967
fp = fopen("xoroshiro128-testset-2.csv", "w");
60-
if (fp == NULL) {
68+
if (fp == NULL)
69+
{
6170
printf("Couldn't open file\n");
6271
return -1;
6372
}
6473
fprintf(fp, "seed, 0x%" PRIx64 "\n", seed);
65-
for (i = 0; i < N; i++) {
74+
for (i = 0; i < N; i++)
75+
{
6676
fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]);
67-
if (i == 999) {
77+
if (i == 999)
78+
{
6879
printf("%d, 0x%" PRIx64 "\n", i, store[i]);
6980
}
7081
}

numpy/random/randomgen/src/xoroshiro128/xoroshiro128.c

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,54 @@
1-
/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org)
1+
/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
22
33
To the extent possible under law, the author has dedicated all copyright
44
and related and neighboring rights to this software to the public domain
55
worldwide. This software is distributed without any warranty.
66
77
See <http://creativecommons.org/publicdomain/zero/1.0/>. */
88

9-
/* This is the successor to xorshift128+. It is the fastest full-period
10-
generator passing BigCrush without systematic failures, but due to the
11-
relatively short period it is acceptable only for applications with a
12-
mild amount of parallelism; otherwise, use a xorshift1024* generator.
13-
14-
Beside passing BigCrush, this generator passes the PractRand test suite
15-
up to (and included) 16TB, with the exception of binary rank tests, as
16-
the lowest bit of this generator is an LFSR of degree 128. The next bit
17-
can be described by an LFSR of degree 8256, but in the long run it will
18-
fail linearity tests, too. The other bits needs a much higher degree to
19-
be represented as LFSRs.
9+
/* This is xoroshiro128+ 1.0, our best and fastest small-state generator
10+
for floating-point numbers. We suggest to use its upper bits for
11+
floating-point generation, as it is slightly faster than
12+
xoroshiro128**. It passes all tests we are aware of except for the four
13+
lower bits, which might fail linearity tests (and just those), so if
14+
low linear complexity is not considered an issue (as it is usually the
15+
case) it can be used to generate 64-bit outputs, too; moreover, this
16+
generator has a very mild Hamming-weight dependency making our test
17+
(http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe
18+
this slight bias cannot affect any application. If you are concerned,
19+
use xoroshiro128** or xoshiro256+.
2020
2121
We suggest to use a sign test to extract a random Boolean value, and
2222
right shifts to extract subsets of bits.
2323
24-
Note that the generator uses a simulated rotate operation, which most C
25-
compilers will turn into a single instruction. In Java, you can use
26-
Long.rotateLeft(). In languages that do not make low-level rotation
27-
instructions accessible xorshift128+ could be faster.
28-
2924
The state must be seeded so that it is not everywhere zero. If you have
3025
a 64-bit seed, we suggest to seed a splitmix64 generator and use its
31-
output to fill s. */
26+
output to fill s.
27+
28+
NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
29+
better results in our test than the 2016 version (a=55, b=14, c=36).
30+
*/
3231

3332
#include "xoroshiro128.h"
3433

3534
extern INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state);
3635

3736
extern INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state);
3837

39-
void xoroshiro128_jump(xoroshiro128_state *state) {
38+
void xoroshiro128_jump(xoroshiro128_state *state)
39+
{
4040
int i, b;
4141
uint64_t s0;
4242
uint64_t s1;
43-
static const uint64_t JUMP[] = {0xbeac0467eba5facb, 0xd86b048b86aa9922};
43+
static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc};
4444

4545
s0 = 0;
4646
s1 = 0;
4747
for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
48-
for (b = 0; b < 64; b++) {
49-
if (JUMP[i] & UINT64_C(1) << b) {
48+
for (b = 0; b < 64; b++)
49+
{
50+
if (JUMP[i] & UINT64_C(1) << b)
51+
{
5052
s0 ^= state->s[0];
5153
s1 ^= state->s[1];
5254
}

numpy/random/randomgen/src/xoroshiro128/xoroshiro128.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,35 +14,41 @@
1414
#define INLINE inline
1515
#endif
1616

17-
typedef struct s_xoroshiro128_state {
17+
typedef struct s_xoroshiro128_state
18+
{
1819
uint64_t s[2];
1920
int has_uint32;
2021
uint32_t uinteger;
2122
} xoroshiro128_state;
2223

23-
static INLINE uint64_t rotl(const uint64_t x, int k) {
24+
static INLINE uint64_t rotl(const uint64_t x, int k)
25+
{
2426
return (x << k) | (x >> (64 - k));
2527
}
2628

27-
static INLINE uint64_t xoroshiro128_next(uint64_t *s) {
29+
static INLINE uint64_t xoroshiro128_next(uint64_t *s)
30+
{
2831
const uint64_t s0 = s[0];
2932
uint64_t s1 = s[1];
3033
const uint64_t result = s0 + s1;
3134

3235
s1 ^= s0;
33-
s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
34-
s[1] = rotl(s1, 36); // c
36+
s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
37+
s[1] = rotl(s1, 37); // c
3538

3639
return result;
3740
}
3841

39-
static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state) {
42+
static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state)
43+
{
4044
return xoroshiro128_next(&state->s[0]);
4145
}
4246

43-
static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state) {
47+
static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state)
48+
{
4449
uint64_t next;
45-
if (state->has_uint32) {
50+
if (state->has_uint32)
51+
{
4652
state->has_uint32 = 0;
4753
return state->uinteger;
4854
}

numpy/random/randomgen/src/xoroshiro128/xoroshiro128plus.orig.c

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org)
1+
/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
22
33
To the extent possible under law, the author has dedicated all copyright
44
and related and neighboring rights to this software to the public domain
@@ -8,44 +8,45 @@ See <http://creativecommons.org/publicdomain/zero/1.0/>. */
88

99
#include <stdint.h>
1010

11-
/* This is the successor to xorshift128+. It is the fastest full-period
12-
generator passing BigCrush without systematic failures, but due to the
13-
relatively short period it is acceptable only for applications with a
14-
mild amount of parallelism; otherwise, use a xorshift1024* generator.
15-
16-
Beside passing BigCrush, this generator passes the PractRand test suite
17-
up to (and included) 16TB, with the exception of binary rank tests, as
18-
the lowest bit of this generator is an LFSR of degree 128. The next bit
19-
can be described by an LFSR of degree 8256, but in the long run it will
20-
fail linearity tests, too. The other bits needs a much higher degree to
21-
be represented as LFSRs.
11+
/* This is xoroshiro128+ 1.0, our best and fastest small-state generator
12+
for floating-point numbers. We suggest to use its upper bits for
13+
floating-point generation, as it is slightly faster than
14+
xoroshiro128**. It passes all tests we are aware of except for the four
15+
lower bits, which might fail linearity tests (and just those), so if
16+
low linear complexity is not considered an issue (as it is usually the
17+
case) it can be used to generate 64-bit outputs, too; moreover, this
18+
generator has a very mild Hamming-weight dependency making our test
19+
(http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe
20+
this slight bias cannot affect any application. If you are concerned,
21+
use xoroshiro128** or xoshiro256+.
2222
2323
We suggest to use a sign test to extract a random Boolean value, and
2424
right shifts to extract subsets of bits.
2525
26-
Note that the generator uses a simulated rotate operation, which most C
27-
compilers will turn into a single instruction. In Java, you can use
28-
Long.rotateLeft(). In languages that do not make low-level rotation
29-
instructions accessible xorshift128+ could be faster.
30-
3126
The state must be seeded so that it is not everywhere zero. If you have
3227
a 64-bit seed, we suggest to seed a splitmix64 generator and use its
33-
output to fill s. */
28+
output to fill s.
29+
30+
NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
31+
better results in our test than the 2016 version (a=55, b=14, c=36).
32+
*/
3433

3534
uint64_t s[2];
3635

37-
static inline uint64_t rotl(const uint64_t x, int k) {
36+
static inline uint64_t rotl(const uint64_t x, int k)
37+
{
3838
return (x << k) | (x >> (64 - k));
3939
}
4040

41-
uint64_t next(void) {
41+
uint64_t next(void)
42+
{
4243
const uint64_t s0 = s[0];
4344
uint64_t s1 = s[1];
4445
const uint64_t result = s0 + s1;
4546

4647
s1 ^= s0;
47-
s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
48-
s[1] = rotl(s1, 36); // c
48+
s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
49+
s[1] = rotl(s1, 37); // c
4950

5051
return result;
5152
}
@@ -54,14 +55,42 @@ uint64_t next(void) {
5455
to 2^64 calls to next(); it can be used to generate 2^64
5556
non-overlapping subsequences for parallel computations. */
5657

57-
void jump(void) {
58-
static const uint64_t JUMP[] = {0xbeac0467eba5facb, 0xd86b048b86aa9922};
58+
void jump(void)
59+
{
60+
static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc};
5961

6062
uint64_t s0 = 0;
6163
uint64_t s1 = 0;
6264
for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
63-
for (int b = 0; b < 64; b++) {
64-
if (JUMP[i] & UINT64_C(1) << b) {
65+
for (int b = 0; b < 64; b++)
66+
{
67+
if (JUMP[i] & UINT64_C(1) << b)
68+
{
69+
s0 ^= s[0];
70+
s1 ^= s[1];
71+
}
72+
next();
73+
}
74+
s[0] = s0;
75+
s[1] = s1;
76+
}
77+
78+
/* This is the long-jump function for the generator. It is equivalent to
79+
2^96 calls to next(); it can be used to generate 2^32 starting points,
80+
from each of which jump() will generate 2^32 non-overlapping
81+
subsequences for parallel distributed computations. */
82+
83+
void long_jump(void)
84+
{
85+
static const uint64_t LONG_JUMP[] = {0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1};
86+
87+
uint64_t s0 = 0;
88+
uint64_t s1 = 0;
89+
for (int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
90+
for (int b = 0; b < 64; b++)
91+
{
92+
if (LONG_JUMP[i] & UINT64_C(1) << b)
93+
{
6594
s0 ^= s[0];
6695
s1 ^= s[1];
6796
}

numpy/random/randomgen/src/xorshift1024/xorshift1024.orig.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ See <http://creativecommons.org/publicdomain/zero/1.0/>. */
1414
linear dependencies from one of the lowest bits. The previous
1515
multiplier was 1181783497276652981 (M_8 in the paper). If you need to
1616
tell apart the two generators, you can refer to this generator as
17-
xorshift1024*φ and to the previous one as xorshift1024*M_8.
17+
xorshift1024*φ and to the previous one as xorshift1024*M_8.
1818
1919
This is a fast, high-quality generator. If 1024 bits of state are too
2020
much, try a xoroshiro128+ generator.
@@ -36,7 +36,7 @@ int p;
3636
uint64_t next(void) {
3737
const uint64_t s0 = s[p];
3838
uint64_t s1 = s[p = (p + 1) & 15];
39-
s1 ^= s1 << 31; // a
39+
s1 ^= s1 << 31; // a
4040
s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); // b,c
4141
return s[p] * 0x9e3779b97f4a7c13;
4242
}

0 commit comments

Comments
 (0)
0