8000 SeedSequence fixes by rkern · Pull Request #39 · mattip/numpy · GitHub
[go: up one dir, main page]

Skip to content

SeedSequence fixes #39

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions numpy/random/bit_generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@ BitGenerator base class and SeedSequence used to seed the BitGenerators.

SeedSequence is derived from Melissa E. O'Neill's C++11 `std::seed_seq`
implementation, as it has a lot of nice properties that we want.

https://gist.github.com/imneme/540829265469e673d045
http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html

The MIT License (MIT)

Copyright (c) 2015 Melissa E. O'Neill
Copyright (c) 2019 Robert Kern
Copyright (c) 2019 NumPy Developers

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
Expand Down Expand Up @@ -80,13 +86,15 @@ def _int_to_uint32_array(n):

def _coerce_to_uint32_array(x):
""" Coerce an input to a uint32 array.

If a `uint32` array, pass it through directly.
If a non-negative integer, then break it up into `uint32` words, lowest
bits first.
If a string starting with "0x", then interpret as a hex integer, as above.
If a string of decimal digits, interpret as a decimal integer, as above.
If a sequence of ints or strings, interpret each element as above and
concatenate.

Note that the handling of `int64` or `uint64` arrays is not just a
straightforward view as a `uint32` array. If an element is small enough to
fit into a `uint32`, then it will only take up one `uint32` element in the
Expand Down Expand Up @@ -252,23 +260,27 @@ cdef class SeedSequence():
create ``n`` SeedSequences that can be used to seed independent
BitGenerators, i.e. for different threads.

To recreate a SeedSequence ``sq`` exactly, you can use ``str(sq)``

Parameters
----------
entropy: {{None, int, sequence[int]}}, optional
The entropy for creating a SeedSequence.
program_entropy: {{None, int, sequence[int]}}, optional
entropy : {None, int, sequence[int]}, optional
The entropy for creating a `SeedSequence`.
program_entropy : {None, int, sequence[int]}, optional
A second source of entropy, typically per-application
spawn_key: {{(), sequence[int]}}, optional
spawn_key : {(), sequence[int]}, optional
A third source of entropy, used internally when calling
`SeedSequence.spawn`
pool_size: {{int}}, optional
Size of the pooled entropy to store. Default is 4
pool_size : {int}, optional
Size of the pooled entropy to store. Default is 4 to give a 128-bit
entropy pool. 8 (for 256 bits) is another reasonable choice if working
with larger PRNGs, but there is very little to be gained by selecting
another value.
n_children_spawned : {int}, optional
The number of children already spawned. Only pass this if
reconstructing a `SeedSequence` from a serialized form.
"""

def __init__(self, entropy=None, program_entropy=None, spawn_key=(),
pool_size=DEFAULT_POOL_SIZE):
pool_size=DEFAULT_POOL_SIZE, n_children_spawned=0):
if pool_size < DEFAULT_POOL_SIZE:
raise ValueError("The size of the entropy pool should be at least "
f"{DEFAULT_POOL_SIZE}")
Expand All @@ -282,7 +294,7 @@ cdef class SeedSequence():
self.program_entropy = program_entropy
self.spawn_key = tuple(spawn_key)
self.pool_size = pool_size
self.n_children_spawned = 0
self.n_children_spawned = n_children_spawned

self.pool = np.zeros(pool_size, dtype=np.uint32)
self.mix_entropy(self.pool, self.get_assembled_entropy())
Expand All @@ -300,14 +312,17 @@ cdef class SeedSequence():
lines.append(f' spawn_key={self.spawn_key!r},')
if self.pool_size != DEFAULT_POOL_SIZE:
lines.append(f' pool_size={self.pool_size!r},')
if self.n_children_spawned != 0:
lines.append(f' n_children_spawned={self.n_children_spawned!r},')
lines.append(')')
text = '\n'.join(lines)
return text

@property
def state(self):
return {k:getattr(self, k) for k in
['entropy', 'program_entropy', 'spawn_key', 'pool_size']
['entropy', 'program_entropy', 'spawn_key', 'pool_size',
'n_children_spawned']
if getattr(self, k) is not None}

cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer,
Expand All @@ -316,9 +331,7 @@ cdef class SeedSequence():

Parameters
----------

mixer: 1D uint32 array, modified in-place

mixer : 1D uint32 array, modified in-place
entropy_array : 1D uint32 array
"""
cdef uint32_t hash_const[1]
Expand Down Expand Up @@ -350,6 +363,7 @@ cdef class SeedSequence():
def get_assembled_entropy(self):
""" Convert and assemble all entropy sources into a uniform uint32
array.

Returns
-------
entropy_array : 1D uint32 array
Expand Down Expand Up @@ -459,14 +473,14 @@ cdef class BitGenerator():

Parameters
----------
seed_seq: {None, ISeedSequence, int, sequence[int]}, optional
seed_seq : {None, ISeedSequence, int, sequence[int]}, optional
An ISeedSequence to initialize the BitGenerator. If None, one will be
created. If an int or a sequence of ints, it will be used as the
entropy for creating a SeedSequence.

Attributes
----------
lock: threading.Lock
lock : threading.Lock
Lock instance that is shared so that the same bit generator can
be used in multiple Generators without corrupting the state. Code that
generates values from a bit generator should hold the bit generator's
Expand Down
8 changes: 7 additions & 1 deletion numpy/random/tests/test_direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,19 @@ def gauss_from_uint(x, n, bits):
return gauss[:n]

def test_seedsequence():
from numpy.random.bit_generator import SeedlessSeedSequence, ISeedSequence
from numpy.random.bit_generator import (ISeedSequence,
ISpawnableSeedSequence,
SeedlessSeedSequence)

s1 = SeedSequence(range(10), 1, (1, 2), pool_size=6)
s1.spawn(10)
s2 = SeedSequence(**s1.state)
assert_equal(s1.state, s2.state)
assert_equal(s1.n_children_spawned, s2.n_children_spawned)

# The interfaces cannot be instantiated themselves.
assert_raises(TypeError, ISeedSequence)
assert_raises(TypeError, ISpawnableSeedSequence)
dummy = SeedlessSeedSequence()
assert_raises(NotImplementedError, dummy.generate_state, 10)
assert len(dummy.spawn(10)) == 10
Expand Down
0