Making partial_svd and tucker reproducible by lan496 · Pull Request #139 · tensorly/tensorly · GitHub

Making partial_svd and tucker reproducible #139


Merged
merged 5 commits into from Nov 20, 2019
Changes from all commits
32 changes: 24 additions & 8 deletions tensorly/backend/core.py
@@ -552,10 +552,10 @@ def stack(arrays, axis=0):
Join a sequence of arrays along a new axis.
"""
raise NotImplementedError

def eps(self, dtype):
return self.finfo(dtype).eps

def finfo(self, dtype):
return np.finfo(self.to_numpy(self.tensor([], dtype=dtype)).dtype)

@@ -632,12 +632,12 @@ def kr(self, matrices, weights=None, mask=None):
a = self.reshape(res, (s1, 1, s2))
b = self.reshape(e, (1, s3, s4))
res = self.reshape(a * b, (-1, n_col))

m = self.reshape(mask, (-1, 1)) if mask is not None else 1

return res*m

def partial_svd(self, matrix, n_eigenvecs=None):
def partial_svd(self, matrix, n_eigenvecs=None, random_state=None, **kwargs):
"""Computes a fast partial SVD on `matrix`

If `n_eigenvecs` is specified, sparse eigendecomposition is used on
@@ -649,6 +649,10 @@ def partial_svd(self, matrix, n_eigenvecs=None):
A 2D tensor.
n_eigenvecs : int, optional, default is None
If specified, number of eigen[vectors-values] to return.
random_state : {None, int, np.random.RandomState}, optional
If specified, used to sample the starting vector for the partial SVD (scipy.sparse.linalg.eigsh)
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
@@ -696,16 +700,28 @@ def partial_svd(self, matrix, n_eigenvecs=None):
U, S, V = U[:, :n_eigenvecs], S[:n_eigenvecs], V[:n_eigenvecs, :]
else:
# We can perform a partial SVD
# construct np.random.RandomState for sampling a starting vector
if random_state is None:
# if random_state is not specified, do not initialize a starting vector
v0 = None
elif isinstance(random_state, int):
rns = np.random.RandomState(random_state)
# initialize with [-1, 1] as in ARPACK
v0 = rns.uniform(-1, 1, min_dim)
elif isinstance(random_state, np.random.RandomState):
# initialize with [-1, 1] as in ARPACK
v0 = random_state.uniform(-1, 1, min_dim)

# First choose whether to use X * X.T or X.T * X
if dim_1 < dim_2:
S, U = scipy.sparse.linalg.eigsh(
np.dot(matrix, matrix.T.conj()), k=n_eigenvecs, which='LM'
np.dot(matrix, matrix.T.conj()), k=n_eigenvecs, which='LM', v0=v0
)
S = np.where(np.abs(S) <= np.finfo(S.dtype).eps, 0, np.sqrt(S))
V = np.dot(matrix.T.conj(), U * np.where(np.abs(S) <= np.finfo(S.dtype).eps, 0, 1/S)[None, :])
else:
S, V = scipy.sparse.linalg.eigsh(
np.dot(matrix.T.conj(), matrix), k=n_eigenvecs, which='LM'
np.dot(matrix.T.conj(), matrix), k=n_eigenvecs, which='LM', v0=v0
)
S = np.where(np.abs(S) <= np.finfo(S.dtype).eps, 0, np.sqrt(S))
U = np.dot(matrix, V) * np.where(np.abs(S) <= np.finfo(S.dtype).eps, 0, 1/S)[None, :]
@@ -718,4 +734,4 @@ def partial_svd(self, matrix, n_eigenvecs=None):
U = self.tensor(U, **ctx)
S = self.tensor(S, **ctx)
V = self.tensor(V, **ctx)
return U, S, V
return U, S, V
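As a usage sketch of the new parameter (NumPy backend assumed; the test added to test_backend.py below exercises the same call), a fixed `random_state` makes repeated partial SVDs return identical factors:

```python
import numpy as np
import tensorly as tl

matrix = tl.tensor(np.random.random((20, 5)))

# Same seed -> same ARPACK starting vector v0 -> identical U, S, V.
U1, S1, V1 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=0)
U2, S2, V2 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=0)
assert np.array_equal(tl.to_numpy(U1), tl.to_numpy(U2))
assert np.array_equal(tl.to_numpy(S1), tl.to_numpy(S2))
```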
4 changes: 3 additions & 1 deletion tensorly/backend/cupy_backend.py
@@ -74,14 +74,16 @@ def solve(self, matrix1, matrix2):
return self.tensor(res, **ctx)

@staticmethod
def truncated_svd(matrix, n_eigenvecs=None):
def truncated_svd(matrix, n_eigenvecs=None, **kwargs):
"""Computes a truncated SVD on `matrix`

Parameters
----------
matrix : 2D-array
n_eigenvecs : int, optional, default is None
if specified, number of eigen[vectors-values] to return
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
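The `**kwargs` added here (and to the other backends below) is purely signature plumbing: it lets every backend SVD function accept the same arguments, with backends that cannot use `random_state` silently discarding it. A minimal sketch of the pattern, with hypothetical function names:

```python
import numpy as np

# Hypothetical stand-ins: one backend supports seeding (like the NumPy
# partial_svd), the other does not (like this CuPy truncated_svd).
def svd_with_seed(matrix, n_eigenvecs=None, random_state=None, **kwargs):
    return "seeded" if random_state is not None else "unseeded"

def svd_without_seed(matrix, n_eigenvecs=None, **kwargs):
    return "absorbed extra kwargs: {}".format(sorted(kwargs))

matrix = np.random.random((4, 3))
# A caller such as partial_tucker can now pass random_state uniformly,
# without checking which backend's SVD it is dispatching to:
for svd_fun in (svd_with_seed, svd_without_seed):
    print(svd_fun(matrix, n_eigenvecs=2, random_state=0))
```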
4 changes: 3 additions & 1 deletion tensorly/backend/mxnet_backend.py
@@ -223,7 +223,7 @@ def concatenate(tensors, axis):
def stack(arrays, axis=0):
return stack(*arrays, axis=axis)

def symeig_svd(self, matrix, n_eigenvecs=None):
def symeig_svd(self, matrix, n_eigenvecs=None, **kwargs):
"""Computes a truncated SVD on `matrix` using symeig

Uses symeig on matrix.T.dot(matrix) or its transpose
@@ -233,6 +233,8 @@ def symeig_svd(self, matrix, n_eigenvecs=None):
matrix : 2D-array
n_eigenvecs : int, optional, default is None
if specified, number of eigen[vectors-values] to return
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
12 changes: 8 additions & 4 deletions tensorly/backend/pytorch_backend.py
@@ -109,7 +109,7 @@ def solve(self, matrix1, matrix2):
Notes
-----

Previously, this was implemented as follows::

if self.ndim(matrix2) < 2:
# Currently, gesv doesn't support vectors for matrix2
@@ -173,7 +173,7 @@ def argmax(input, axis=None):
@staticmethod
def stack(arrays, axis=0):
return torch.stack(arrays, dim=axis)

@staticmethod
def conj(x, *args, **kwargs):
"""WARNING: IDENTITY FUNCTION (does nothing)
@@ -201,14 +201,16 @@ def _reverse(tensor, axis=0):
return tensor.index_select(axis, indices)

@staticmethod
def truncated_svd(matrix, n_eigenvecs=None):
def truncated_svd(matrix, n_eigenvecs=None, **kwargs):
"""Computes a truncated SVD on `matrix` using pytorch's SVD

Parameters
----------
matrix : 2D-array
n_eigenvecs : int, optional, default is None
if specified, number of eigen[vectors-values] to return
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
@@ -237,7 +239,7 @@ def truncated_svd(matrix, n_eigenvecs=None):
U, S, V = U[:, :n_eigenvecs], S[:n_eigenvecs], V[:n_eigenvecs, :]
return U, S, V

def symeig_svd(self, matrix, n_eigenvecs=None):
def symeig_svd(self, matrix, n_eigenvecs=None, **kwargs):
"""Computes a truncated SVD on `matrix` using symeig

Uses symeig on matrix.T.dot(matrix) or its transpose
@@ -247,6 +249,8 @@ def symeig_svd(self, matrix, n_eigenvecs=None):
matrix : 2D-array
n_eigenvecs : int, optional, default is None
if specified, number of eigen[vectors-values] to return
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
6 changes: 4 additions & 2 deletions tensorly/backend/tensorflow_backend.py
@@ -98,7 +98,7 @@ def norm(tensor, order=2, axis=None):

def dot(self, tensor1, tensor2):
return tf.tensordot(tensor1, tensor2, axes=([self.ndim(tensor1) - 1], [0]))

@staticmethod
def conj(x, *args, **kwargs):
"""WARNING: IDENTITY FUNCTION (does nothing)
@@ -119,14 +119,16 @@ def solve(lhs, rhs):
return res

@staticmethod
def truncated_svd(matrix, n_eigenvecs=None):
def truncated_svd(matrix, n_eigenvecs=None, **kwargs):
"""Computes an SVD on `matrix`

Parameters
----------
matrix : 2D-array
n_eigenvecs : int, optional, default is None
if specified, number of eigen[vectors-values] to return
**kwargs : optional
kwargs are used to absorb differences in parameters among the other SVD functions

Returns
-------
22 changes: 17 additions & 5 deletions tensorly/contrib/sparse/backend/numpy_backend.py
@@ -25,8 +25,8 @@ class NumpySparseBackend(Backend):
backend_name = 'numpy.sparse'

# moveaxis and shape are temporarily redefined to fix issue #131
# Using the built-in functions raises a TypeError:
# no implementation found for 'numpy.shape' on types
# that implement __array_function__: [<class 'sparse._coo.core.COO'>]
def moveaxis(self, tensor, source, target):
axes = list(range(self.ndim(tensor)))
@@ -113,7 +113,7 @@ def solve(self, A, b):
return x

@staticmethod
def partial_svd(matrix, n_eigenvecs=None):
def partial_svd(matrix, n_eigenvecs=None, random_state=None, **kwargs):
# Check that matrix is... a matrix!
if matrix.ndim != 2:
raise ValueError('matrix should be a matrix. matrix.ndim is {} != 2'.format(
@@ -150,6 +150,18 @@ def partial_svd(matrix, n_eigenvecs=None):
if np.issubdtype(matrix.dtype, np.complexfloating):
raise NotImplementedError("Complex dtypes")
# We can perform a partial SVD
# construct np.random.RandomState for sampling a starting vector
if random_state is None:
# if random_state is not specified, do not initialize a starting vector
v0 = None
elif isinstance(random_state, int):
rns = np.random.RandomState(random_state)
# initialize with [-1, 1] as in ARPACK
v0 = rns.uniform(-1, 1, min_dim)
elif isinstance(random_state, np.random.RandomState):
# initialize with [-1, 1] as in ARPACK
v0 = random_state.uniform(-1, 1, min_dim)

# First choose whether to use X * X.T or X.T * X
if dim_1 < dim_2:
conj = matrix.T
@@ -160,7 +172,7 @@ def partial_svd(matrix, n_eigenvecs=None):
# use dense form when sparse form will fail
S, U = scipy.linalg.eigh(xxT.toarray())
else:
S, U = scipy.sparse.linalg.eigsh(xxT, k=n_eigenvecs, which='LM')
S, U = scipy.sparse.linalg.eigsh(xxT, k=n_eigenvecs, which='LM', v0=v0)
S = np.sqrt(S)
V = conj.dot(U / S[None, :])
else:
@@ -171,7 +183,7 @@ def partial_svd(matrix, n_eigenvecs=None):
# use dense form when sparse form will fail
S, V = scipy.linalg.eigh(xTx.toarray())
else:
S, V = scipy.sparse.linalg.eigsh(xTx, k=n_eigenvecs, which='LM')
S, V = scipy.sparse.linalg.eigsh(xTx, k=n_eigenvecs, which='LM', v0=v0)
S = np.sqrt(S)
U = matrix.dot(V / S[None, :])

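The seed-to-starting-vector branching above is duplicated verbatim between the dense and sparse backends. As an illustration only (this PR keeps the logic inline; `_build_start_vector` is a hypothetical name), the shared piece amounts to:

```python
import numpy as np

def _build_start_vector(random_state, min_dim):
    # None -> let ARPACK draw its own (non-reproducible) starting vector.
    if random_state is None:
        return None
    # An int seed is first wrapped in a RandomState.
    if isinstance(random_state, int):
        random_state = np.random.RandomState(random_state)
    if isinstance(random_state, np.random.RandomState):
        # Uniform over [-1, 1], matching ARPACK's own initialization.
        return random_state.uniform(-1, 1, min_dim)
    raise ValueError("random_state must be None, an int, or a RandomState")

# v0 = _build_start_vector(random_state, min_dim) would then be passed to
# scipy.sparse.linalg.eigsh(..., v0=v0) in both backends.
```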
6 changes: 3 additions & 3 deletions tensorly/decomposition/_tucker.py
@@ -42,7 +42,7 @@ def partial_tucker(tensor, modes, rank=None, n_iter_max=100, init='svd', tol=10e

Returns
-------
core : ndarray
core tensor of the Tucker decomposition
factors : ndarray list
list of factors of the Tucker decomposition.
@@ -74,7 +74,7 @@ def partial_tucker(tensor, modes, rank=None, n_iter_max=100, init='svd', tol=10e
if init == 'svd':
factors = []
for index, mode in enumerate(modes):
eigenvecs, _, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank[index])
eigenvecs, _, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank[index], random_state=random_state)
factors.append(eigenvecs)
else:
rng = check_random_state(random_state)
@@ -87,7 +87,7 @@ def partial_tucker(tensor, modes, rank=None, n_iter_max=100, init='svd', tol=10e
for iteration in range(n_iter_max):
for index, mode in enumerate(modes):
core_approximation = multi_mode_dot(tensor, factors, modes=modes, skip=index, transpose=True)
eigenvecs, _, _ = svd_fun(unfold(core_approximation, mode), n_eigenvecs=rank[index])
eigenvecs, _, _ = svd_fun(unfold(core_approximation, mode), n_eigenvecs=rank[index], random_state=random_state)
factors[index] = eigenvecs

core = multi_mode_dot(tensor, factors, modes=modes, transpose=True)
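A usage sketch of what threading `random_state` through `svd_fun` buys (NumPy backend and the import path `tensorly.decomposition.partial_tucker` assumed; the test added below checks the same property):

```python
import numpy as np
import tensorly as tl
from tensorly.decomposition import partial_tucker

tensor = tl.tensor(np.random.random((4, 5, 6)))

# A fixed random_state seeds the eigsh starting vector in the SVD
# initialization and in every iteration, so two runs agree exactly.
core1, factors1 = partial_tucker(tensor, modes=[1, 2], rank=[3, 3], random_state=0)
core2, factors2 = partial_tucker(tensor, modes=[1, 2], rank=[3, 3], random_state=0)
assert np.array_equal(tl.to_numpy(core1), tl.to_numpy(core2))
for f1, f2 in zip(factors1, factors2):
    assert np.array_equal(tl.to_numpy(f1), tl.to_numpy(f2))
```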
9 changes: 8 additions & 1 deletion tensorly/decomposition/tests/test_tucker.py
@@ -3,7 +3,7 @@
from ...tucker_tensor import tucker_to_tensor
from ...tenalg import multi_mode_dot
from ...random import check_random_state
from ...testing import assert_equal, assert_
from ...testing import assert_equal, assert_, assert_array_equal


def test_partial_tucker():
@@ -32,6 +32,13 @@ def test_partial_tucker():
assert_equal(core.shape, [tensor.shape[0]]+ranks, err_msg="Core.shape={}, "
"expected {}".format(core.shape, [tensor.shape[0]]+ranks))

# Test that random_state fixes the core and the factor matrices
core1, factors1 = partial_tucker(tensor, modes=modes, rank=ranks, random_state=0)
core2, factors2 = partial_tucker(tensor, modes=modes, rank=ranks, random_state=0)
assert_array_equal(core1, core2)
for factor1, factor2 in zip(factors1, factors2):
assert_array_equal(factor1, factor2)


def test_tucker():
"""Test for the Tucker decomposition"""
9 changes: 9 additions & 0 deletions tensorly/tests/test_backend.py
@@ -411,6 +411,15 @@ def test_svd():
true_rec_error = tl.sum((matrix - tl.dot(U, tl.reshape(S, (-1, 1))*V))**2)
assert_(true_rec_error <= tol)

# Test if partial_svd returns the same result for the same setting
matrix = T.tensor(np.random.random((20, 5)))
random_state = np.random.RandomState(0)
U1, S1, V1 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=random_state)
U2, S2, V2 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=0)
assert_array_equal(U1, U2)
assert_array_equal(S1, S2)
assert_array_equal(V1, V2)


def test_shape():
A = T.arange(3*4*5)