8000 BUG, API: np.random.multivariate_normal behavior with bad covariance matrix by cowlicks · Pull Request #5726 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

BUG, API: np.random.multivariate_normal behavior with bad covariance matrix #5726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions doc/release/1.13.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,16 @@ Boolean indexing changes
* Boolean indexing into scalar arrays return a new 1-d array. This means that
``array(1)[array(True)]`` gives ``array([1])`` and not the original array.

``np.random.multivariate_normal`` behavior with bad covariance matrix
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is now possible to adjust the behavior the function will have when dealing
with the covariance matrix by using two new keyword arguments:

* ``tol`` can be used to specify a tolerance to use when checking that
the covariance matrix is positive semidefinite.

* ``check_valid`` can be used to configure what the function will do in the
presence of a matrix that is not positive semidefinite. Valid options are
``ignore``, ``warn`` and ``raise``. The default value, ``warn`` keeps the
the behavior used on previous releases.
33 changes: 23 additions & 10 deletions numpy/random/mtrand/mtrand.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4355,9 +4355,10 @@ cdef class RandomState:
self.lock)

# Multivariate distributions:
def multivariate_normal(self, mean, cov, size=None):
def multivariate_normal(self, mean, cov, size=None, check_valid='warn',
tol=1e-8):
"""
multivariate_normal(mean, cov[, size])
multivariate_normal(mean, cov[, size, check_valid, tol])

Draw random samples from a multivariate normal distribution.

Expand All @@ -4380,6 +4381,10 @@ cdef class RandomState:
generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because
each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``.
If no shape is specified, a single (`N`-D) sample is returned.
check_valid : { 'warn', 'raise', 'ignore' }, optional
Behavior when the covariance matrix is not positive semidefinite.
tol : float, optional
Tolerance when checking the singular values in covariance matrix.

Returns
-------
Expand Down Expand Up @@ -4464,11 +4469,11 @@ cdef class RandomState:
shape = size

if len(mean.shape) != 1:
raise ValueError("mean must be 1 dimensional")
raise ValueError("mean must be 1 dimensional")
if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
raise ValueError("cov must be 2 dimensional and square")
raise ValueError("cov must be 2 dimensional and square")
if mean.shape[0] != cov.shape[0]:
raise ValueError("mean and cov must have same length")
raise ValueError("mean and cov must have same length")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a check for check_valid being one of the 3 accepted strings?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, move it from line 4511 up here. It's misplaced there; only checked for when not psd.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that looks good now

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note that the check is itself later in the code, when checking. However, it may go unchecked when the psd passes, so I have rearranged the logic so it is always checked.

# Compute shape of output and create a matrix of independent
# standard normally distributed random numbers. The matrix has rows
Expand All @@ -4491,12 +4496,20 @@ cdef class RandomState:
# not zero. We continue to use the SVD rather than Cholesky in
# order to preserve current outputs. Note that symmetry has not
# been checked.

(u, s, v) = svd(cov)
neg = (np.sum(u.T * v, axis=1) < 0) & (s > 0)
if np.any(neg):
s[neg] = 0.
warnings.warn("covariance is not positive-semidefinite.",
RuntimeWarning)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would be the entire next part under
if check_valid != 'ignore':

no reason to check if we 'ignore'

if check_valid != 'ignore':
if check_valid != 'warn' and check_valid != 'raise':
raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'")

psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
if not psd:
if check_valid == 'warn':
warnings.warn("covariance is not positive-semidefinite.",
RuntimeWarning)
else:
raise ValueError("covariance is not positive-semidefinite.")

x = np.dot(x, np.sqrt(s)[:, None] * v)
x += mean
Expand Down
36 changes: 23 additions & 13 deletions numpy/random/tests/test_random.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import division, absolute_import, print_function
import warnings

import numpy as np
from numpy.testing import (
TestCase, run_module_suite, assert_, assert_raises, assert_equal,
assert_warns, assert_array_equal, assert_array_almost_equal,
suppress_warnings)
assert_warns, assert_no_warnings, assert_array_equal,
assert_array_almost_equal, suppress_warnings)
from numpy import random
from numpy.compat import asbytes
import sys
Expand Down Expand Up @@ -617,28 +618,37 @@ def test_multinomial(self):
def test_multivariate_normal(self):
np.random.seed(self.seed)
mean = (.123456789, 10)
# Hmm... not even symmetric.
cov = [[1, 0], [1, 0]]
cov = [[1, 0], [0, 1]]
size = (3, 2)
actual = np.random.multivariate_normal(mean, cov, size)
desired = np.array([[[-1.47027513018564449, 10.],
[-1.65915081534845532, 10.]],
[[-2.29186329304599745, 10.],
[-1.77505606019580053, 10.]],
[[-0.54970369430044119, 10.],
[0.29768848031692957, 10.]]])
desired = np.array([[[1.463620246718631, 11.73759122771936 ],
[1.622445133300628, 9.771356667546383]],
[[2.154490787682787, 12.170324946056553],
[1.719909438201865, 9.230548443648306]],
[[0.689515026297799, 9.880729819607714],
[-0.023054015651998, 9.201096623542879]]])

assert_array_almost_equal(actual, desired, decimal=15)

# Check for default size, was raising deprecation warning
actual = np.random.multivariate_normal(mean, cov)
desired = np.array([-0.79441224511977482, 10.])
desired = np.array([0.895289569463708, 9.17180864067987])
assert_array_almost_equal(actual, desired, decimal=15)

# Check that non positive-semidefinite covariance raises warning
# Check that non positive-semidefinite covariance warns with
# RuntimeWarning
mean = [0, 0]
cov = [[1, 1 + 1e-10], [1 + 1e-10, 1]]
cov = [[1, 2], [2, 1]]
assert_warns(RuntimeWarning, np.random.multivariate_normal, mean, cov)

# and that it doesn't warn with RuntimeWarning check_valid='ignore'
assert_no_warnings(np.random.multivariate_normal, mean, cov,
check_valid='ignore')

# and that it raises with RuntimeWarning check_valid='raises'
assert_raises(ValueError, np.random.multivariate_normal, mean, cov,
check_valid='raise')

def test_negative_binomial(self):
np.random.seed(self.seed)
actual = np.random.negative_binomial(n=100, p=.12345, size=(3, 2))
Expand Down
0