diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index eed3f72667c0..ddeeae40fce2 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -124,3 +124,16 @@ Boolean indexing changes * Boolean indexing into scalar arrays return a new 1-d array. This means that ``array(1)[array(True)]`` gives ``array([1])`` and not the original array. +``np.random.multivariate_normal`` behavior with bad covariance matrix +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is now possible to adjust the behavior the function will have when dealing +with the covariance matrix by using two new keyword arguments: + +* ``tol`` can be used to specify a tolerance to use when checking that + the covariance matrix is positive semidefinite. + +* ``check_valid`` can be used to configure what the function will do in the + presence of a matrix that is not positive semidefinite. Valid options are + ``ignore``, ``warn`` and ``raise``. The default value, ``warn`` keeps the + the behavior used on previous releases. diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index 65c19540ef02..bf3a385a97a7 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -4355,9 +4355,10 @@ cdef class RandomState: self.lock) # Multivariate distributions: - def multivariate_normal(self, mean, cov, size=None): + def multivariate_normal(self, mean, cov, size=None, check_valid='warn', + tol=1e-8): """ - multivariate_normal(mean, cov[, size]) + multivariate_normal(mean, cov[, size, check_valid, tol]) Draw random samples from a multivariate normal distribution. @@ -4380,6 +4381,10 @@ cdef class RandomState: generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``. If no shape is specified, a single (`N`-D) sample is returned. + check_valid : { 'warn', 'raise', 'ignore' }, optional + Behavior when the covariance matrix is not positive semidefinite. + tol : float, optional + Tolerance when checking the singular values in covariance matrix. Returns ------- @@ -4464,11 +4469,11 @@ cdef class RandomState: shape = size if len(mean.shape) != 1: - raise ValueError("mean must be 1 dimensional") + raise ValueError("mean must be 1 dimensional") if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]): - raise ValueError("cov must be 2 dimensional and square") + raise ValueError("cov must be 2 dimensional and square") if mean.shape[0] != cov.shape[0]: - raise ValueError("mean and cov must have same length") + raise ValueError("mean and cov must have same length") # Compute shape of output and create a matrix of independent # standard normally distributed random numbers. The matrix has rows @@ -4491,12 +4496,20 @@ cdef class RandomState: # not zero. We continue to use the SVD rather than Cholesky in # order to preserve current outputs. Note that symmetry has not # been checked. + (u, s, v) = svd(cov) - neg = (np.sum(u.T * v, axis=1) < 0) & (s > 0) - if np.any(neg): - s[neg] = 0. - warnings.warn("covariance is not positive-semidefinite.", - RuntimeWarning) + + if check_valid != 'ignore': + if check_valid != 'warn' and check_valid != 'raise': + raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'") + + psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol) + if not psd: + if check_valid == 'warn': + warnings.warn("covariance is not positive-semidefinite.", + RuntimeWarning) + else: + raise ValueError("covariance is not positive-semidefinite.") x = np.dot(x, np.sqrt(s)[:, None] * v) x += mean diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index 47301a770c5b..e8a7d0fbf661 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -1,10 +1,11 @@ from __future__ import division, absolute_import, print_function +import warnings import numpy as np from numpy.testing import ( TestCase, run_module_suite, assert_, assert_raises, assert_equal, - assert_warns, assert_array_equal, assert_array_almost_equal, - suppress_warnings) + assert_warns, assert_no_warnings, assert_array_equal, + assert_array_almost_equal, suppress_warnings) from numpy import random from numpy.compat import asbytes import sys @@ -617,28 +618,37 @@ def test_multinomial(self): def test_multivariate_normal(self): np.random.seed(self.seed) mean = (.123456789, 10) - # Hmm... not even symmetric. - cov = [[1, 0], [1, 0]] + cov = [[1, 0], [0, 1]] size = (3, 2) actual = np.random.multivariate_normal(mean, cov, size) - desired = np.array([[[-1.47027513018564449, 10.], - [-1.65915081534845532, 10.]], - [[-2.29186329304599745, 10.], - [-1.77505606019580053, 10.]], - [[-0.54970369430044119, 10.], - [0.29768848031692957, 10.]]]) + desired = np.array([[[1.463620246718631, 11.73759122771936 ], + [1.622445133300628, 9.771356667546383]], + [[2.154490787682787, 12.170324946056553], + [1.719909438201865, 9.230548443648306]], + [[0.689515026297799, 9.880729819607714], + [-0.023054015651998, 9.201096623542879]]]) + assert_array_almost_equal(actual, desired, decimal=15) # Check for default size, was raising deprecation warning actual = np.random.multivariate_normal(mean, cov) - desired = np.array([-0.79441224511977482, 10.]) + desired = np.array([0.895289569463708, 9.17180864067987]) assert_array_almost_equal(actual, desired, decimal=15) - # Check that non positive-semidefinite covariance raises warning + # Check that non positive-semidefinite covariance warns with + # RuntimeWarning mean = [0, 0] - cov = [[1, 1 + 1e-10], [1 + 1e-10, 1]] + cov = [[1, 2], [2, 1]] assert_warns(RuntimeWarning, np.random.multivariate_normal, mean, cov) + # and that it doesn't warn with RuntimeWarning check_valid='ignore' + assert_no_warnings(np.random.multivariate_normal, mean, cov, + check_valid='ignore') + + # and that it raises with RuntimeWarning check_valid='raises' + assert_raises(ValueError, np.random.multivariate_normal, mean, cov, + check_valid='raise') + def test_negative_binomial(self): np.random.seed(self.seed) actual = np.random.negative_binomial(n=100, p=.12345, size=(3, 2))