Reverted 69b304c. · matplotlib/matplotlib@e8b3041 · GitHub
[go: up one dir, main page]

Skip to content

Commit e8b3041

Browse files
committed
Reverted 69b304c.
1 parent e6f1b38 commit e8b3041

File tree

3 files changed

+116
-137
lines changed

3 files changed

+116
-137
lines changed

examples/statistics/violinplot_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,4 @@
5454

5555
fig.suptitle("Violin Plotting Examples")
5656
fig.subplots_adjust(hspace=0.4)
57-
plt.show()
57+
plt.show()

lib/matplotlib/axes/_axes.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6725,7 +6725,7 @@ def matshow(self, Z, **kwargs):
67256725
integer=True))
67266726
return im
67276727

6728-
def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5, showmeans=False,
6728+
def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False,
67296729
showextrema=True, showmedians=False):
67306730
"""
67316731
Make a violin plot.
@@ -6748,9 +6748,6 @@ def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5,
67486748
positions : array-like, default = [1, 2, ..., n]
67496749
Sets the positions of the violins. The ticks and limits are
67506750
automatically set to match the positions.
6751-
6752-
points: array-like, default = 100
6753-
Number of points to evaluate pdf estimation for Gaussian kernel
67546751
67556752
vert : bool, default = True.
67566753
If true, creates vertical violin plot
@@ -6809,9 +6806,6 @@ def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5,
68096806
cbars = None
68106807
cmedians = None
68116808

6812-
datashape_message = ("List of violinplot statistics and `{0}` "
6813-
"values must have same the length")
6814-
68156809
# Validate positions
68166810
if positions == None:
68176811
positions = range(1, len(dataset) + 1)
@@ -6836,14 +6830,13 @@ def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5,
68366830
# Render violins
68376831
for d,p,w in zip(dataset,positions,widths):
68386832
# Calculate the kernel density
6839-
kde = mlab.gaussian_kde(d)
6840-
m = kde.dataset.min()
6841-
M = kde.dataset.max()
6842-
mean = np.mean(kde.dataset)
6843-
median = np.median(kde.dataset)
6844-
coords = np.arange(m,M,(M-m)/float(points))
6845-
6846-
v = kde.evaluate(coords)
6833+
kde = mlab.ksdensity(d)
6834+
m = kde['xmin']
6835+
M = kde['xmax']
6836+
mean = kde['mean']
6837+
median = kde['median']
6838+
v = kde['result']
6839+
coords = np.arange(m,M,(M-m)/100.)
68476840

68486841
# Since each data point p is plotted from v-p to v+p,
68496842
# we need to scale it by an additional 0.5 factor so that we get
@@ -6853,10 +6846,10 @@ def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5,
68536846
# create vertical violin plot
68546847
if vert:
68556848
bodies += [self.fill_betweenx(coords,
6856-
-v+p,
6857-
v+p,
6858-
facecolor='y',
6859-
alpha=0.3)]
6849+
-v+p,
6850+
v+p,
6851+
facecolor='y',
6852+
alpha=0.3)]
68606853
# create horizontal violin plot
68616854
else:
68626855
bodies += [self.fill_between(coords,
@@ -6902,6 +6895,10 @@ def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5,
69026895
if showmedians:
69036896
cmedians = self.vlines(medians, pmins, pmaxes, colors='r')
69046897

6898+
6899+
6900+
6901+
69056902
# Reset hold
69066903
self.hold(holdStatus)
69076904

lib/matplotlib/mlab.py

Lines changed: 99 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -3656,12 +3656,12 @@ def stineman_interp(xi,x,y,yp=None):
36563656
1/(dy1+dy2),))
36573657
return yi
36583658

3659-
class gaussian_kde(object):
3659+
def ksdensity(dataset, bw_method=None):
36603660
"""
36613661
Representation of a kernel-density estimate using Gaussian kernels.
36623662
36633663
Call signature::
3664-
kde = gaussian_kde(dataset, 'silverman')
3664+
kde_dict = ksdensity(dataset, 'silverman')
36653665
36663666
Parameters
36673667
----------
@@ -3676,10 +3676,10 @@ class gaussian_kde(object):
36763676
Attributes
36773677
----------
36783678
dataset : ndarray
3679-
The dataset with which `gaussian_kde` was initialized.
3680-
dim : int
3679+
The dataset with which `ksdensity` was initialized.
3680+
d : int
36813681
Number of dimensions.
3682-
num_dp : int
3682+
n : int
36833683
Number of datapoints.
36843684
factor : float
36853685
The bandwidth factor, obtained from `kde.covariance_factor`, with which
@@ -3690,135 +3690,117 @@ class gaussian_kde(object):
36903690
inv_cov : ndarray
36913691
The inverse of `covariance`.
36923692
3693-
Methods
3693+
Returns
36943694
-------
3695-
kde.evaluate(points) : ndarray
3696-
Evaluate the estimated pdf on a provided set of points.
3697-
kde(points) : ndarray
3698-
Same as kde.evaluate(points)
3699-
kde.set_bandwidth(bw_method='scott') : None
3700-
Computes the bandwidth, i.e. the coefficient that multiplies the data
3701-
covariance matrix to obtain the kernel covariance matrix.
3702-
.. versionadded:: 0.11.0
3703-
kde.covariance_factor : float
3704-
Computes the coefficient (`kde.factor`) that multiplies the data
3705-
covariance matrix to obtain the kernel covariance matrix.
3706-
The default is `scotts_factor`. A subclass can overwrite this method
3707-
to provide a different method, or set it through a call to
3708-
`kde.set_bandwidth`.
3695+
A dictionary mapping names to various aspects of the computed KDE.
3696+
The dictionary has the following keys:
3697+
3698+
xmin : number
3699+
The min of the input dataset
3700+
xmax : number
3701+
The max of the input dataset
3702+
mean : number
3703+
The mean of the input dataset
3704+
median : number
3705+
The median of the input dataset
3706+
result : (# of points,)-array
3707+
The array of the evaluated PDF estimation
3708+
3709+
Raises
3710+
------
3711+
ValueError : if the dimensionality of the input points is different than
3712+
the dimensionality of the KDE.
37093713
37103714
"""
37113715

37123716
# This implementation with minor modification was too good to pass up.
37133717
# from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
37143718

3715-
def __init__(self, dataset, bw_method=None):
3716-
self.dataset = np.atleast_2d(dataset)
3717-
if not self.dataset.size > 1:
3718-
raise ValueError("`dataset` input should have multiple elements.")
3719+
dataset = np.array(np.atleast_2d(dataset))
3720+
xmin = dataset.min()
3721+
xmax = dataset.max()
37193722

3720-
self.dim, self.num_dp = self.dataset.shape
3721-
self.set_bandwidth(bw_method=bw_method)
3723+
if not dataset.size > 1:
3724+
raise ValueError("`dataset` input should have multiple elements.")
37223725

3723-
def scotts_factor(self):
3724-
return np.power(self.num_dp, -1./(self.dim+4))
3726+
dim, num_dp = dataset.shape
37253727

3726-
def silverman_factor(self):
3727-
return np.power(self.num_dp*(self.dim+2.0)/4.0, -1./(self.dim+4))
3728+
# ----------------------------------------------
3729+
# Set Bandwidth, defaulted to Scott's Factor
3730+
# ----------------------------------------------
3731+
scotts_factor = lambda: np.power(num_dp, -1./(dim+4))
3732+
silverman_factor = lambda: np.power(num_dp*(dim+2.0)/4.0, -1./(dim+4))
37283733

3729-
# Default method to calculate bandwidth, can be overwritten by subclass
3734+
# Default method to calculate bandwidth; replaced below according to `bw_method`
37303735
covariance_factor = scotts_factor
37313736

3732-
def set_bandwidth(self, bw_method=None):
3733-
if bw_method is None:
3734-
pass
3735-
elif bw_method == 'scott':
3736-
self.covariance_factor = self.scotts_factor
3737-
elif bw_method == 'silverman':
3738-
self.covariance_factor = self.silverman_factor
3739-
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3740-
self._bw_method = 'use constant'
3741-
self.covariance_factor = lambda: bw_method
3742-
elif callable(bw_method):
3743-
self._bw_method = bw_method
3744-
self.covariance_factor = lambda: self._bw_method(self)
3737+
if bw_method is None:
3738+
pass
3739+
elif bw_method == 'scott':
3740+
covariance_factor = scotts_factor
3741+
elif bw_method == 'silverman':
3742+
covariance_factor = silverman_factor
3743+
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3744+
covariance_factor = lambda: bw_method
3745+
else:
3746+
msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
3747+
raise ValueError(msg)
3748+
3749+
# ---------------------------------------------------------------
3750+
# Computes covariance matrix for each Gaussian kernel with factor
3751+
# ---------------------------------------------------------------
3752+
factor = covariance_factor()
3753+
3754+
# Cache covariance and inverse covariance of the data
3755+
data_covariance = np.atleast_2d(np.cov(dataset, rowvar=1, bias=False))
3756+
data_inv_cov = np.linalg.inv(data_covariance)
3757+
3758+
covariance = data_covariance * factor**2
3759+
inv_cov = data_inv_cov / factor**2
3760+
norm_factor = np.sqrt(np.linalg.det(2*np.pi*covariance)) * num_dp
3761+
3762+
# ----------------------------------------------
3763+
# Evaluate the estimated pdf on a set of points.
3764+
# ----------------------------------------------
3765+
points = np.atleast_2d(np.arange(xmin, xmax, (xmax-xmin)/100.))
3766+
3767+
dim_pts, num_dp_pts = np.array(points).shape
3768+
if dim_pts != dim:
3769+
if dim_pts == 1 and num_dp_pts == num_dp:
3770+
# points was passed in as a row vector
3771+
points = np.reshape(points, (dim, 1))
3772+
num_dp_pts = 1
37453773
else:
3746-
msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
3747-
"or a callable."
3774+
msg = "points have dimension %s,\
3775+
dataset has dimension %s" % (dim_pts, dim)
37483776
raise ValueError(msg)
37493777

3750-
self._compute_covariance()
3751-
3752-
def _compute_covariance(self):
3753-
"""Computes the covariance matrix for each Gaussian kernel using
3754-
covariance_factor().
3755-
"""
3756-
self.factor = self.covariance_factor()
3757-
# Cache covariance and inverse covariance of the data
3758-
if not hasattr(self, '_data_inv_cov'):
3759-
self._data_covariance = np.atleast_2d(np.cov(self.dataset, rowvar=1,
3760-
bias=False))
3761-
self._data_inv_cov = np.linalg.inv(self._data_covariance)
3762-
3763-
self.covariance = self._data_covariance * self.factor**2
3764-
self.inv_cov = self._data_inv_cov / self.factor**2
3765-
self._norm_factor = np.sqrt(np.linalg.det(2*np.pi*self.covariance)) * self.num_dp
3766-
3767-
def evaluate(self, points):
3768-
"""Evaluate the estimated pdf on a set of points.
3769-
3770-
Parameters
3771-
----------
3772-
points : (# of dimensions, # of points)-array
3773-
Alternatively, a (# of dimensions,) vector can be passed in and
3774-
treated as a single point.
3775-
3776-
Returns
3777-
-------
3778-
values : (# of points,)-array
3779-
The values at each point.
3780-
3781-
Raises
3782-
------
3783-
ValueError : if the dimensionality of the input points is different than
3784-
the dimensionality of the KDE.
3778+
result = np.zeros((num_dp_pts,), dtype=np.float)
37853779

3786-
"""
3787-
points = np.atleast_2d(points)
3788-
3789-
d, m = points.shape
3790-
if d != self.dim:
3791-
if d == 1 and m == self.dim:
3792-
# points was passed in as a row vector
3793-
points = np.reshape(points, (self.dim, 1))
3794-
m = 1
3795-
else:
3796-
msg = "points have dimension %s, dataset has dimension %s" % (d,
3797-
self.dim)
3798-
raise ValueError(msg)
3799-
3800-
result = np.zeros((m,), dtype=np.float)
3801-
3802-
if m >= self.num_dp:
3803-
# there are more points than data, so loop over data
3804-
for i in range(self.num_dp):
3805-
diff = self.dataset[:, i, np.newaxis] - points
3806-
tdiff = np.dot(self.inv_cov, diff)
3807-
energy = np.sum(diff*tdiff,axis=0) / 2.0
3808-
result = result + np.exp(-energy)
3809-
else:
3810-
# loop over points
3811-
for i in range(m):
3812-
diff = self.dataset - points[:, i, np.newaxis]
3813-
tdiff = np.dot(self.inv_cov, diff)
3814-
energy = np.sum(diff * tdiff, axis=0) / 2.0
3815-
result[i] = np.sum(np.exp(-energy), axis=0)
3816-
3817-
result = result / self._norm_factor
3818-
3819-
return result
3820-
3821-
__call__ = evaluate
3780+
if num_dp_pts >= num_dp:
3781+
# there are more points than data, so loop over data
3782+
for i in range(num_dp):
3783+
diff = dataset[:, i, np.newaxis] - points
3784+
tdiff = np.dot(inv_cov, diff)
3785+
energy = np.sum(diff*tdiff, axis=0) / 2.0
3786+
result = result + np.exp(-energy)
3787+
else:
3788+
# loop over points
3789+
for i in range(num_dp_pts):
3790+
diff = dataset - points[:, i, np.newaxis]
3791+
tdiff = np.dot(inv_cov, diff)
3792+
energy = np.sum(diff * tdiff, axis=0) / 2.0
3793+
result[i] = np.sum(np.exp(-energy), axis=0)
3794+
3795+
result = result / norm_factor
3796+
3797+
return {
3798+
'xmin': xmin,
3799+
'xmax': xmax,
3800+
'mean': np.mean(dataset),
3801+
'median': np.median(dataset),
3802+
'result': result
3803+
}
38223804

38233805
##################################################
38243806
# Code related to things in and around polygons

0 commit comments

Comments
 (0)
0