ENH make GMMs and LLE cloneable. · pfdevilliers/scikit-learn@520da07 · GitHub
[go: up one dir, main page]

Skip to content

Commit 520da07

Browse files
committed
ENH make GMMs and LLE cloneable.
1 parent 04d8160 commit 520da07

File tree

5 files changed: 56 additions and 56 deletions.

sklearn/hmm.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,11 +1126,11 @@ def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
11261126
lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
11271127
+ np.finfo(np.float).eps)
11281128
gmm_posteriors = np.exp(lgmm_posteriors)
1129-
tmp_gmm = GMM(g.n_components, covariance_type=g._covariance_type)
1129+
tmp_gmm = GMM(g.n_components, covariance_type=g.covariance_type)
11301130
n_features = g.means_.shape[1]
11311131
tmp_gmm._set_covars(
11321132
distribute_covar_matrix_to_match_covariance_type(
1133-
np.eye(n_features), g._covariance_type,
1133+
np.eye(n_features), g.covariance_type,
11341134
g.n_components))
11351135
norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)
11361136

@@ -1141,7 +1141,7 @@ def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
11411141
if 'm' in params:
11421142
stats['means'][state] += tmp_gmm.means_ * norm[:, np.newaxis]
11431143
if 'c' in params:
1144-
if tmp_gmm._covariance_type == 'tied':
1144+
if tmp_gmm.covariance_type == 'tied':
11451145
stats['covars'][state] += tmp_gmm.covars_ * norm.sum()
11461146
else:
11471147
cvnorm = np.copy(norm)
@@ -1162,7 +1162,7 @@ def _do_mstep(self, stats, params):
11621162
if 'm' in params:
11631163
g.means_ = stats['means'][state] / norm[:, np.newaxis]
11641164
if 'c' in params:
1165-
if g._covariance_type == 'tied':
1165+
if g.covariance_type == 'tied':
11661166
g.covars_ = ((stats['covars'][state]
11671167
+ self.covars_prior * np.eye(n_features))
11681168
/ norm.sum())
@@ -1171,10 +1171,10 @@ def _do_mstep(self, stats, params):
11711171
shape = np.ones(g.covars_.ndim)
11721172
shape[0] = np.shape(g.covars_)[0]
11731173
cvnorm.shape = shape
1174-
if (g._covariance_type in ['spherical', 'diag']):
1174+
if (g.covariance_type in ['spherical', 'diag']):
11751175
g.covars_ = (stats['covars'][state]
11761176
+ self.covars_prior) / cvnorm
1177-
elif g._covariance_type == 'full':
1177+
elif g.covariance_type == 'full':
11781178
eye = np.eye(n_features)
11791179
g.covars_ = ((stats['covars'][state]
11801180
+ self.covars_prior * eye[np.newaxis])

sklearn/manifold/locally_linear.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -611,10 +611,10 @@ def __init__(self, n_neighbors=5, n_components=2, reg=1E-3,
611611
self.hessian_tol = hessian_tol
612612
self.modified_tol = modified_tol
613613
self.random_state = random_state
614-
self.nbrs_ = NearestNeighbors(n_neighbors,
615-
algorithm=neighbors_algorithm)
616614

617615
def _fit_transform(self, X):
616+
self.nbrs_ = NearestNeighbors(self.n_neighbors,
617+
algorithm=self.neighbors_algorithm)
618618
if self.out_dim:
619619
warnings.warn("Parameter ``out_dim`` was renamed to "
620620
"``n_components`` and is now deprecated.", DeprecationWarning,

sklearn/mixture/dpgmm.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,11 @@ def __init__(self, n_components=1, covariance_type='diag', alpha=1.0,
207207

208208
def _get_precisions(self):
209209
"""Return precisions as a full matrix."""
210-
if self._covariance_type == 'full':
210+
if self.covariance_type == 'full':
211211
return self.precs_
212-
elif self._covariance_type in ['diag', 'spherical']:
212+
elif self.covariance_type in ['diag', 'spherical']:
213213
return [np.diag(cov) for cov in self.precs_]
214-
elif self._covariance_type == 'tied':
214+
elif self.covariance_type == 'tied':
215215
return [self.precs_] * self.n_components
216216

217217
def _get_covars(self):
@@ -261,12 +261,12 @@ def eval(self, X):
261261
# Free memory and developers cognitive load:
262262
del dgamma1, dgamma2, sd
263263

264-
if self._covariance_type not in ['full', 'tied', 'diag', 'spherical']:
264+
if self.covariance_type not in ['full', 'tied', 'diag', 'spherical']:
265265
raise NotImplementedError("This ctype is not implemented: %s"
266-
% self._covariance_type)
266+
% self.covariance_type)
267267
p = _bound_state_log_lik(X, self._initial_bound + self.bound_prec_,
268268
self.precs_, self.means_,
269-
self._covariance_type)
269+
self.covariance_type)
270270
z = p + dgamma
271271
z = log_normalize(z, axis=-1)
272272
bound = np.sum(z * p, axis=-1)
@@ -285,13 +285,13 @@ def _update_means(self, X, z):
285285
"""Update the variational distributions for the means"""
286286
n_features = X.shape[1]
287287
for k in xrange(self.n_components):
288-
if self._covariance_type in ['spherical', 'diag']:
288+
if self.covariance_type in ['spherical', 'diag']:
289289
num = np.sum(z.T[k].reshape((-1, 1)) * X, axis=0)
290290
num *= self.precs_[k]
291291
den = 1. + self.precs_[k] * np.sum(z.T[k])
292292
self.means_[k] = num / den
293-
elif self._covariance_type in ['tied', 'full']:
294-
if self._covariance_type == 'tied':
293+
elif self.covariance_type in ['tied', 'full']:
294+
if self.covariance_type == 'tied':
295295
cov = self.precs_
296296
else:
297297
cov = self.precs_[k]
@@ -303,7 +303,7 @@ def _update_means(self, X, z):
303303
def _update_precisions(self, X, z):
304304
"""Update the variational distributions for the precisions"""
305305
n_features = X.shape[1]
306-
if self._covariance_type == 'spherical':
306+
if self.covariance_type == 'spherical':
307307
self.dof_ = 0.5 * n_features * np.sum(z, axis=0)
308308
for k in xrange(self.n_components):
309309
# could be more memory efficient ?
@@ -315,7 +315,7 @@ def _update_precisions(self, X, z):
315315
digamma(self.dof_[k]) - np.log(self.scale_[k])))
316316
self.precs_ = np.tile(self.dof_ / self.scale_, [n_features, 1]).T
317317

318-
elif self._covariance_type == 'diag':
318+
elif self.covariance_type == 'diag':
319319
for k in xrange(self.n_components):
320320
self.dof_[k].fill(1. + 0.5 * np.sum(z.T[k], axis=0))
321321
sq_diff = (X - self.means_[k]) ** 2 # see comment above
@@ -326,7 +326,7 @@ def _update_precisions(self, X, z):
326326
- np.log(self.scale_[k]))
327327
self.bound_prec_[k] -= 0.5 * np.sum(self.precs_[k])
328328

329-
elif self._covariance_type == 'tied':
329+
elif self.covariance_type == 'tied':
330330
self.dof_ = 2 + X.shape[0] + n_features
331331
self.scale_ = (X.shape[0] + 1) * np.identity(n_features)
332332
for k in xrange(self.n_components):
@@ -339,7 +339,7 @@ def _update_precisions(self, X, z):
339339
self.dof_, self.scale_, self.det_scale_, n_features)
340340
self.bound_prec_ -= 0.5 * self.dof_ * np.trace(self.scale_)
341341

342-
elif self._covariance_type == 'full':
342+
elif self.covariance_type == 'full':
343343
for k in xrange(self.n_components):
344344
sum_resp = np.sum(z.T[k])
345345
self.dof_[k] = 2 + sum_resp + n_features
@@ -367,7 +367,7 @@ def _monitor(self, X, z, n, end=False):
367367
print "Bound after updating %8s: %f" % (n, self.lower_bound(X, z))
368368
if end == True:
369369
print "Cluster proportions:", self.gamma_.T[1]
370-
print "covariance_type:", self._covariance_type
370+
print "covariance_type:", self.covariance_type
371371

372372
def _do_mstep(self, X, z, params):
373373
"""Maximize the variational lower bound
@@ -414,20 +414,20 @@ def _bound_means(self):
414414
def _bound_precisions(self):
415415
"""Returns the bound term related to precisions"""
416416
logprior = 0.
417-
if self._covariance_type == 'spherical':
417+
if self.covariance_type == 'spherical':
418418
logprior += np.sum(gammaln(self.dof_))
419419
logprior -= np.sum(
420420
(self.dof_ - 1) * digamma(np.maximum(0.5, self.dof_)))
421421
logprior += np.sum(- np.log(self.scale_) + self.dof_ -\
422422
self.precs_[:, 0])
423-
elif self._covariance_type == 'diag':
423+
elif self.covariance_type == 'diag':
424424
logprior += np.sum(gammaln(self.dof_))
425425
logprior -= np.sum(
426426
(self.dof_ - 1) * digamma(np.maximum(0.5, self.dof_)))
427427
logprior += np.sum(- np.log(self.scale_) + self.dof_ - self.precs_)
428-
elif self._covariance_type == 'tied':
428+
elif self.covariance_type == 'tied':
429429
logprior += _bound_wishart(self.dof_, self.scale_, self.det_scale_)
430-
elif self._covariance_type == 'full':
430+
elif self.covariance_type == 'full':
431431
for k in xrange(self.n_components):
432432
logprior += _bound_wishart(self.dof_[k],
433433
self.scale_[k],
@@ -456,16 +456,16 @@ def _logprior(self, z):
456456

457457
def lower_bound(self, X, z):
458458
"""returns a lower bound on model evidence based on X and membership"""
459-
if self._covariance_type not in ['full', 'tied', 'diag', 'spherical']:
459+
if self.covariance_type not in ['full', 'tied', 'diag', 'spherical']:
460460
raise NotImplementedError("This ctype is not implemented: %s"
461-
% self._covariance_type)
461+
% self.covariance_type)
462462

463463
X = np.asarray(X)
464464
if X.ndim == 1:
465465
X = X[:, np.newaxis]
466466
c = np.sum(z * _bound_state_log_lik(
467467
X, self._initial_bound + self.bound_prec_,
468-
self.precs_, self.means_, self._covariance_type))
468+
self.precs_, self.means_, self.covariance_type))
469469

470470
return c + self._logprior(z)
471471

@@ -526,29 +526,29 @@ def fit(self, X, **kwargs):
526526
self.weights_ = np.tile(1.0 / self.n_components, self.n_components)
527527

528528
if 'c' in self.init_params or not hasattr(self, 'precs_'):
529-
if self._covariance_type == 'spherical':
529+
if self.covariance_type == 'spherical':
530530
self.dof_ = np.ones(self.n_components)
531531
self.scale_ = np.ones(self.n_components)
532532
self.precs_ = np.ones((self.n_components, n_features))
533533
self.bound_prec_ = 0.5 * n_features * (
534534
digamma(self.dof_) - np.log(self.scale_))
535-
elif self._covariance_type == 'diag':
535+
elif self.covariance_type == 'diag':
536536
self.dof_ = 1 + 0.5 * n_features
537537
self.dof_ *= np.ones((self.n_components, n_features))
538538
self.scale_ = np.ones((self.n_components, n_features))
539539
self.precs_ = np.ones((self.n_components, n_features))
540540
self.bound_prec_ = 0.5 * (np.sum(digamma(self.dof_) -
541541
np.log(self.scale_), 1))
542542
self.bound_prec_ -= 0.5 * np.sum(self.precs_, 1)
543-
elif self._covariance_type == 'tied':
543+
elif self.covariance_type == 'tied':
544544
self.dof_ = 1.
545545
self.scale_ = np.identity(n_features)
546546
self.precs_ = np.identity(n_features)
547547
self.det_scale_ = 1.
548548
self.bound_prec_ = 0.5 * wishart_log_det(
549549
self.dof_, self.scale_, self.det_scale_, n_features)
550550
self.bound_prec_ -= 0.5 * self.dof_ * np.trace(self.scale_)
551-
elif self._covariance_type == 'full':
551+
elif self.covariance_type == 'full':
552552
self.dof_ = (1 + self.n_components + X.shape[0])
553553
self.dof_ *= np.ones(self.n_components)
554554
self.scale_ = [2 * np.identity(n_features)
@@ -692,12 +692,12 @@ def eval(self, X):
692692
bound = np.zeros(X.shape[0])
693693
dg = digamma(self.gamma_) - digamma(np.sum(self.gamma_))
694694

695-
if self._covariance_type not in ['full', 'tied', 'diag', 'spherical']:
695+
if self.covariance_type not in ['full', 'tied', 'diag', 'spherical']:
696696
raise NotImplementedError("This ctype is not implemented: %s"
697-
% self._covariance_type)
697+
% self.covariance_type)
698698
p = _bound_state_log_lik(
699699
X, self._initial_bound + self.bound_prec_,
700-
self.precs_, self.means_, self._covariance_type)
700+
self.precs_, self.means_, self.covariance_type)
701701

702702
z = p + dg
703703
z = log_normalize(z, axis=-1)
@@ -741,4 +741,4 @@ def _monitor(self, X, z, n, end=False):
741741
print "Bound after updating %8s: %f" % (n, self.lower_bound(X, z))
742742
if end == True:
743743
print "Cluster proportions:", self.gamma_
744-
print "covariance_type:", self._covariance_type
744+
print "covariance_type:", self.covariance_type

sklearn/mixture/gmm.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ class GMM(BaseEstimator):
196196
>>> obs = np.concatenate((np.random.randn(100, 1),
197197
... 10 + np.random.randn(300, 1)))
198198
>>> g.fit(obs) # doctest: +NORMALIZE_WHITESPACE
199-
GMM(covariance_type=None, init_params='wmc', min_covar=0.001,
199+
GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
200200
n_components=2, n_init=1, n_iter=100, params='wmc',
201201
random_state=None, thresh=0.01)
202202
>>> np.round(g.weights_, 2)
@@ -214,7 +214,7 @@ class GMM(BaseEstimator):
214214
>>> # Refit the model on new data (initial parameters remain the
215215
>>> # same), this time with an even split between the two modes.
216216
>>> g.fit(20 * [[0]] + 20 * [[10]]) # doctest: +NORMALIZE_WHITESPACE
217-
GMM(covariance_type=None, init_params='wmc', min_covar=0.001,
217+
GMM(covariance_type='diag', init_params='wmc', min_covar=0.001,
218218
n_components=2, n_init=1, n_iter=100, params='wmc',
219219
random_state=None, thresh=0.01)
220220
>>> np.round(g.weights_, 2)
@@ -226,7 +226,7 @@ def __init__(self, n_components=1, covariance_type='diag',
226226
random_state=None, thresh=1e-2, min_covar=1e-3,
227227
n_iter=100, n_init=1, params='wmc', init_params='wmc'):
228228
self.n_components = n_components
229-
self._covariance_type = covariance_type
229+
self.covariance_type = covariance_type
230230
self.thresh = thresh
231231
self.min_covar = min_covar
232232
self.random_state = random_state
@@ -257,19 +257,19 @@ def _get_covars(self):
257257
(`n_states`, `n_features`) if 'diag',
258258
(`n_states`, `n_features`, `n_features`) if 'full'
259259
"""
260-
if self._covariance_type == 'full':
260+
if self.covariance_type == 'full':
261261
return self.covars_
262-
elif self._covariance_type == 'diag':
262+
elif self.covariance_type == 'diag':
263263
return [np.diag(cov) for cov in self.covars_]
264-
elif self._covariance_type == 'tied':
264+
elif self.covariance_type == 'tied':
265265
return [self.covars_] * self.n_components
266-
elif self._covariance_type == 'spherical':
266+
elif self.covariance_type == 'spherical':
267267
return [np.diag(cov) for cov in self.covars_]
268268

269269
def _set_covars(self, covars):
270270
"""Provide values for covariance"""
271271
covars = np.asarray(covars)
272-
_validate_covars(covars, self._covariance_type, self.n_components)
272+
_validate_covars(covars, self.covariance_type, self.n_components)
273273
self.covars_ = covars
274274

275275
def eval(self, X):
@@ -302,7 +302,7 @@ def eval(self, X):
302302
raise ValueError('the shape of X is not compatible with self')
303303

304304
lpr = (log_multivariate_normal_density(
305-
X, self.means_, self.covars_, self._covariance_type)
305+
X, self.means_, self.covars_, self.covariance_type)
306306
+ np.log(self.weights_))
307307
logprob = logsumexp(lpr, axis=1)
308308
responsibilities = np.exp(lpr - logprob[:, np.newaxis])
@@ -420,14 +420,14 @@ def sample(self, n_samples=1, random_state=None):
420420
# number of those occurrences
421421
num_comp_in_X = comp_in_X.sum()
422422
if num_comp_in_X > 0:
423-
if self._covariance_type == 'tied':
423+
if self.covariance_type == 'tied':
424424
cv = self.covars_
425-
elif self._covariance_type == 'spherical':
425+
elif self.covariance_type == 'spherical':
426426
cv = self.covars_[comp][0]
427427
else:
428428
cv = self.covars_[comp]
429429
X[comp_in_X] = sample_gaussian(
430-
self.means_[comp], cv, self._covariance_type,
430+
self.means_[comp], cv, self.covariance_type,
431431
num_comp_in_X, random_state=random_state).T
432432
return X
433433

@@ -490,7 +490,7 @@ def fit(self, X, **kwargs):
490490
cv.shape = (1, 1)
491491
self.covars_ = \
492492
distribute_covar_matrix_to_match_covariance_type(
493-
cv, self._covariance_type, self.n_components)
493+
cv, self.covariance_type, self.n_components)
494494

495495
# EM algorithms
496496
log_likelihood = []
@@ -536,7 +536,7 @@ def _do_mstep(self, X, responsibilities, params, min_covar=0):
536536
if 'm' in params:
537537
self.means_ = weighted_X_sum * inverse_weights
538538
if 'c' in params:
539-
covar_mstep_func = _covar_mstep_funcs[self._covariance_type]
539+
covar_mstep_func = _covar_mstep_funcs[self.covariance_type]
540540
self.covars_ = covar_mstep_func(
541541
self, X, responsibilities, weighted_X_sum, inverse_weights,
542542
min_covar)
@@ -545,13 +545,13 @@ def _do_mstep(self, X, responsibilities, params, min_covar=0):
545545
def _n_parameters(self):
546546
"""Return the number of free parameters in the model."""
547547
ndim = self.means_.shape[1]
548-
if self._covariance_type == 'full':
548+
if self.covariance_type == 'full':
549549
cov_params = self.n_components * ndim * (ndim + 1) / 2.
550-
elif self._covariance_type == 'diag':
550+
elif self.covariance_type == 'diag':
551551
cov_params = self.n_components * ndim
552-
elif self._covariance_type == 'tied':
552+
elif self.covariance_type == 'tied':
553553
cov_params = ndim * (ndim + 1) / 2.
554-
elif self._covariance_type == 'spherical':
554+
elif self.covariance_type == 'spherical':
555555
cov_params = self.n_components
556556
mean_params = ndim * self.n_components
557557
return int(cov_params + mean_params + self.n_components - 1)

sklearn/mixture/tests/test_gmm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def test_GMM_attributes():
108108
means = rng.randint(-20, 20, (n_components, n_features))
109109

110110
assert g.n_components == n_components
111-
assert g._covariance_type == covariance_type
111+
assert g.covariance_type == covariance_type
112112

113113
g.weights_ = weights
114114
assert_array_almost_equal(g.weights_, weights)

0 commit comments

Comments
 (0)
0