Merge pull request #893 from amueller/common_test · scikit-learn/scikit-learn@549d82f · GitHub
Commit 549d82f

Merge pull request #893 from amueller/common_test

MRG factorize common tests.

2 parents: 7ce5b22 + bd94b49
29 files changed: +408 additions, -108 deletions

sklearn/base.py

Lines changed: 6 additions & 0 deletions

@@ -363,6 +363,12 @@ def fit_transform(self, X, y=None, **fit_params):
         return self.fit(X, y, **fit_params).transform(X)
 
 
+###############################################################################
+class MetaEstimatorMixin(object):
+    """Mixin class for all meta estimators in scikit-learn"""
+    # this is just a tag for the moment
+
+
 ###############################################################################
 # XXX: Temporary solution to figure out if an estimator is a classifier
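MetaEstimatorMixin is deliberately empty: it is a tag that downstream code, such as the factorized common tests this PR introduces, can detect with isinstance(). A minimal sketch of that use (MyWrapper is a hypothetical estimator written for illustration, not part of this commit):

    from sklearn.base import BaseEstimator, MetaEstimatorMixin

    class MyWrapper(BaseEstimator, MetaEstimatorMixin):
        """Hypothetical meta-estimator wrapping another estimator."""
        def __init__(self, estimator=None):
            self.estimator = estimator

    # A tag-only mixin adds no methods, so an isinstance() check is all a
    # test harness needs to special-case meta-estimators.
    print(isinstance(MyWrapper(), MetaEstimatorMixin))  # True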

sklearn/covariance/outlier_detection.py

Lines changed: 3 additions & 3 deletions

@@ -15,11 +15,11 @@
 import numpy as np
 import scipy as sp
 from . import MinCovDet
-from ..base import ClassifierMixin
 from ..utils import deprecated
+from ..base import ClassifierMixin
 
 
-class OutlierDetectionMixin(ClassifierMixin):
+class OutlierDetectionMixin(object):
     """Set of methods for outliers detection with covariance estimators.
 
     Parameters
@@ -103,7 +103,7 @@ def predict(self, X):
         return is_inlier
 
 
-class EllipticEnvelope(OutlierDetectionMixin, MinCovDet):
+class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet):
     """An object for detecting outliers in a Gaussian distributed dataset.
 
     Attributes
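With the mixin decoupled from ClassifierMixin, only classes that explicitly opt in (here EllipticEnvelope) keep classifier behaviour such as score. A self-contained sketch of the resulting method resolution order, using plain stand-in classes rather than the real sklearn ones:

    class ClassifierMixin(object):
        def score(self, X, y):
            return 1.0  # stand-in for the real accuracy score

    class OutlierDetectionMixin(object):  # no longer inherits ClassifierMixin
        def predict(self, X):
            return X  # stand-in

    class MinCovDet(object):
        pass

    class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet):
        pass

    print([c.__name__ for c in EllipticEnvelope.__mro__])
    # ['EllipticEnvelope', 'ClassifierMixin', 'OutlierDetectionMixin',
    #  'MinCovDet', 'object']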

sklearn/decomposition/dict_learning.py

Lines changed: 14 additions & 4 deletions

@@ -935,7 +935,7 @@ class DictionaryLearning(BaseEstimator, SparseCodingMixin):
     SparsePCA
     MiniBatchSparsePCA
     """
-    def __init__(self, n_atoms, alpha=1, max_iter=1000, tol=1e-8,
+    def __init__(self, n_atoms=None, alpha=1, max_iter=1000, tol=1e-8,
                  fit_algorithm='lars', transform_algorithm='omp',
                  transform_n_nonzero_coefs=None, transform_alpha=None,
                  n_jobs=1, code_init=None, dict_init=None, verbose=False,
@@ -968,7 +968,12 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        V, U, E = dict_learning(X, self.n_atoms, self.alpha,
+        if self.n_atoms is None:
+            n_atoms = X.shape[1]
+        else:
+            n_atoms = self.n_atoms
+
+        V, U, E = dict_learning(X, n_atoms, self.alpha,
                                 tol=self.tol, max_iter=self.max_iter,
                                 method=self.fit_algorithm,
                                 n_jobs=self.n_jobs,
@@ -1080,7 +1085,7 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin):
     MiniBatchSparsePCA
 
     """
-    def __init__(self, n_atoms, alpha=1, n_iter=1000,
+    def __init__(self, n_atoms=None, alpha=1, n_iter=1000,
                  fit_algorithm='lars', n_jobs=1, chunk_size=3,
                  shuffle=True, dict_init=None, transform_algorithm='omp',
                  transform_n_nonzero_coefs=None, transform_alpha=None,
@@ -1115,7 +1120,12 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        U = dict_learning_online(X, self.n_atoms, self.alpha,
+        if self.n_atoms is None:
+            n_atoms = X.shape[1]
+        else:
+            n_atoms = self.n_atoms
+
+        U = dict_learning_online(X, n_atoms, self.alpha,
                                  n_iter=self.n_iter, return_code=False,
                                  method=self.fit_algorithm,
                                  n_jobs=self.n_jobs,
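This is the recurring idiom of the commit: a formerly required constructor argument gains a None default, __init__ only stores parameters, and the data-dependent fallback is resolved at fit time, so every estimator can be instantiated with no arguments (a prerequisite for the factorized common tests). A minimal self-contained sketch of the idiom (TinyEstimator is illustrative only); the same pattern reappears in pca.py, sparse_pca.py, image.py, and rfe.py below:

    import numpy as np

    class TinyEstimator(object):
        def __init__(self, n_atoms=None):
            # __init__ must only store parameters; no validation or
            # data-dependent logic happens here.
            self.n_atoms = n_atoms

        def fit(self, X):
            X = np.asarray(X)
            # Resolve the default once data is available.
            if self.n_atoms is None:
                n_atoms = X.shape[1]
            else:
                n_atoms = self.n_atoms
            self.n_atoms_ = n_atoms  # illustrative fitted attribute
            return self

    print(TinyEstimator().fit(np.zeros((5, 3))).n_atoms_)  # 3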

sklearn/decomposition/pca.py

Lines changed: 9 additions & 4 deletions

@@ -49,7 +49,8 @@ def _assess_dimension_(spectrum, rank, n_samples, n_features):
 
     pu = -rank * np.log(2)
     for i in range(rank):
-        pu += gammaln((n_features - i) / 2) - np.log(np.pi) * (n_features - i) / 2
+        pu += (gammaln((n_features - i) / 2)
+               - np.log(np.pi) * (n_features - i) / 2)
 
     pl = np.sum(np.log(spectrum[:rank]))
     pl = -pl * n_samples / 2
@@ -429,8 +430,8 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
 
     """
 
-    def __init__(self, n_components, copy=True, iterated_power=3,
-                 whiten=False, random_state=None):
+    def __init__(self, n_components=None, copy=True, iterated_power=3,
+                 whiten=False, random_state=None):
         self.n_components = n_components
         self.copy = copy
         self.iterated_power = iterated_power
@@ -465,8 +466,12 @@ def fit(self, X, y=None):
         # Center data
         self.mean_ = np.mean(X, axis=0)
         X -= self.mean_
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
 
-        U, S, V = randomized_svd(X, self.n_components,
+        U, S, V = randomized_svd(X, n_components,
                                  n_iterations=self.iterated_power,
                                  random_state=self.random_state)
 
sklearn/decomposition/sparse_pca.py

Lines changed: 16 additions & 8 deletions

@@ -72,9 +72,9 @@ class SparsePCA(BaseEstimator, TransformerMixin):
     MiniBatchSparsePCA
     DictionaryLearning
     """
-    def __init__(self, n_components, alpha=1, ridge_alpha=0.01, max_iter=1000,
-                 tol=1e-8, method='lars', n_jobs=1, U_init=None,
-                 V_init=None, verbose=False, random_state=None):
+    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
+                 max_iter=1000, tol=1e-8, method='lars', n_jobs=1, U_init=None,
+                 V_init=None, verbose=False, random_state=None):
         self.n_components = n_components
         self.alpha = alpha
         self.ridge_alpha = ridge_alpha
@@ -103,9 +103,13 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
         code_init = self.V_init.T if self.V_init is not None else None
         dict_init = self.U_init.T if self.U_init is not None else None
-        Vt, _, E = dict_learning(X.T, self.n_components, self.alpha,
+        Vt, _, E = dict_learning(X.T, n_components, self.alpha,
                                  tol=self.tol, max_iter=self.max_iter,
                                  method=self.method, n_jobs=self.n_jobs,
                                  verbose=self.verbose,
@@ -212,9 +216,9 @@ class MiniBatchSparsePCA(SparsePCA):
     SparsePCA
     DictionaryLearning
     """
-    def __init__(self, n_components, alpha=1, ridge_alpha=0.01, n_iter=100,
-                 callback=None, chunk_size=3, verbose=False, shuffle=True,
-                 n_jobs=1, method='lars', random_state=None):
+    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
+                 n_iter=100, callback=None, chunk_size=3, verbose=False,
+                 shuffle=True, n_jobs=1, method='lars', random_state=None):
         self.n_components = n_components
         self.alpha = alpha
         self.ridge_alpha = ridge_alpha
@@ -243,7 +247,11 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        Vt, _ = dict_learning_online(X.T, self.n_components, alpha=self.alpha,
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
+        Vt, _ = dict_learning_online(X.T, n_components, alpha=self.alpha,
                                      n_iter=self.n_iter, return_code=True,
                                      dict_init=None, verbose=self.verbose,
                                      callback=self.callback,

sklearn/ensemble/base.py

Lines changed: 4 additions & 2 deletions

@@ -7,9 +7,10 @@
 
 from ..base import clone
 from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
 
 
-class BaseEnsemble(BaseEstimator):
+class BaseEnsemble(BaseEstimator, MetaEstimatorMixin):
     """Base class for all ensemble classes.
 
     Warning: This class should not be used directly. Use derived classes
@@ -27,7 +28,8 @@ class BaseEnsemble(BaseEstimator):
         The list of attributes to use as parameters when instantiating a
         new base estimator. If none are given, default parameters are used.
     """
-    def __init__(self, base_estimator, n_estimators, estimator_params=[]):
+
+    def __init__(self, base_estimator, n_estimators=10, estimator_params=[]):
         # Check parameters
         if not isinstance(base_estimator, BaseEstimator):
             raise TypeError("estimator must be a subclass of BaseEstimator")

sklearn/ensemble/forest.py

Lines changed: 10 additions & 0 deletions

@@ -35,6 +35,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 
 import itertools
 import numpy as np
+from abc import ABCMeta, abstractmethod
 
 from ..base import ClassifierMixin, RegressorMixin
 from ..externals.joblib import Parallel, delayed, cpu_count
@@ -164,6 +165,9 @@ class BaseForest(BaseEnsemble, SelectorMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],
@@ -294,6 +298,9 @@ class ForestClassifier(BaseForest, ClassifierMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],
@@ -394,6 +401,9 @@ class ForestRegressor(BaseForest, RegressorMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],

sklearn/ensemble/gradient_boosting.py

Lines changed: 3 additions & 0 deletions

@@ -326,6 +326,9 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,
 
 class BaseGradientBoosting(BaseEnsemble):
     """Abstract base class for Gradient Boosting. """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, loss, learn_rate, n_estimators, min_samples_split,
                  min_samples_leaf, max_depth, init, subsample, random_state):
         if n_estimators <= 0:
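forest.py and gradient_boosting.py both mark their base classes abstract with the Python 2 __metaclass__ = ABCMeta idiom plus an @abstractmethod __init__, so the common tests cannot accidentally instantiate them. A self-contained sketch of the effect, with stand-in classes rather than the real ones:

    from abc import ABCMeta, abstractmethod

    class BaseForest(object):
        # Python 2 spelling, as in the codebase at the time; Python 3
        # ignores __metaclass__ and would instead use
        # ``class BaseForest(metaclass=ABCMeta)``.
        __metaclass__ = ABCMeta

        @abstractmethod
        def __init__(self, n_estimators=10):
            self.n_estimators = n_estimators

    class RandomForest(BaseForest):
        def __init__(self, n_estimators=10):
            super(RandomForest, self).__init__(n_estimators)

    RandomForest()   # concrete subclass: fine
    # BaseForest()   # under Python 2 this raises TypeError (abstract class)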

sklearn/feature_extraction/image.py

Lines changed: 9 additions & 4 deletions

@@ -330,7 +330,7 @@ class PatchExtractor(BaseEstimator):
         Pseudo number generator state used for random sampling.
 
     """
-    def __init__(self, patch_size, max_patches=None, random_state=None):
+    def __init__(self, patch_size=None, max_patches=None, random_state=None):
         self.patch_size = patch_size
         self.max_patches = max_patches
         self.random_state = random_state
@@ -367,16 +367,21 @@ def transform(self, X):
         n_images, i_h, i_w = X.shape[:3]
         X = np.reshape(X, (n_images, i_h, i_w, -1))
         n_channels = X.shape[-1]
+        if self.patch_size is None:
+            patch_size = i_h / 10, i_w / 10
+        else:
+            patch_size = self.patch_size
+
         if self.max_patches:
             n_patches = self.max_patches
         else:
-            p_h, p_w = self.patch_size
+            p_h, p_w = patch_size
             n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)
-        patches_shape = (n_images * n_patches,) + self.patch_size
+        patches_shape = (n_images * n_patches,) + patch_size
         if n_channels > 1:
             patches_shape += (n_channels,)
         patches = np.empty(patches_shape)
         for ii, image in enumerate(X):
             patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(
-                image, self.patch_size, self.max_patches, self.random_state)
+                image, patch_size, self.max_patches, self.random_state)
         return patches

sklearn/feature_extraction/tests/test_image.py

Lines changed: 7 additions & 0 deletions

@@ -188,6 +188,13 @@ def test_patch_extractor_max_patches():
     assert_true(patches.shape == (len(lenas) * 100, 8, 8))
 
 
+def test_patch_extractor_max_patches_default():
+    lenas = lena_collection
+    extr = PatchExtractor(max_patches=100, random_state=0)
+    patches = extr.transform(lenas)
+    assert_equal(patches.shape, (len(lenas) * 100, 12, 12))
+
+
 def test_patch_extractor_all_patches():
     lenas = lena_collection
     i_h, i_w = lenas.shape[1:3]
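The expected (12, 12) shape exercises the new default in PatchExtractor.transform: with patch_size=None, the patch size is inferred as (i_h / 10, i_w / 10) under Python 2 integer division. Assuming the downsampled lena images in this test module are 128x128, 128 / 10 == 12, which is exactly what the assertion checks.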

sklearn/feature_selection/rfe.py

Lines changed: 12 additions & 8 deletions

@@ -8,12 +8,13 @@
 
 import numpy as np
 from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
 from ..base import clone
 from ..base import is_classifier
 from ..cross_validation import check_cv
 
 
-class RFE(BaseEstimator):
+class RFE(BaseEstimator, MetaEstimatorMixin):
     """Feature ranking with recursive feature elimination.
 
     Given an external estimator that assigns weights to features (e.g., the
@@ -83,7 +84,7 @@ class RFE(BaseEstimator):
         for cancer classification using support vector machines",
         Mach. Learn., 46(1-3), 389--422, 2002.
     """
-    def __init__(self, estimator, n_features_to_select, step=1):
+    def __init__(self, estimator, n_features_to_select=None, step=1):
         self.estimator = estimator
         self.n_features_to_select = n_features_to_select
         self.step = step
@@ -102,6 +103,10 @@ def fit(self, X, y):
         """
         # Initialization
         n_features = X.shape[1]
+        if self.n_features_to_select is None:
+            n_features_to_select = n_features / 2
+        else:
+            n_features_to_select = self.n_features_to_select
 
         if 0.0 < self.step < 1.0:
             step = int(self.step * n_features)
@@ -114,7 +119,7 @@ def fit(self, X, y):
         ranking_ = np.ones(n_features, dtype=np.int)
 
         # Elimination
-        while np.sum(support_) > self.n_features_to_select:
+        while np.sum(support_) > n_features_to_select:
             # Remaining features
             features = np.arange(n_features)[support_]
 
@@ -128,7 +133,7 @@ def fit(self, X, y):
             ranks = np.argsort(estimator.coef_ ** 2)
 
             # Eliminate the worse features
-            threshold = min(step, np.sum(support_) - self.n_features_to_select)
+            threshold = min(step, np.sum(support_) - n_features_to_select)
             support_[features[ranks][:threshold]] = False
             ranking_[np.logical_not(support_)] += 1
 
@@ -187,7 +192,7 @@ def transform(self, X):
         return X[:, self.support_]
 
 
-class RFECV(RFE):
+class RFECV(RFE, MetaEstimatorMixin):
     """Feature ranking with recursive feature elimination and cross-validated
     selection of the best number of features.
 
@@ -283,9 +288,8 @@ def fit(self, X, y):
             regression).
         """
         # Initialization
-        rfe = RFE(estimator=self.estimator,
-                  n_features_to_select=1,
-                  step=self.step)
+        rfe = RFE(estimator=self.estimator, n_features_to_select=1,
+                  step=self.step)
 
         cv = check_cv(self.cv, X, y, is_classifier(self.estimator))
         scores = {}
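With n_features_to_select now optional, RFE.fit falls back to keeping half of the features (integer division). A minimal usage sketch under that assumption; any estimator exposing coef_ would do, and the linear SVC and random data here are purely illustrative:

    import numpy as np
    from sklearn.feature_selection import RFE
    from sklearn.svm import SVC

    rng = np.random.RandomState(0)
    X = rng.randn(50, 10)
    y = (X[:, 0] > 0).astype(int)

    # No n_features_to_select given: fit() keeps n_features / 2 = 5 features.
    rfe = RFE(estimator=SVC(kernel='linear')).fit(X, y)
    print(rfe.support_.sum())  # 5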
