MRG factorize common tests. by amueller · Pull Request #893 · scikit-learn/scikit-learn · GitHub
MRG factorize common tests. #893


Merged · 27 commits · Jun 26, 2012
Changes from all commits (27 commits):
863d53c  ENH factorize common tests. (amueller, Jun 7, 2012)
1e9c337  ENH don't list abstract base classes (amueller, Jun 7, 2012)
26c31bf  ENH make base classes abstract meta classes (amueller, Jun 7, 2012)
86cedaf  ENH make all Estimators default constructible (except SparseCoder) (amueller, Jun 7, 2012)
04d8160  ENH Add MetaEstimatorMixin, make RFE default constructible (amueller, Jun 7, 2012)
520da07  ENH make GMMs and LLE cloneable. (amueller, Jun 7, 2012)
7cdeecf  COSMIT get rid of warnings (can't get rid of deprecation warnings onl… (amueller, Jun 7, 2012)
21c0d1a  ENH make BaseLabelPropagation abstract base class, make OutlierDetect… (amueller, Jun 7, 2012)
282d267  BUG fix testing for abstract classes (amueller, Jun 7, 2012)
f71185d  ENH default score func for univariate feature selection: f_classif (amueller, Jun 7, 2012)
ee7d78c  Make sparse svm base class ABC (amueller, Jun 7, 2012)
30d6246  FIX better class selection, more strict testing. (amueller, Jun 7, 2012)
7a585e4  ENH more tests (amueller, Jun 7, 2012)
310a8ce  MISC raise NotImplementedError instead of value error in decision_fun… (amueller, Jun 12, 2012)
c02938f  ENH do zero mean, unit variance on iris, don't test naive Bayes (for … (amueller, Jun 12, 2012)
cd0b531  ENH change defaults on SGD (works on digits and iris and I just guess… (amueller, Jun 12, 2012)
f0026be  ENH avoid division by zero in LDA, also avoid reusing variable names. (amueller, Jun 12, 2012)
4264acd  MISC don't test SVM for the moment, rest works :) (amueller, Jun 12, 2012)
bab0539  ENH make LinearModel and LinearModelCV abstract base classes (amueller, Jun 12, 2012)
6aa167c  ENH test regressors (amueller, Jun 12, 2012)
5338bda  MISC shuffle iris for SGD based methods (amueller, Jun 13, 2012)
a44318a  Revert "ENH change defaults on SGD (works on digits and iris and I ju… (amueller, Jun 13, 2012)
ddf5693  ENH Fix seed that makes SGDClassifier work. (amueller, Jun 13, 2012)
971d131  ENH create BaseRidge base class (amueller, Jun 13, 2012)
a93e0af  ENH test more shapes, test non-consecutive classes, test accuracy on … (amueller, Jun 13, 2012)
94708a3  FIX minor rebasing and other problems (amueller, Jun 26, 2012)
bd94b49  MISC cleanup common testing (amueller, Jun 26, 2012)
6 changes: 6 additions & 0 deletions sklearn/base.py
@@ -363,6 +363,12 @@ def fit_transform(self, X, y=None, **fit_params):
         return self.fit(X, y, **fit_params).transform(X)
 
 
+###############################################################################
+class MetaEstimatorMixin(object):
+    """Mixin class for all meta estimators in scikit-learn"""
+    # this is just a tag for the moment
+
+
 ###############################################################################
 # XXX: Temporary solution to figure out if an estimator is a classifier
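For context, this mixin is a pure tag: the common-test machinery can use an issubclass check to skip meta-estimators, which cannot be constructed without a wrapped sub-estimator. A minimal sketch of that use (the helper name is hypothetical, not part of this PR):

    from sklearn.base import MetaEstimatorMixin

    def concrete_estimators(all_estimators):
        """Yield only the estimators the common tests can default-construct."""
        for name, Est in all_estimators:
            if issubclass(Est, MetaEstimatorMixin):
                continue  # needs a base estimator argument; skip it
            yield name, Est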
6 changes: 3 additions & 3 deletions sklearn/covariance/outlier_detection.py
@@ -15,11 +15,11 @@
 import numpy as np
 import scipy as sp
 from . import MinCovDet
-from ..base import ClassifierMixin
 from ..utils import deprecated
+from ..base import ClassifierMixin
 
 
-class OutlierDetectionMixin(ClassifierMixin):
+class OutlierDetectionMixin(object):
     """Set of methods for outliers detection with covariance estimators.
 
     Parameters
@@ -103,7 +103,7 @@ def predict(self, X):
         return is_inlier
 
 
-class EllipticEnvelope(OutlierDetectionMixin, MinCovDet):
+class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet):
     """An object for detecting outliers in a Gaussian distributed dataset.
 
     Attributes
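Moving ClassifierMixin off the mixin and onto the concrete class keeps the outlier-detection helpers free of classifier semantics: is_classifier() keys off ClassifierMixin, so only EllipticEnvelope itself gets tagged. A rough check of the resulting behavior (assuming both classes are default-constructible):

    from sklearn.base import is_classifier
    from sklearn.covariance import EllipticEnvelope, MinCovDet

    print(is_classifier(EllipticEnvelope()))  # True: carries ClassifierMixin
    print(is_classifier(MinCovDet()))         # False: plain covariance estimator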
18 changes: 14 additions & 4 deletions sklearn/decomposition/dict_learning.py
@@ -935,7 +935,7 @@ class DictionaryLearning(BaseEstimator, SparseCodingMixin):
     SparsePCA
     MiniBatchSparsePCA
     """
-    def __init__(self, n_atoms, alpha=1, max_iter=1000, tol=1e-8,
+    def __init__(self, n_atoms=None, alpha=1, max_iter=1000, tol=1e-8,
                  fit_algorithm='lars', transform_algorithm='omp',
                  transform_n_nonzero_coefs=None, transform_alpha=None,
                  n_jobs=1, code_init=None, dict_init=None, verbose=False,
@@ -968,7 +968,12 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        V, U, E = dict_learning(X, self.n_atoms, self.alpha,
+        if self.n_atoms is None:
+            n_atoms = X.shape[1]
+        else:
+            n_atoms = self.n_atoms
+
+        V, U, E = dict_learning(X, n_atoms, self.alpha,
                                 tol=self.tol, max_iter=self.max_iter,
                                 method=self.fit_algorithm,
                                 n_jobs=self.n_jobs,
@@ -1080,7 +1085,7 @@ class MiniBatchDictionaryLearning(BaseEstimator, SparseCodingMixin):
     MiniBatchSparsePCA
 
     """
-    def __init__(self, n_atoms, alpha=1, n_iter=1000,
+    def __init__(self, n_atoms=None, alpha=1, n_iter=1000,
                  fit_algorithm='lars', n_jobs=1, chunk_size=3,
                  shuffle=True, dict_init=None, transform_algorithm='omp',
                  transform_n_nonzero_coefs=None, transform_alpha=None,
@@ -1115,7 +1120,12 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        U = dict_learning_online(X, self.n_atoms, self.alpha,
+        if self.n_atoms is None:
+            n_atoms = X.shape[1]
+        else:
+            n_atoms = self.n_atoms
+
+        U = dict_learning_online(X, n_atoms, self.alpha,
                                  n_iter=self.n_iter, return_code=False,
                                  method=self.fit_algorithm,
                                  n_jobs=self.n_jobs,
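This is the pattern repeated throughout the PR: __init__ does nothing but store its (now all-optional) parameters, and a None default is resolved against the data only inside fit. That keeps get_params()/set_params() and clone() lossless, which the new common tests rely on. The idiom in isolation (MyEstimator is a made-up name):

    from sklearn.base import BaseEstimator

    class MyEstimator(BaseEstimator):
        def __init__(self, n_atoms=None):
            # store parameters verbatim; no data-dependent logic here
            self.n_atoms = n_atoms

        def fit(self, X, y=None):
            # resolve the None default against the data at fit time
            n_atoms = X.shape[1] if self.n_atoms is None else self.n_atoms
            ...
            return self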
13 changes: 9 additions & 4 deletions sklearn/decomposition/pca.py
@@ -49,7 +49,8 @@ def _assess_dimension_(spectrum, rank, n_samples, n_features):
 
     pu = -rank * np.log(2)
     for i in range(rank):
-        pu += gammaln((n_features - i) / 2) - np.log(np.pi) * (n_features - i) / 2
+        pu += (gammaln((n_features - i) / 2)
+               - np.log(np.pi) * (n_features - i) / 2)
 
     pl = np.sum(np.log(spectrum[:rank]))
     pl = -pl * n_samples / 2
@@ -429,8 +430,8 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
 
     """
 
-    def __init__(self, n_components, copy=True, iterated_power=3,
-                 whiten=False, random_state=None):
+    def __init__(self, n_components=None, copy=True, iterated_power=3,
+                 whiten=False, random_state=None):
         self.n_components = n_components
         self.copy = copy
         self.iterated_power = iterated_power
@@ -465,8 +466,12 @@ def fit(self, X, y=None):
         # Center data
         self.mean_ = np.mean(X, axis=0)
         X -= self.mean_
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
 
-        U, S, V = randomized_svd(X, self.n_components,
+        U, S, V = randomized_svd(X, n_components,
                                  n_iterations=self.iterated_power,
                                  random_state=self.random_state)
 
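With n_components defaulting to None, RandomizedPCA becomes default-constructible and simply keeps every component at fit time. A quick usage sketch against the 0.x-era API:

    import numpy as np
    from sklearn.decomposition import RandomizedPCA

    X = np.random.RandomState(0).randn(20, 5)
    pca = RandomizedPCA().fit(X)   # n_components=None -> all 5 components kept
    print(pca.components_.shape)   # (5, 5)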
24 changes: 16 additions & 8 deletions sklearn/decomposition/sparse_pca.py
@@ -72,9 +72,9 @@ class SparsePCA(BaseEstimator, TransformerMixin):
     MiniBatchSparsePCA
     DictionaryLearning
     """
-    def __init__(self, n_components, alpha=1, ridge_alpha=0.01, max_iter=1000,
-                 tol=1e-8, method='lars', n_jobs=1, U_init=None,
-                 V_init=None, verbose=False, random_state=None):
+    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
+                 max_iter=1000, tol=1e-8, method='lars', n_jobs=1, U_init=None,
+                 V_init=None, verbose=False, random_state=None):
         self.n_components = n_components
         self.alpha = alpha
         self.ridge_alpha = ridge_alpha
@@ -103,9 +103,13 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
         code_init = self.V_init.T if self.V_init is not None else None
         dict_init = self.U_init.T if self.U_init is not None else None
-        Vt, _, E = dict_learning(X.T, self.n_components, self.alpha,
+        Vt, _, E = dict_learning(X.T, n_components, self.alpha,
                                  tol=self.tol, max_iter=self.max_iter,
                                  method=self.method, n_jobs=self.n_jobs,
                                  verbose=self.verbose,
@@ -212,9 +216,9 @@ class MiniBatchSparsePCA(SparsePCA):
     SparsePCA
     DictionaryLearning
     """
-    def __init__(self, n_components, alpha=1, ridge_alpha=0.01, n_iter=100,
-                 callback=None, chunk_size=3, verbose=False, shuffle=True,
-                 n_jobs=1, method='lars', random_state=None):
+    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
+                 n_iter=100, callback=None, chunk_size=3, verbose=False,
+                 shuffle=True, n_jobs=1, method='lars', random_state=None):
         self.n_components = n_components
         self.alpha = alpha
         self.ridge_alpha = ridge_alpha
@@ -243,7 +247,11 @@ def fit(self, X, y=None):
         """
         self.random_state = check_random_state(self.random_state)
         X = np.asarray(X)
-        Vt, _ = dict_learning_online(X.T, self.n_components, alpha=self.alpha,
+        if self.n_components is None:
+            n_components = X.shape[1]
+        else:
+            n_components = self.n_components
+        Vt, _ = dict_learning_online(X.T, n_components, alpha=self.alpha,
                                      n_iter=self.n_iter, return_code=True,
                                      dict_init=None, verbose=self.verbose,
                                      callback=self.callback,
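The payoff of making every constructor argument optional is that the common tests can instantiate each estimator with no arguments and clone it: clone() rebuilds an estimator purely from get_params(). A sketch of the round-trip these changes enable:

    from sklearn.base import clone
    from sklearn.decomposition import SparsePCA

    spca = SparsePCA()    # now legal: n_components defaults to None
    spca2 = clone(spca)   # re-created from get_params()
    assert spca2.get_params() == spca.get_params()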
6 changes: 4 additions & 2 deletions sklearn/ensemble/base.py
@@ -7,9 +7,10 @@
 
 from ..base import clone
 from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
 
 
-class BaseEnsemble(BaseEstimator):
+class BaseEnsemble(BaseEstimator, MetaEstimatorMixin):
     """Base class for all ensemble classes.
 
     Warning: This class should not be used directly. Use derived classes
@@ -27,7 +28,8 @@ class BaseEnsemble(BaseEstimator):
         The list of attributes to use as parameters when instantiating a
         new base estimator. If none are given, default parameters are used.
     """
-    def __init__(self, base_estimator, n_estimators, estimator_params=[]):
+
+    def __init__(self, base_estimator, n_estimators=10, estimator_params=[]):
         # Check parameters
         if not isinstance(base_estimator, BaseEstimator):
             raise TypeError("estimator must be a subclass of BaseEstimator")
10 changes: 10 additions & 0 deletions sklearn/ensemble/forest.py
@@ -35,6 +35,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 
 import itertools
 import numpy as np
+from abc import ABCMeta, abstractmethod
 
 from ..base import ClassifierMixin, RegressorMixin
 from ..externals.joblib import Parallel, delayed, cpu_count
@@ -164,6 +165,9 @@ class BaseForest(BaseEnsemble, SelectorMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],
@@ -294,6 +298,9 @@ class ForestClassifier(BaseForest, ClassifierMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],
@@ -394,6 +401,9 @@ class ForestRegressor(BaseForest, RegressorMixin):
     Warning: This class should not be used directly. Use derived classes
     instead.
     """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, base_estimator,
                  n_estimators=10,
                  estimator_params=[],
3 changes: 3 additions & 0 deletions sklearn/ensemble/gradient_boosting.py
@@ -326,6 +326,9 @@ def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,
 
 class BaseGradientBoosting(BaseEnsemble):
     """Abstract base class for Gradient Boosting. """
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
     def __init__(self, loss, learn_rate, n_estimators, min_samples_split,
                  min_samples_leaf, max_depth, init, subsample, random_state):
         if n_estimators <= 0:
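The forest and gradient-boosting base classes use the Python 2 ABC spelling: a __metaclass__ of ABCMeta plus an @abstractmethod __init__. Under Python 2 that makes direct instantiation raise, so the common tests (which walk every class in the package) can reliably tell base classes from concrete estimators. A standalone sketch of the mechanism (Python 2 only; Python 3 ignores __metaclass__ and would spell it class Base(metaclass=ABCMeta)):

    from abc import ABCMeta, abstractmethod

    class Base(object):
        __metaclass__ = ABCMeta  # Python 2 spelling

        @abstractmethod
        def __init__(self, param=1):
            self.param = param

    class Concrete(Base):
        def __init__(self, param=1):
            super(Concrete, self).__init__(param)

    try:
        Base()
    except TypeError as e:
        print(e)  # can't instantiate abstract class Base ...
    Concrete()    # fine: concrete subclasses still construct normally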
13 changes: 9 additions & 4 deletions sklearn/feature_extraction/image.py
@@ -330,7 +330,7 @@ class PatchExtractor(BaseEstimator):
         Pseudo number generator state used for random sampling.
 
     """
-    def __init__(self, patch_size, max_patches=None, random_state=None):
+    def __init__(self, patch_size=None, max_patches=None, random_state=None):
         self.patch_size = patch_size
         self.max_patches = max_patches
         self.random_state = random_state
@@ -367,16 +367,21 @@ def transform(self, X):
         n_images, i_h, i_w = X.shape[:3]
         X = np.reshape(X, (n_images, i_h, i_w, -1))
         n_channels = X.shape[-1]
+        if self.patch_size is None:
+            patch_size = i_h / 10, i_w / 10
+        else:
+            patch_size = self.patch_size
+
         if self.max_patches:
             n_patches = self.max_patches
         else:
-            p_h, p_w = self.patch_size
+            p_h, p_w = patch_size
             n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)
-        patches_shape = (n_images * n_patches,) + self.patch_size
+        patches_shape = (n_images * n_patches,) + patch_size
         if n_channels > 1:
             patches_shape += (n_channels,)
         patches = np.empty(patches_shape)
         for ii, image in enumerate(X):
             patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(
-                image, self.patch_size, self.max_patches, self.random_state)
+                image, patch_size, self.max_patches, self.random_state)
         return patches
7 changes: 7 additions & 0 deletions sklearn/feature_extraction/tests/test_image.py
@@ -188,6 +188,13 @@ def test_patch_extractor_max_patches():
     assert_true(patches.shape == (len(lenas) * 100, 8, 8))
 
 
+def test_patch_extractor_max_patches_default():
+    lenas = lena_collection
+    extr = PatchExtractor(max_patches=100, random_state=0)
+    patches = extr.transform(lenas)
+    assert_equal(patches.shape, (len(lenas) * 100, 12, 12))
+
+
 def test_patch_extractor_all_patches():
     lenas = lena_collection
     i_h, i_w = lenas.shape[1:3]
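The expected 12x12 patch shape follows from the new default in transform: one tenth of the image height and width, floor-divided under Python 2. A quick check, assuming the downsampled test images are 128x128:

    i_h = i_w = 128                      # assumed size of the lena test collection
    patch_size = (i_h // 10, i_w // 10)  # transform() uses "/", i.e. Python 2 int division
    print(patch_size)                    # (12, 12)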
20 changes: 12 additions & 8 deletions sklearn/feature_selection/rfe.py
@@ -8,12 +8,13 @@
 
 import numpy as np
 from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
 from ..base import clone
 from ..base import is_classifier
 from ..cross_validation import check_cv
 
 
-class RFE(BaseEstimator):
+class RFE(BaseEstimator, MetaEstimatorMixin):
     """Feature ranking with recursive feature elimination.
 
     Given an external estimator that assigns weights to features (e.g., the
@@ -83,7 +84,7 @@ class RFE(BaseEstimator):
     for cancer classification using support vector machines",
     Mach. Learn., 46(1-3), 389--422, 2002.
     """
-    def __init__(self, estimator, n_features_to_select, step=1):
+    def __init__(self, estimator, n_features_to_select=None, step=1):
         self.estimator = estimator
         self.n_features_to_select = n_features_to_select
         self.step = step
@@ -102,6 +103,10 @@ def fit(self, X, y):
         """
         # Initialization
         n_features = X.shape[1]
+        if self.n_features_to_select is None:
+            n_features_to_select = n_features / 2
+        else:
+            n_features_to_select = self.n_features_to_select
 
         if 0.0 < self.step < 1.0:
             step = int(self.step * n_features)
@@ -114,7 +119,7 @@
         ranking_ = np.ones(n_features, dtype=np.int)
 
         # Elimination
-        while np.sum(support_) > self.n_features_to_select:
+        while np.sum(support_) > n_features_to_select:
             # Remaining features
             features = np.arange(n_features)[support_]
 
@@ -128,7 +133,7 @@
             ranks = np.argsort(estimator.coef_ ** 2)
 
             # Eliminate the worse features
-            threshold = min(step, np.sum(support_) - self.n_features_to_select)
+            threshold = min(step, np.sum(support_) - n_features_to_select)
             support_[features[ranks][:threshold]] = False
             ranking_[np.logical_not(support_)] += 1
 
@@ -187,7 +192,7 @@ def transform(self, X):
         return X[:, self.support_]
 
 
-class RFECV(RFE):
+class RFECV(RFE, MetaEstimatorMixin):
     """Feature ranking with recursive feature elimination and cross-validated
     selection of the best number of features.
 
@@ -283,9 +288,8 @@ def fit(self, X, y):
         regression).
         """
         # Initialization
-        rfe = RFE(estimator=self.estimator,
-                  n_features_to_select=1,
-                  step=self.step)
+        rfe = RFE(estimator=self.estimator, n_features_to_select=1,
+                  step=self.step)
 
         cv = check_cv(self.cv, X, y, is_classifier(self.estimator))
         scores = {}
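With n_features_to_select defaulting to None, RFE needs only an estimator to construct, and fit() falls back to keeping half of the features (n_features / 2, integer division under Python 2). A usage sketch against the era's API:

    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFE
    from sklearn.svm import SVC

    iris = load_iris()
    rfe = RFE(estimator=SVC(kernel='linear'))  # n_features_to_select left unset
    rfe.fit(iris.data, iris.target)
    print(rfe.support_.sum())  # 2: half of iris's 4 features survive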