From ce53c81ee3c8b5545e7ec17125634d2082ae62ba Mon Sep 17 00:00:00 2001 From: Karan Desai Date: Fri, 10 Mar 2017 02:33:08 +0530 Subject: [PATCH 01/12] ENH Add verbose option and corresponding tests for Pipeline. --- doc/modules/pipeline.rst | 13 ++-- sklearn/pipeline.py | 131 ++++++++++++++++++++++++++++----- sklearn/tests/test_pipeline.py | 58 ++++++++++++++- 3 files changed, 177 insertions(+), 25 deletions(-) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index 232b3ed72bbda..425fe6ed66dc0 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -47,7 +47,7 @@ is an estimator object:: >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(memory=None, steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + ('clf', SVC(C=1.0,...))], verbose=False) The utility function :func:`make_pipeline` is a shorthand for constructing pipelines; @@ -62,7 +62,7 @@ filling in the names automatically:: steps=[('binarizer', Binarizer(copy=True, threshold=0.0)), ('multinomialnb', MultinomialNB(alpha=1.0, class_prior=None, - fit_prior=True))]) + fit_prior=True))], verbose=False) The estimators of a pipeline are stored as a list in the ``steps`` attribute:: @@ -82,7 +82,8 @@ Parameters of the estimators in the pipeline can be accessed using the >>> pipe.set_params(clf__C=10) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(memory=None, steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)), - ('clf', SVC(C=10, cache_size=200, class_weight=None,...))]) + ('clf', SVC(C=10, cache_size=200, class_weight=None,...))], + verbose=False) Attributes of named_steps map to keys, enabling tab completion in interactive environments:: @@ -160,7 +161,7 @@ object:: >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(..., steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + ('clf', SVC(C=1.0,...))], verbose=False) >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) @@ 
-177,7 +178,7 @@ object:: >>> pipe.fit(digits.data, digits.target) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(memory=None, - steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) + steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))], verbose=False) >>> # The pca instance can be inspected directly >>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS [[ -1.77484909e-19 ... 4.07058917e-18]] @@ -199,7 +200,7 @@ object:: >>> cached_pipe.fit(digits.data, digits.target) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(memory=..., - steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) + steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))], verbose=False) >>> print(cached_pipe.named_steps['reduce_dim'].components_) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS [[ -1.77484909e-19 ... 4.07058917e-18]] diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 66da9dffeb066..9cda48aa081eb 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -10,6 +10,8 @@ # License: BSD from collections import defaultdict +from abc import ABCMeta, abstractmethod +import time import numpy as np from scipy import sparse @@ -17,16 +19,73 @@ from .base import clone, TransformerMixin from .externals.joblib import Parallel, delayed from .externals import six -from .utils.metaestimators import if_delegate_has_method +from .utils.metaestimators import if_delegate_has_method, _BaseComposition from .utils import Bunch from .utils.validation import check_memory -from .utils.metaestimators import _BaseComposition __all__ = ['Pipeline', 'FeatureUnion'] -class Pipeline(_BaseComposition): +class _BasePipeline(six.with_metaclass(ABCMeta, _BaseComposition)): + """Handles parameter management for classifiers composed of named steps. 
+ """ + + @abstractmethod + def __init__(self): + pass + + def _replace_step(self, steps_attr, name, new_val): + # assumes `name` is a valid step name + new_steps = getattr(self, steps_attr)[:] + for i, (step_name, _) in enumerate(new_steps): + if step_name == name: + new_steps[i] = (name, new_val) + break + setattr(self, steps_attr, new_steps) + + def _get_params(self, steps_attr, deep=True): + out = super(_BasePipeline, self).get_params(deep=False) + if not deep: + return out + steps = getattr(self, steps_attr) + out.update(steps) + for name, estimator in steps: + if estimator is None: + continue + for key, value in six.iteritems(estimator.get_params(deep=True)): + out['%s__%s' % (name, key)] = value + return out + + def _set_params(self, steps_attr, **params): + # Ensure strict ordering of parameter setting: + # 1. All steps + if steps_attr in params: + setattr(self, steps_attr, params.pop(steps_attr)) + # 2. Step replacement + step_names, _ = zip(*getattr(self, steps_attr)) + for name in list(six.iterkeys(params)): + if '__' not in name and name in step_names: + self._replace_step(steps_attr, name, params.pop(name)) + # 3. Step parameters and other initilisation arguments + super(_BasePipeline, self).set_params(**params) + return self + + def _validate_names(self, names): + if len(set(names)) != len(names): + raise ValueError('Names provided are not unique: ' + '{0!r}'.format(list(names))) + invalid_names = set(names).intersection(self.get_params(deep=False)) + if invalid_names: + raise ValueError('Step names conflict with constructor arguments: ' + '{0!r}'.format(sorted(invalid_names))) + invalid_names = [name for name in names if '__' in name] + if invalid_names: + raise ValueError('Step names must not contain __: got ' + '{0!r}'.format(invalid_names)) + + +class Pipeline(_BasePipeline): """Pipeline of transforms with a final estimator. Sequentially apply a list of transforms and a final estimator. 
@@ -62,6 +121,9 @@ class Pipeline(_BaseComposition): inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming. + verbose : boolean, optional + Verbosity mode. + Attributes ---------- named_steps : bunch object, a dictionary with attribute access @@ -89,7 +151,7 @@ class Pipeline(_BaseComposition): ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE Pipeline(memory=None, steps=[('anova', SelectKBest(...)), - ('svc', SVC(...))]) + ('svc', SVC(...))], verbose=False) >>> prediction = anova_svm.predict(X) >>> anova_svm.score(X, y) # doctest: +ELLIPSIS 0.829... @@ -109,11 +171,12 @@ class Pipeline(_BaseComposition): # BaseEstimator interface - def __init__(self, steps, memory=None): + def __init__(self, steps, memory=None, verbose=False): # shallow copy of steps self.steps = list(steps) self._validate_steps() self.memory = memory + self.verbose = verbose def get_params(self, deep=True): """Get parameters for this estimator. @@ -168,6 +231,14 @@ def _validate_steps(self): "'%s' (type %s) doesn't" % (estimator, type(estimator))) + def _print_final_step(self, start_time, time_elapsed_so_far): + time_elapsed = time.time() - start_time + time_elapsed_so_far += time_elapsed + print('[Pipeline] (step %d of %d) %s ... %.5fs' % + (len(self.steps), len(self.steps), self.steps[-1][0], + time_elapsed_so_far)) + print('[Pipeline] Total time elapsed: %.5fs' % time_elapsed_so_far) + @property def _estimator_type(self): return self.steps[-1][1]._estimator_type @@ -196,7 +267,10 @@ def _fit(self, X, y=None, **fit_params): step, param = pname.split('__', 1) fit_params_steps[step][param] = pval Xt = X + # Keep a record of time elapsed + time_elapsed_so_far = 0 for step_idx, (name, transformer) in enumerate(self.steps[:-1]): + step_start_time = time.time() if transformer is None: pass else: @@ -214,9 +288,16 @@ def _fit(self, X, y=None, **fit_params): # transformer. This is necessary when loading the transformer # from the cache. 
self.steps[step_idx] = (name, fitted_transformer) + + step_time_elapsed = time.time() - step_start_time + time_elapsed_so_far += step_time_elapsed + # Logging time elapsed for current step to stdout + if self.verbose: + print('[Pipeline] (step %d of %d) %s ... %.5fs' % + (step_idx + 1, len(self.steps), name, step_time_elapsed)) if self._final_estimator is None: - return Xt, {} - return Xt, fit_params_steps[self.steps[-1][0]] + return Xt, {}, time_elapsed_so_far + return Xt, fit_params_steps[self.steps[-1][0]], time_elapsed_so_far def fit(self, X, y=None, **fit_params): """Fit the model @@ -244,9 +325,12 @@ def fit(self, X, y=None, **fit_params): self : Pipeline This estimator """ - Xt, fit_params = self._fit(X, y, **fit_params) + Xt, fit_params, time_elapsed_so_far = self._fit(X, y, **fit_params) + final_step_start_time = time.time() if self._final_estimator is not None: self._final_estimator.fit(Xt, y, **fit_params) + if self.verbose: + self._print_final_step(final_step_start_time, time_elapsed_so_far) return self def fit_transform(self, X, y=None, **fit_params): @@ -277,13 +361,21 @@ def fit_transform(self, X, y=None, **fit_params): Transformed samples """ last_step = self._final_estimator - Xt, fit_params = self._fit(X, y, **fit_params) - if hasattr(last_step, 'fit_transform'): - return last_step.fit_transform(Xt, y, **fit_params) - elif last_step is None: + Xt, fit_params, time_elapsed_so_far = self._fit(X, y, **fit_params) + final_step_start_time = time.time() + if last_step is None: + if self.verbose: + print('[Pipeline] Step %s is NoneType.' 
% self.steps[-1][0]) + print('[Pipeline] Total time elapsed: %.3fs' % + time_elapsed_so_far) return Xt + elif hasattr(last_step, 'fit_transform'): + Xt = last_step.fit_transform(Xt, y, **fit_params) else: - return last_step.fit(Xt, y, **fit_params).transform(Xt) + Xt = last_step.fit(Xt, y, **fit_params).transform(Xt) + if self.verbose: + self._print_final_step(final_step_start_time, time_elapsed_so_far) + return Xt @if_delegate_has_method(delegate='_final_estimator') def predict(self, X): @@ -332,8 +424,12 @@ def fit_predict(self, X, y=None, **fit_params): ------- y_pred : array-like """ - Xt, fit_params = self._fit(X, y, **fit_params) - return self.steps[-1][-1].fit_predict(Xt, y, **fit_params) + Xt, fit_params, time_elapsed_so_far = self._fit(X, y, **fit_params) + final_step_start_time = time.time() + y_pred = self.steps[-1][-1].fit_predict(Xt, y, **fit_params) + if self.verbose: + self._print_final_step(final_step_start_time, time_elapsed_so_far) + return y_pred @if_delegate_has_method(delegate='_final_estimator') def predict_proba(self, X): @@ -547,7 +643,7 @@ def make_pipeline(*steps, **kwargs): Pipeline(memory=None, steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), - ('gaussiannb', GaussianNB(priors=None))]) + ('gaussiannb', GaussianNB(priors=None))], verbose=False) Returns ------- @@ -584,7 +680,7 @@ def _fit_transform_one(transformer, weight, X, y, return res * weight, transformer -class FeatureUnion(_BaseComposition, TransformerMixin): +class FeatureUnion(_BasePipeline, TransformerMixin): """Concatenates results of multiple transformer objects. This estimator applies a list of transformer objects in parallel to the @@ -612,6 +708,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin): Keys are transformer names, values the weights. 
""" + def __init__(self, transformer_list, n_jobs=1, transformer_weights=None): self.transformer_list = list(transformer_list) self.n_jobs = n_jobs diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 1165370885d36..ed54482cd26c8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -67,12 +67,12 @@ def set_params(self, **params): class NoInvTransf(NoTrans): - def transform(self, X): + def transform(self, X, y=None): return X class Transf(NoInvTransf): - def transform(self, X): + def transform(self, X, y=None): return X def inverse_transform(self, X): @@ -573,6 +573,7 @@ def make(): 'memory': None, 'm2__mult': 2, 'last__mult': 5, + 'verbose': False }) pipeline.set_params(m2=None) @@ -968,3 +969,56 @@ def test_make_pipeline_memory(): assert_true(pipeline.memory is None) shutil.rmtree(cachedir) + +def check_pipeline_verbosity_fit_predict(pipe_method): + # Test that the verbosity of pipeline is proper + from sklearn.externals.six.moves import cStringIO as StringIO + import sys + old_stdout = sys.stdout + sys.stdout = StringIO() + pipe_method(X=None, y=None, clf__should_succeed=True) + verbose_output = sys.stdout + sys.stdout = old_stdout + + # check output + verbose_output.seek(0) + lines = verbose_output.readlines() + assert_true('[Pipeline] (step 1 of 2) transf ... ' in lines[0]) + assert_true('[Pipeline] (step 2 of 2) clf ... 
' in lines[1]) + assert_true('[Pipeline] Total time elapsed: ' in lines[2]) + + +def test_pipeline_fit_verbosity(): + pipe = Pipeline([('transf', Transf()), ('clf', FitParamT())], verbose=True) + yield check_pipeline_verbosity_fit_predict, pipe.fit + yield check_pipeline_verbosity_fit_predict, pipe.fit_predict + + +def check_pipeline_verbosity_fit_transform(pipe_method, last_was_none=False): + # Test that the verbosity of pipeline is proper + from sklearn.externals.six.moves import cStringIO as StringIO + import sys + old_stdout = sys.stdout + sys.stdout = StringIO() + pipe_method(X=[[1, 2, 3], [4, 5, 6]], y=[[7], [8]]) + verbose_output = sys.stdout + sys.stdout = old_stdout + + # check output + verbose_output.seek(0) + lines = verbose_output.readlines() + assert_true('[Pipeline] (step 1 of 2) mult1 ... ' in lines[0]) + if last_was_none: + assert_true('[Pipeline] Step mult2 is NoneType.' in lines[1]) + else: + assert_true('[Pipeline] (step 2 of 2) mult2 ... ' in lines[1]) + assert_true('[Pipeline] Total time elapsed: ' in lines[2]) + + +def test_pipeline_verbosity_fit_transform(): + pipe = Pipeline([('mult1', Mult(mult=1)), ('mult2', Mult(mult=2))], + verbose=True) + yield check_pipeline_verbosity_fit_transform, pipe.fit_transform + pipe = Pipeline([('mult1', Mult(mult=1)), ('mult2', None)], + verbose=True) + yield check_pipeline_verbosity_fit_transform, pipe.fit_transform, True From c7bbb94b716f93aae9abc6c16dc4a9c12f066603 Mon Sep 17 00:00:00 2001 From: Karan Desai Date: Fri, 10 Mar 2017 17:11:55 +0530 Subject: [PATCH 02/12] ENH Add verbose option and corresponding tests for FeatureUnion. 
--- doc/modules/pipeline.rst | 4 +- sklearn/pipeline.py | 70 +++++++++++++++++++++++++--------- sklearn/tests/test_pipeline.py | 25 ++++++++++++ 3 files changed, 80 insertions(+), 19 deletions(-) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index 425fe6ed66dc0..456d2f6142847 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -254,7 +254,7 @@ and ``value`` is an estimator object:: FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,...)), ('kernel_pca', KernelPCA(alpha=1.0,...))], - transformer_weights=None) + transformer_weights=None, verbose=False) Like pipelines, feature unions have a shorthand constructor called @@ -269,7 +269,7 @@ and ignored by setting to ``None``:: FeatureUnion(n_jobs=1, transformer_list=[('linear_pca', PCA(copy=True,...)), ('kernel_pca', None)], - transformer_weights=None) + transformer_weights=None, verbose=False) .. topic:: Examples: diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 9cda48aa081eb..5d799ebe03686 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -656,8 +656,16 @@ def make_pipeline(*steps, **kwargs): return Pipeline(_name_estimators(steps), memory=memory) -def _fit_one_transformer(transformer, X, y): - return transformer.fit(X, y) +def _fit_one_transformer(trans, X, y, verbose=False, idx=None, + total_steps=None, name=None): + # idx, total_steps and name are not required when verbosity is disabled + step_start_time = time.time() + trans = trans.fit(X, y) + step_time_elapsed = time.time() - step_start_time + if verbose: + print('[FeatureUnion] (step %d of %d) %s ... 
%.5fs' % + (idx + 1, total_steps, name, step_time_elapsed)) + return trans def _transform_one(transformer, weight, X): @@ -668,16 +676,22 @@ def _transform_one(transformer, weight, X): return res * weight -def _fit_transform_one(transformer, weight, X, y, - **fit_params): - if hasattr(transformer, 'fit_transform'): - res = transformer.fit_transform(X, y, **fit_params) +def _fit_transform_one(trans, weight, X, y, verbose=False, idx=None, + total_steps=None, name=None, **fit_params): + # idx, total_steps and name are not required when verbosity is disabled + step_start_time = time.time() + if hasattr(trans, 'fit_transform'): + res = trans.fit_transform(X, y, **fit_params) else: - res = transformer.fit(X, y, **fit_params).transform(X) + res = trans.fit(X, y, **fit_params).transform(X) + step_time_elapsed = time.time() - step_start_time + if verbose: + print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % + (idx + 1, total_steps, name, step_time_elapsed)) # if we have a weight for this transformer, multiply output if weight is None: - return res, transformer - return res * weight, transformer + return res, trans + return res * weight, trans class FeatureUnion(_BasePipeline, TransformerMixin): @@ -707,12 +721,17 @@ class FeatureUnion(_BasePipeline, TransformerMixin): Multiplicative weights for features per transformer. Keys are transformer names, values the weights. + verbose : boolean, optional + Verbosity mode. 
+ """ - def __init__(self, transformer_list, n_jobs=1, transformer_weights=None): - self.transformer_list = list(transformer_list) + def __init__(self, transformer_list, n_jobs=1, transformer_weights=None, + verbose=False): + self.transformer_list = tosequence(transformer_list) self.n_jobs = n_jobs self.transformer_weights = transformer_weights + self.verbose = verbose self._validate_transformers() def get_params(self, deep=True): @@ -802,9 +821,18 @@ def fit(self, X, y=None): This estimator """ self._validate_transformers() + all_transformers = [(name, trans, weight) for name, trans, weight in + self._iter()] + total_steps = len(all_transformers) + # Keep a record of time elapsed + start_time = time.time() transformers = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_one_transformer)(trans, X, y) - for _, trans, _ in self._iter()) + delayed(_fit_one_transformer)(trans, X, y, self.verbose, idx, + total_steps, name) + for idx, (name, trans, _) in enumerate(all_transformers)) + time_elapsed = time.time() - start_time + if self.verbose: + print('[FeatureUnion] Total time elapsed: %.5fs' % time_elapsed) self._update_transformer_list(transformers) return self @@ -826,10 +854,18 @@ def fit_transform(self, X, y=None, **fit_params): sum of n_components (output dimension) over transformers. 
""" self._validate_transformers() + all_transformers = [(name, trans, weight) for name, trans, weight in + self._iter()] + total_steps = len(all_transformers) + # Keep a record of time elapsed + start_time = time.time() result = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_transform_one)(trans, weight, X, y, - **fit_params) - for name, trans, weight in self._iter()) + delayed(_fit_transform_one)(trans, weight, X, y, self.verbose, + idx, total_steps, name) + for idx, (name, trans, weight) in enumerate(all_transformers)) + time_elapsed = time.time() - start_time + if self.verbose: + print('[FeatureUnion] Total time elapsed: %.5fs' % time_elapsed) if not result: # All transformers are None @@ -908,7 +944,7 @@ def make_union(*transformers, **kwargs): TruncatedSVD(algorithm='randomized', n_components=2, n_iter=5, random_state=None, tol=0.0))], - transformer_weights=None) + transformer_weights=None, verbose=False) """ n_jobs = kwargs.pop('n_jobs', 1) if kwargs: diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index ed54482cd26c8..69907919071c1 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1022,3 +1022,28 @@ def test_pipeline_verbosity_fit_transform(): pipe = Pipeline([('mult1', Mult(mult=1)), ('mult2', None)], verbose=True) yield check_pipeline_verbosity_fit_transform, pipe.fit_transform, True + + +def check_feature_union_verbosity(feature_union_method): + # Test that the verbosity of feature union is proper + from sklearn.externals.six.moves import cStringIO as StringIO + import sys + old_stdout = sys.stdout + sys.stdout = StringIO() + feature_union_method(X=[[1, 2, 3], [4, 5, 6]], y=[[7], [8]]) + verbose_output = sys.stdout + sys.stdout = old_stdout + + # check output + verbose_output.seek(0) + lines = verbose_output.readlines() + assert_true('[FeatureUnion] (step 1 of 2) mult1 ... ' in lines[0]) + assert_true('[FeatureUnion] (step 2 of 2) mult2 ... 
' in lines[1]) + assert_true('[FeatureUnion] Total time elapsed: ' in lines[2]) + + +def test_feature_union_verbosity(): + union = FeatureUnion([('mult1', Mult(mult=1)), ('mult2', Mult(mult=2))], + verbose=True) + yield check_feature_union_verbosity, union.fit + yield check_feature_union_verbosity, union.fit_transform From d1353fb67a6ad0074ace386a067e54d492a882fd Mon Sep 17 00:00:00 2001 From: Karan Desai Date: Fri, 10 Mar 2017 20:14:41 +0530 Subject: [PATCH 03/12] ENH Add _pretty_print method to print from Pipeline properly. - Each line printed by Pipeline and FeatureUnion, when their verbosity mode is on, will be 70 characters long. --- sklearn/pipeline.py | 70 ++++++++++++++++++++++++---------- sklearn/tests/test_pipeline.py | 14 +++---- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 5d799ebe03686..4053c580c4656 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -27,6 +27,22 @@ __all__ = ['Pipeline', 'FeatureUnion'] +def _pretty_print(step_info): + """Helper method to print the information about execution of a particular + step of Pipeline / FeatureUnion (if verbosity is enabled). It receives a + string having information about current step and prints it in such a way + that its length is 70 characters. + + Parameters + ---------- + step_info : str + String of form '[ClassName] (step x of y) step_name ... time_elapsed' + + """ + name, elapsed = step_info.split('...') + print('%s%s%s' % (name, '.' * (70 - len(name + elapsed)), elapsed)) + + class _BasePipeline(six.with_metaclass(ABCMeta, _BaseComposition)): """Handles parameter management for classifiers composed of named steps. """ @@ -231,13 +247,12 @@ def _validate_steps(self): "'%s' (type %s) doesn't" % (estimator, type(estimator))) - def _print_final_step(self, start_time, time_elapsed_so_far): - time_elapsed = time.time() - start_time - time_elapsed_so_far += time_elapsed - print('[Pipeline] (step %d of %d) %s ... 
%.5fs' % - (len(self.steps), len(self.steps), self.steps[-1][0], - time_elapsed_so_far)) - print('[Pipeline] Total time elapsed: %.5fs' % time_elapsed_so_far) + def _print_final_step(self, final_step_time_elapsed, time_elapsed_so_far): + _pretty_print('[Pipeline] (step %d of %d) %s ... %.5fs' % + (len(self.steps), len(self.steps), self.steps[-1][0], + final_step_time_elapsed)) + _pretty_print('[Pipeline] Total time elapsed: ... %.5fs' % + time_elapsed_so_far) @property def _estimator_type(self): @@ -293,8 +308,9 @@ def _fit(self, X, y=None, **fit_params): time_elapsed_so_far += step_time_elapsed # Logging time elapsed for current step to stdout if self.verbose: - print('[Pipeline] (step %d of %d) %s ... %.5fs' % - (step_idx + 1, len(self.steps), name, step_time_elapsed)) + _pretty_print('[Pipeline] (step %d of %d) %s ... %.5fs' % + (step_idx + 1, len(self.steps), name, + step_time_elapsed)) if self._final_estimator is None: return Xt, {}, time_elapsed_so_far return Xt, fit_params_steps[self.steps[-1][0]], time_elapsed_so_far @@ -329,8 +345,11 @@ def fit(self, X, y=None, **fit_params): final_step_start_time = time.time() if self._final_estimator is not None: self._final_estimator.fit(Xt, y, **fit_params) + final_step_time_elapsed = time.time() - final_step_start_time + time_elapsed_so_far += final_step_time_elapsed if self.verbose: - self._print_final_step(final_step_start_time, time_elapsed_so_far) + self._print_final_step(final_step_time_elapsed, + time_elapsed_so_far) return self def fit_transform(self, X, y=None, **fit_params): @@ -365,16 +384,20 @@ def fit_transform(self, X, y=None, **fit_params): final_step_start_time = time.time() if last_step is None: if self.verbose: - print('[Pipeline] Step %s is NoneType.' % self.steps[-1][0]) - print('[Pipeline] Total time elapsed: %.3fs' % - time_elapsed_so_far) + _pretty_print('[Pipeline] Step %s is NoneType ...' % + self.steps[-1][0]) + _pretty_print('[Pipeline] Total time elapsed: ... 
%.5fs' % + time_elapsed_so_far) return Xt elif hasattr(last_step, 'fit_transform'): Xt = last_step.fit_transform(Xt, y, **fit_params) else: Xt = last_step.fit(Xt, y, **fit_params).transform(Xt) + final_step_time_elapsed = time.time() - final_step_start_time + time_elapsed_so_far += final_step_time_elapsed if self.verbose: - self._print_final_step(final_step_start_time, time_elapsed_so_far) + self._print_final_step(final_step_time_elapsed, + time_elapsed_so_far) return Xt @if_delegate_has_method(delegate='_final_estimator') @@ -427,8 +450,11 @@ def fit_predict(self, X, y=None, **fit_params): Xt, fit_params, time_elapsed_so_far = self._fit(X, y, **fit_params) final_step_start_time = time.time() y_pred = self.steps[-1][-1].fit_predict(Xt, y, **fit_params) + final_step_time_elapsed = time.time() - final_step_start_time + time_elapsed_so_far += final_step_time_elapsed if self.verbose: - self._print_final_step(final_step_start_time, time_elapsed_so_far) + self._print_final_step(final_step_time_elapsed, + time_elapsed_so_far) return y_pred @if_delegate_has_method(delegate='_final_estimator') @@ -663,8 +689,8 @@ def _fit_one_transformer(trans, X, y, verbose=False, idx=None, trans = trans.fit(X, y) step_time_elapsed = time.time() - step_start_time if verbose: - print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % - (idx + 1, total_steps, name, step_time_elapsed)) + _pretty_print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % + (idx + 1, total_steps, name, step_time_elapsed)) return trans @@ -686,8 +712,8 @@ def _fit_transform_one(trans, weight, X, y, verbose=False, idx=None, res = trans.fit(X, y, **fit_params).transform(X) step_time_elapsed = time.time() - step_start_time if verbose: - print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % - (idx + 1, total_steps, name, step_time_elapsed)) + _pretty_print('[FeatureUnion] (step %d of %d) %s ... 
%.5fs' % + (idx + 1, total_steps, name, step_time_elapsed)) # if we have a weight for this transformer, multiply output if weight is None: return res, trans @@ -832,7 +858,8 @@ def fit(self, X, y=None): for idx, (name, trans, _) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: - print('[FeatureUnion] Total time elapsed: %.5fs' % time_elapsed) + _pretty_print( + '[FeatureUnion] Total time elapsed: ... %.5fs' % time_elapsed) self._update_transformer_list(transformers) return self @@ -865,7 +892,8 @@ def fit_transform(self, X, y=None, **fit_params): for idx, (name, trans, weight) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: - print('[FeatureUnion] Total time elapsed: %.5fs' % time_elapsed) + _pretty_print( + '[FeatureUnion] Total time elapsed: ... %.5fs' % time_elapsed) if not result: # All transformers are None diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 69907919071c1..bfbd5760ef9a8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -983,8 +983,8 @@ def check_pipeline_verbosity_fit_predict(pipe_method): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[Pipeline] (step 1 of 2) transf ... ' in lines[0]) - assert_true('[Pipeline] (step 2 of 2) clf ... ' in lines[1]) + assert_true('[Pipeline] (step 1 of 2) transf ...' in lines[0]) + assert_true('[Pipeline] (step 2 of 2) clf ...' in lines[1]) assert_true('[Pipeline] Total time elapsed: ' in lines[2]) @@ -1007,11 +1007,11 @@ def check_pipeline_verbosity_fit_transform(pipe_method, last_was_none=False): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[Pipeline] (step 1 of 2) mult1 ... ' in lines[0]) + assert_true('[Pipeline] (step 1 of 2) mult1 ...' in lines[0]) if last_was_none: - assert_true('[Pipeline] Step mult2 is NoneType.' 
in lines[1]) + assert_true('[Pipeline] Step mult2 is NoneType ...' in lines[1]) else: - assert_true('[Pipeline] (step 2 of 2) mult2 ... ' in lines[1]) + assert_true('[Pipeline] (step 2 of 2) mult2 ...' in lines[1]) assert_true('[Pipeline] Total time elapsed: ' in lines[2]) @@ -1037,8 +1037,8 @@ def check_feature_union_verbosity(feature_union_method): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[FeatureUnion] (step 1 of 2) mult1 ... ' in lines[0]) - assert_true('[FeatureUnion] (step 2 of 2) mult2 ... ' in lines[1]) + assert_true('[FeatureUnion] (step 1 of 2) mult1 ...' in lines[0]) + assert_true('[FeatureUnion] (step 2 of 2) mult2 ...' in lines[1]) assert_true('[FeatureUnion] Total time elapsed: ' in lines[2]) From a981bdc2533bbd93d870994b0e68fd796c7acbdd Mon Sep 17 00:00:00 2001 From: Karan Desai Date: Fri, 10 Mar 2017 20:18:24 +0530 Subject: [PATCH 04/12] Add a changelog entry about verbosity of Pipeline. --- doc/whats_new.rst | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 0ca707ce2cbbf..bc49a821db437 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -415,6 +415,113 @@ Miscellaneous - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now accept non-finite features. :issue:`8931` by :user:`Attractadore`. + - Added optional parameter ``verbose`` in :class:`pipeline.Pipeline` and + :class:`pipeline.FeatureUnion` for showing progress and timing of each + step. :issue:`8568` by :user:`Karan Desai `. + + - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in + documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` + :user:`Oscar Najera ` + + - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` + now support online learning using `partial_fit`. + issue: `8053` by :user:`Peng Yu `. 
+ - :class:`pipeline.Pipeline` allows caching transformers + within a pipeline by using the ``memory`` constructor parameter. + :issue:`7990` by :user:`Guillaume Lemaitre `. + + - :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and + :class:`decomposition.TruncatedSVD` now expose the singular values + from the underlying SVD. They are stored in the attribute + ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. + + - :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` + now use significantly less memory when assigning data points to their + nearest cluster center. :issue:`7721` by :user:`Jon Crall `. + + - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, + and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` + attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` + by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, + and :user:`Stephen Hoover `. + + - The ``min_weight_fraction_leaf`` constraint in tree construction is now + more efficient, taking a fast path to declare a node a leaf if its weight + is less than 2 * the minimum. Note that the constructed tree will be + different from previous versions where ``min_weight_fraction_leaf`` is + used. :issue:`7441` by :user:`Nelson Liu `. + + - Added ``average`` parameter to perform weight averaging in + :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939` + by :user:`Andrea Esuli `. + + - Custom metrics for the :mod:`sklearn.neighbors` binary trees now have + fewer constraints: they must take two 1d-arrays and return a float. + :issue:`6288` by `Jake Vanderplas`_. + + - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` + now support sparse input for prediction. + :issue:`6101` by :user:`Ibraim Ganiev `. 
+ + - Added ``shuffle`` and ``random_state`` parameters to shuffle training + data before taking prefixes of it based on training sizes in + :func:`model_selection.learning_curve`. + :issue:`7506` by :user:`Narine Kokhlikyan `. + + - Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` + to enable selection of the norm order when ``coef_`` is more than 1D + + - Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. + :issue:`7723` by :user:`Mikhail Korobov `. + + - ``check_estimator`` now attempts to ensure that methods transform, predict, etc. + do not set attributes on the estimator. + :issue:`7533` by :user:`Ekaterina Krivich `. + + - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` + will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with + norm 'max' the norms returned will be the same as for dense matrices. + :issue:`7771` by `Ang Lu `_. + + - :class:`sklearn.linear_model.RANSACRegressor` no longer throws an error + when calling ``fit`` if no inliers are found in its first iteration. + Furthermore, causes of skipped iterations are tracked in newly added + attributes, ``n_skips_*``. + :issue:`7914` by :user:`Michael Horrell `. + + - :func:`model_selection.cross_val_predict` now returns output of the + correct shape for all values of the argument ``method``. + :issue:`7863` by :user:`Aman Dalmia `. + + - Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not + exactly implement Benjamini-Hochberg procedure. It formerly may have + selected fewer features than it should. + :issue:`7490` by :user:`Peng Meng `. + + - Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. + A ``TypeError`` will be raised for any other kwargs. :issue:`8028` + by :user:`Alexander Booth `. + + - Added type checking to the ``accept_sparse`` parameter in + :mod:`sklearn.utils.validation` methods. 
This parameter now accepts only + boolean, string, or list/tuple of strings. ``accept_sparse=None`` is deprecated + and should be replaced by ``accept_sparse=False``. + :issue:`7880` by :user:`Josh Karnofsky `. + + - :class:`model_selection.GridSearchCV`, :class:`model_selection.RandomizedSearchCV` + and :func:`model_selection.cross_val_score` now allow estimators with callable + kernels which were previously prohibited. :issue:`8005` by `Andreas Müller`_ . + + + - Added ability to use sparse matrices in :func:`feature_selection.f_regression` + with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. + + - Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score` by + Victor Poughon. + + - In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` + is a lot faster with ``return_std=True`` by :user:`Hadrien Bertrand `. Bug fixes ......... @@ -5754,3 +5861,5 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. .. _Neeraj Gangwar: http://neerajgangwar.in .. _Arthur Mensch: https://amensch.fr + +.. 
_Karan Desai: https://www.github.com/karandesai-96 From 3bddca19703d775e2389fd31b39acadad35cfc63 Mon Sep 17 00:00:00 2001 From: petrushev Date: Fri, 1 Sep 2017 12:52:43 +0200 Subject: [PATCH 05/12] Fix bug in pipeline transformer list in-place assignment; Fix error message bug --- sklearn/pipeline.py | 6 +++--- sklearn/tests/test_pipeline.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4053c580c4656..765d8da7fbf41 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -20,7 +20,7 @@ from .externals.joblib import Parallel, delayed from .externals import six from .utils.metaestimators import if_delegate_has_method, _BaseComposition -from .utils import Bunch +from .utils import Bunch, tosequence from .utils.validation import check_memory @@ -934,10 +934,10 @@ def transform(self, X): def _update_transformer_list(self, transformers): transformers = iter(transformers) - self.transformer_list[:] = [ + self.transformer_list = tosequence([ (name, None if old is None else next(transformers)) for name, old in self.transformer_list - ] + ]) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index bfbd5760ef9a8..6ccb3a2360ecc 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -834,9 +834,9 @@ def test_step_name_validation(): # we validate in construction (despite scikit-learn convention) bad_steps3 = [('a', Mult(2)), (param, Mult(3))] for bad_steps, message in [ - (bad_steps1, "Estimator names must not contain __: got ['a__q']"), + (bad_steps1, "Step names must not contain __: got ['a__q']"), (bad_steps2, "Names provided are not unique: ['a', 'a']"), - (bad_steps3, "Estimator names conflict with constructor " + (bad_steps3, "Step names conflict with constructor " "arguments: ['%s']" % param), ]: # three ways to make invalid: From ba083b8264d9201755da4a01e77bfb9e5f209f5d Mon Sep 17 00:00:00 2001 From: 
petrushev Date: Fri, 1 Sep 2017 13:15:53 +0200 Subject: [PATCH 06/12] Extend `make_pipeline` to support the `verbose` kwarg as well --- sklearn/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 765d8da7fbf41..2949d383dce8e 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -676,10 +676,11 @@ def make_pipeline(*steps, **kwargs): p : Pipeline """ memory = kwargs.pop('memory', None) + verbose = kwargs.pop('verbose', False) if kwargs: raise TypeError('Unknown keyword arguments: "{}"' .format(list(kwargs.keys())[0])) - return Pipeline(_name_estimators(steps), memory=memory) + return Pipeline(_name_estimators(steps), memory=memory, verbose=verbose) def _fit_one_transformer(trans, X, y, verbose=False, idx=None, From 5857d5e9b5b1cca9d6083c8bd4246165459291e5 Mon Sep 17 00:00:00 2001 From: petrushev Date: Fri, 1 Sep 2017 13:40:35 +0200 Subject: [PATCH 07/12] Fix `whats new` to reflect the Pipeline verbose change --- doc/whats_new.rst | 114 +++------------------------------------------- 1 file changed, 7 insertions(+), 107 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index bc49a821db437..d8d5c43004dce 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -48,6 +48,13 @@ Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. :issue:`9521` by :user:`Hanmin Qin `. +Miscellaneous + +- Added optional parameter ``verbose`` in :class:`pipeline.Pipeline` and + :class:`pipeline.FeatureUnion` for showing progress and timing of each + step. :issue:`8568` by :user:`Karan Desai `. + + Bug fixes ......... @@ -415,113 +422,6 @@ Miscellaneous - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now accept non-finite features. :issue:`8931` by :user:`Attractadore`. 
- - Added optional parameter ``verbose`` in :class:`pipeline.Pipeline` and - :class:`pipeline.FeatureUnion` for showing progress and timing of each - step. :issue:`8568` by :user:`Karan Desai `. - - - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in - documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` - :user:`Oscar Najera ` - - - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` - now support online learning using `partial_fit`. - issue: `8053` by :user:`Peng Yu `. - - :class:`pipeline.Pipeline` allows to cache transformers - within a pipeline by using the ``memory`` constructor parameter. - By :issue:`7990` by :user:`Guillaume Lemaitre `. - - - :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and - :class:`decomposition.TruncatedSVD` now expose the singular values - from the underlying SVD. They are stored in the attribute - ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. - - - :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` - now uses significantly less memory when assigning data points to their - nearest cluster center. :issue:`7721` by :user:`Jon Crall `. - - - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, - and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` - attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` - by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, - and :user:`Stephen Hoover `. - - - The ``min_weight_fraction_leaf`` constraint in tree construction is now - more efficient, taking a fast path to declare a node a leaf if its weight - is less than 2 * the minimum. Note that the constructed tree will be - different from previous versions where ``min_weight_fraction_leaf`` is - used. :issue:`7441` by :user:`Nelson Liu `. 
- - - Added ``average`` parameter to perform weights averaging in - :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939` - by :user:`Andrea Esuli `. - - - Custom metrics for the :mod:`sklearn.neighbors` binary trees now have - fewer constraints: they must take two 1d-arrays and return a float. - :issue:`6288` by `Jake Vanderplas`_. - - - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` - now support sparse input for prediction. - :issue:`6101` by :user:`Ibraim Ganiev `. - - - Added ``shuffle`` and ``random_state`` parameters to shuffle training - data before taking prefixes of it based on training sizes in - :func:`model_selection.learning_curve`. - :issue:`7506` by :user:`Narine Kokhlikyan `. - - - Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` - to enable selection of the norm order when ``coef_`` is more than 1D - - - Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. - :issue:`7723` by :user:`Mikhail Korobov `. - - - ``check_estimator`` now attempts to ensure that methods transform, predict, etc. - do not set attributes on the estimator. - :issue:`7533` by :user:`Ekaterina Krivich `. - - - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` - will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with - norm 'max' the norms returned will be the same as for dense matrices. - :issue:`7771` by `Ang Lu `_. - - - :class:`sklearn.linear_model.RANSACRegressor` no longer throws an error - when calling ``fit`` if no inliers are found in its first iteration. - Furthermore, causes of skipped iterations are tracked in newly added - attributes, ``n_skips_*``. - :issue:`7914` by :user:`Michael Horrell `. - - - :func:`model_selection.cross_val_predict` now returns output of the - correct shape for all values of the argument ``method``. - :issue:`7863` by :user:`Aman Dalmia `. 
- - - Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. - - - Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. - A ``TypeError`` will be raised for any other kwargs. :issue:`8028` - by :user:`Alexander Booth `. - - - Added type checking to the ``accept_sparse`` parameter in - :mod:`sklearn.utils.validation` methods. This parameter now accepts only - boolean, string, or list/tuple of strings. ``accept_sparse=None`` is deprecated - and should be replaced by ``accept_sparse=False``. - :issue:`7880` by :user:`Josh Karnofsky `. - - - :class:`model_selection.GridSearchCV`, :class:`model_selection.RandomizedSearchCV` - and :func:`model_selection.cross_val_score` now allow estimators with callable - kernels which were previously prohibited. :issue:`8005` by `Andreas Müller`_ . - - - - Added ability to use sparse matrices in :func:`feature_selection.f_regression` - with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. - - - Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score` by - Victor Poughon. - - - In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` - is a lot faster with ``return_std=True`` by :user:`Hadrien Bertrand `. Bug fixes ......... 
From 72f4d0ed39742117bfda1c1bff0fcca8619f2a24 Mon Sep 17 00:00:00 2001 From: petrushev Date: Fri, 1 Sep 2017 13:59:09 +0200 Subject: [PATCH 08/12] Remove `_BasePipeline` resulting from a bad rebase --- sklearn/pipeline.py | 63 ++-------------------------------- sklearn/tests/test_pipeline.py | 4 +-- 2 files changed, 4 insertions(+), 63 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 2949d383dce8e..d05f8ee1991a7 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -10,7 +10,6 @@ # License: BSD from collections import defaultdict -from abc import ABCMeta, abstractmethod import time import numpy as np @@ -43,65 +42,7 @@ def _pretty_print(step_info): print('%s%s%s' % (name, '.' * (70 - len(name + elapsed)), elapsed)) -class _BasePipeline(six.with_metaclass(ABCMeta, _BaseComposition)): - """Handles parameter management for classifiers composed of named steps. - """ - - @abstractmethod - def __init__(self): - pass - - def _replace_step(self, steps_attr, name, new_val): - # assumes `name` is a valid step name - new_steps = getattr(self, steps_attr)[:] - for i, (step_name, _) in enumerate(new_steps): - if step_name == name: - new_steps[i] = (name, new_val) - break - setattr(self, steps_attr, new_steps) - - def _get_params(self, steps_attr, deep=True): - out = super(_BasePipeline, self).get_params(deep=False) - if not deep: - return out - steps = getattr(self, steps_attr) - out.update(steps) - for name, estimator in steps: - if estimator is None: - continue - for key, value in six.iteritems(estimator.get_params(deep=True)): - out['%s__%s' % (name, key)] = value - return out - - def _set_params(self, steps_attr, **params): - # Ensure strict ordering of parameter setting: - # 1. All steps - if steps_attr in params: - setattr(self, steps_attr, params.pop(steps_attr)) - # 2. 
Step replacement - step_names, _ = zip(*getattr(self, steps_attr)) - for name in list(six.iterkeys(params)): - if '__' not in name and name in step_names: - self._replace_step(steps_attr, name, params.pop(name)) - # 3. Step parameters and other initilisation arguments - super(_BasePipeline, self).set_params(**params) - return self - - def _validate_names(self, names): - if len(set(names)) != len(names): - raise ValueError('Names provided are not unique: ' - '{0!r}'.format(list(names))) - invalid_names = set(names).intersection(self.get_params(deep=False)) - if invalid_names: - raise ValueError('Step names conflict with constructor arguments: ' - '{0!r}'.format(sorted(invalid_names))) - invalid_names = [name for name in names if '__' in name] - if invalid_names: - raise ValueError('Step names must not contain __: got ' - '{0!r}'.format(invalid_names)) - - -class Pipeline(_BasePipeline): +class Pipeline(_BaseComposition): """Pipeline of transforms with a final estimator. Sequentially apply a list of transforms and a final estimator. @@ -721,7 +662,7 @@ def _fit_transform_one(trans, weight, X, y, verbose=False, idx=None, return res * weight, trans -class FeatureUnion(_BasePipeline, TransformerMixin): +class FeatureUnion(_BaseComposition, TransformerMixin): """Concatenates results of multiple transformer objects. 
This estimator applies a list of transformer objects in parallel to the diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 6ccb3a2360ecc..bfbd5760ef9a8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -834,9 +834,9 @@ def test_step_name_validation(): # we validate in construction (despite scikit-learn convention) bad_steps3 = [('a', Mult(2)), (param, Mult(3))] for bad_steps, message in [ - (bad_steps1, "Step names must not contain __: got ['a__q']"), + (bad_steps1, "Estimator names must not contain __: got ['a__q']"), (bad_steps2, "Names provided are not unique: ['a', 'a']"), - (bad_steps3, "Step names conflict with constructor " + (bad_steps3, "Estimator names conflict with constructor " "arguments: ['%s']" % param), ]: # three ways to make invalid: From 1b7245e52e2c192ab67ea6eb4c5883ab5e226265 Mon Sep 17 00:00:00 2001 From: petrushev Date: Fri, 1 Sep 2017 14:03:17 +0200 Subject: [PATCH 09/12] Revert `FeatureUnion.transformer_list` to list instead of sequence --- sklearn/pipeline.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index d05f8ee1991a7..91ec6b377074f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,7 +19,7 @@ from .externals.joblib import Parallel, delayed from .externals import six from .utils.metaestimators import if_delegate_has_method, _BaseComposition -from .utils import Bunch, tosequence +from .utils import Bunch from .utils.validation import check_memory @@ -696,7 +696,7 @@ class FeatureUnion(_BaseComposition, TransformerMixin): def __init__(self, transformer_list, n_jobs=1, transformer_weights=None, verbose=False): - self.transformer_list = tosequence(transformer_list) + self.transformer_list = list(transformer_list) self.n_jobs = n_jobs self.transformer_weights = transformer_weights self.verbose = verbose @@ -876,10 +876,10 @@ def transform(self, X): def _update_transformer_list(self, 
transformers): transformers = iter(transformers) - self.transformer_list = tosequence([ + self.transformer_list[:] = [ (name, None if old is None else next(transformers)) for name, old in self.transformer_list - ]) + ] def make_union(*transformers, **kwargs): From 3938adc38d086aaf3aafc8700b840d7a47f5a1c3 Mon Sep 17 00:00:00 2001 From: petrushev Date: Fri, 1 Sep 2017 14:19:36 +0200 Subject: [PATCH 10/12] Extend `FeatureUnion` to support the `verbose` kwarg as well --- doc/whats_new.rst | 5 +++++ sklearn/pipeline.py | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index d8d5c43004dce..6e54350b10d43 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -54,6 +54,11 @@ Miscellaneous :class:`pipeline.FeatureUnion` for showing progress and timing of each step. :issue:`8568` by :user:`Karan Desai `. +- Added optional parameter ``verbose`` in functions `pipeline.make_pipeline` + and `pipeline.make_union` to extend the same functionality as the + corresponding classes. :issue:`9668` by + :user:`Baze Petrushev `. + Bug fixes ......... diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 91ec6b377074f..3046c5fa5009b 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -601,6 +601,9 @@ def make_pipeline(*steps, **kwargs): inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming. + verbose : boolean, optional + Verbosity mode. + Examples -------- >>> from sklearn.naive_bayes import GaussianNB @@ -896,6 +899,9 @@ def make_union(*transformers, **kwargs): n_jobs : int, optional Number of jobs to run in parallel (default 1). + verbose : boolean, optional + Verbosity mode. 
+ Returns ------- f : FeatureUnion @@ -917,9 +923,11 @@ def make_union(*transformers, **kwargs): transformer_weights=None, verbose=False) """ n_jobs = kwargs.pop('n_jobs', 1) + verbose = kwargs.pop('verbose', False) if kwargs: # We do not currently support `transformer_weights` as we may want to # change its type spec in make_union raise TypeError('Unknown keyword arguments: "{}"' .format(list(kwargs.keys())[0])) - return FeatureUnion(_name_estimators(transformers), n_jobs=n_jobs) + return FeatureUnion(_name_estimators(transformers), n_jobs=n_jobs, + verbose=verbose) From efb4aac6a1ce790bcb16dadf4cd5be5c1464be29 Mon Sep 17 00:00:00 2001 From: petrushev Date: Sun, 3 Sep 2017 22:25:18 +0200 Subject: [PATCH 11/12] Minor tidy-up --- doc/whats_new.rst | 3 +-- sklearn/pipeline.py | 40 +++++++++++++++++----------------- sklearn/tests/test_pipeline.py | 4 ++-- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 6e54350b10d43..981e7ae95788c 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -57,7 +57,7 @@ Miscellaneous - Added optional parameter ``verbose`` in functions `pipeline.make_pipeline` and `pipeline.make_union` to extend the same functionality as the corresponding classes. :issue:`9668` by - :user:`Baze Petrushev `. + :user:`Baze Petrushev ` and :user:`Karan Desai `. Bug fixes @@ -5766,5 +5766,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. .. _Neeraj Gangwar: http://neerajgangwar.in .. _Arthur Mensch: https://amensch.fr - .. 
_Karan Desai: https://www.github.com/karandesai-96 diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 3046c5fa5009b..b5975e002b568 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,10 +18,11 @@ from .base import clone, TransformerMixin from .externals.joblib import Parallel, delayed from .externals import six -from .utils.metaestimators import if_delegate_has_method, _BaseComposition +from .utils.metaestimators import if_delegate_has_method from .utils import Bunch from .utils.validation import check_memory +from .utils.metaestimators import _BaseComposition __all__ = ['Pipeline', 'FeatureUnion'] @@ -627,16 +628,16 @@ def make_pipeline(*steps, **kwargs): return Pipeline(_name_estimators(steps), memory=memory, verbose=verbose) -def _fit_one_transformer(trans, X, y, verbose=False, idx=None, +def _fit_one_transformer(transformer, X, y, verbose=False, idx=None, total_steps=None, name=None): # idx, total_steps and name are not required when verbosity is disabled step_start_time = time.time() - trans = trans.fit(X, y) + transformer = transformer.fit(X, y) step_time_elapsed = time.time() - step_start_time if verbose: _pretty_print('[FeatureUnion] (step %d of %d) %s ... 
%.5fs' % (idx + 1, total_steps, name, step_time_elapsed)) - return trans + return transformer def _transform_one(transformer, weight, X): @@ -647,22 +648,22 @@ def _transform_one(transformer, weight, X): return res * weight -def _fit_transform_one(trans, weight, X, y, verbose=False, idx=None, +def _fit_transform_one(transformer, weight, X, y, verbose=False, idx=None, total_steps=None, name=None, **fit_params): # idx, total_steps and name are not required when verbosity is disabled step_start_time = time.time() - if hasattr(trans, 'fit_transform'): - res = trans.fit_transform(X, y, **fit_params) + if hasattr(transformer, 'fit_transform'): + res = transformer.fit_transform(X, y, **fit_params) else: - res = trans.fit(X, y, **fit_params).transform(X) + res = transformer.fit(X, y, **fit_params).transform(X) step_time_elapsed = time.time() - step_start_time if verbose: _pretty_print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % (idx + 1, total_steps, name, step_time_elapsed)) # if we have a weight for this transformer, multiply output if weight is None: - return res, trans - return res * weight, trans + return res, transformer + return res * weight, transformer class FeatureUnion(_BaseComposition, TransformerMixin): @@ -792,15 +793,14 @@ def fit(self, X, y=None): This estimator """ self._validate_transformers() - all_transformers = [(name, trans, weight) for name, trans, weight in - self._iter()] + all_transformers = list(self._iter()) total_steps = len(all_transformers) # Keep a record of time elapsed start_time = time.time() transformers = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_one_transformer)(trans, X, y, self.verbose, idx, - total_steps, name) - for idx, (name, trans, _) in enumerate(all_transformers)) + delayed(_fit_one_transformer)(transformer, X, y, self.verbose, + idx, total_steps, name) + for idx, (name, transformer, _) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: _pretty_print( @@ -826,15 +826,15 @@ def 
fit_transform(self, X, y=None, **fit_params): sum of n_components (output dimension) over transformers. """ self._validate_transformers() - all_transformers = [(name, trans, weight) for name, trans, weight in - self._iter()] + all_transformers = list(self._iter()) total_steps = len(all_transformers) # Keep a record of time elapsed start_time = time.time() result = Parallel(n_jobs=self.n_jobs)( - delayed(_fit_transform_one)(trans, weight, X, y, self.verbose, - idx, total_steps, name) - for idx, (name, trans, weight) in enumerate(all_transformers)) + delayed(_fit_transform_one)(transformer, weight, X, y, + self.verbose, idx, total_steps, name, + **fit_params) + for idx, (name, transformer, weight) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: _pretty_print( diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index bfbd5760ef9a8..45454345cb9f1 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -67,12 +67,12 @@ def set_params(self, **params): class NoInvTransf(NoTrans): - def transform(self, X, y=None): + def transform(self, X): return X class Transf(NoInvTransf): - def transform(self, X, y=None): + def transform(self, X): return X def inverse_transform(self, X): From 2e5b6cc62b76a5ce752ea39c4b39594ab883b6b0 Mon Sep 17 00:00:00 2001 From: petrushev Date: Wed, 6 Sep 2017 00:18:03 +0200 Subject: [PATCH 12/12] Add `message_with_time` helper --- sklearn/cross_validation.py | 6 +-- sklearn/model_selection/_validation.py | 8 ++-- sklearn/pipeline.py | 59 ++++++++++---------------- sklearn/tests/test_pipeline.py | 27 +++++++----- sklearn/utils/__init__.py | 22 +++++++++- 5 files changed, 66 insertions(+), 56 deletions(-) diff --git a/sklearn/cross_validation.py b/sklearn/cross_validation.py index 7646459da3936..5544890d95e8d 100644 --- a/sklearn/cross_validation.py +++ b/sklearn/cross_validation.py @@ -22,7 +22,8 @@ import scipy.sparse as sp from .base import is_classifier, 
clone -from .utils import indexable, check_random_state, safe_indexing +from .utils import (indexable, check_random_state, safe_indexing, + message_with_time) from .utils.validation import (_is_arraylike, _num_samples, column_or_1d) from .utils.multiclass import type_of_target @@ -1700,8 +1701,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, if verbose > 2: msg += ", score=%f" % test_score if verbose > 1: - end_msg = "%s -%s" % (msg, logger.short_format_time(scoring_time)) - print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + print(message_with_time('CV', msg, scoring_time)) ret = [train_score] if return_train_score else [] ret.extend([test_score, _num_samples(X_test), scoring_time]) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 773f70fb7dba2..a5a3a50dae443 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -20,10 +20,11 @@ import scipy.sparse as sp from ..base import is_classifier, clone -from ..utils import indexable, check_random_state, safe_indexing +from ..utils import (indexable, check_random_state, safe_indexing, + message_with_time) from ..utils.validation import _is_arraylike, _num_samples from ..utils.metaestimators import _safe_split -from ..externals.joblib import Parallel, delayed, logger +from ..externals.joblib import Parallel, delayed from ..externals.six.moves import zip from ..metrics.scorer import check_scoring, _check_multimetric_scoring from ..exceptions import FitFailedWarning @@ -480,8 +481,7 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose, msg += ", score=%s" % test_scores if verbose > 1: total_time = score_time + fit_time - end_msg = "%s, total=%s" % (msg, logger.short_format_time(total_time)) - print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) + print(message_with_time('CV', msg, total_time)) ret = [train_scores, test_scores] if return_train_score else [test_scores] diff --git 
a/sklearn/pipeline.py b/sklearn/pipeline.py index b5975e002b568..1a4e4d24c6e62 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,7 +19,7 @@ from .externals.joblib import Parallel, delayed from .externals import six from .utils.metaestimators import if_delegate_has_method -from .utils import Bunch +from .utils import Bunch, message_with_time from .utils.validation import check_memory from .utils.metaestimators import _BaseComposition @@ -27,22 +27,6 @@ __all__ = ['Pipeline', 'FeatureUnion'] -def _pretty_print(step_info): - """Helper method to print the information about execution of a particular - step of Pipeline / FeatureUnion (if verbosity is enabled). It receives a - string having information about current step and prints it in such a way - that its length is 70 characters. - - Parameters - ---------- - step_info : str - String of form '[ClassName] (step x of y) step_name ... time_elapsed' - - """ - name, elapsed = step_info.split('...') - print('%s%s%s' % (name, '.' * (70 - len(name + elapsed)), elapsed)) - - class Pipeline(_BaseComposition): """Pipeline of transforms with a final estimator. @@ -190,11 +174,11 @@ def _validate_steps(self): % (estimator, type(estimator))) def _print_final_step(self, final_step_time_elapsed, time_elapsed_so_far): - _pretty_print('[Pipeline] (step %d of %d) %s ... %.5fs' % - (len(self.steps), len(self.steps), self.steps[-1][0], - final_step_time_elapsed)) - _pretty_print('[Pipeline] Total time elapsed: ... 
%.5fs' % - time_elapsed_so_far) + message = '(step %d of %d) %s' % ( + len(self.steps), len(self.steps), self.steps[-1][0]) + print(message_with_time('Pipeline', message, final_step_time_elapsed)) + print(message_with_time( + 'Pipeline', 'Total time elapsed:', time_elapsed_so_far)) @property def _estimator_type(self): @@ -250,9 +234,9 @@ def _fit(self, X, y=None, **fit_params): time_elapsed_so_far += step_time_elapsed # Logging time elapsed for current step to stdout if self.verbose: - _pretty_print('[Pipeline] (step %d of %d) %s ... %.5fs' % - (step_idx + 1, len(self.steps), name, - step_time_elapsed)) + message = '(step %d of %d) %s' % ( + step_idx + 1, len(self.steps), name) + print(message_with_time('Pipeline', message, step_time_elapsed)) if self._final_estimator is None: return Xt, {}, time_elapsed_so_far return Xt, fit_params_steps[self.steps[-1][0]], time_elapsed_so_far @@ -326,10 +310,10 @@ def fit_transform(self, X, y=None, **fit_params): final_step_start_time = time.time() if last_step is None: if self.verbose: - _pretty_print('[Pipeline] Step %s is NoneType ...' % - self.steps[-1][0]) - _pretty_print('[Pipeline] Total time elapsed: ... %.5fs' % - time_elapsed_so_far) + message = 'Step %s is NoneType' % (self.steps[-1][0],) + print(message_with_time('Pipeline', message, 0)) + print(message_with_time( + 'Pipeline', 'Total time elapsed', time_elapsed_so_far)) return Xt elif hasattr(last_step, 'fit_transform'): Xt = last_step.fit_transform(Xt, y, **fit_params) @@ -635,8 +619,8 @@ def _fit_one_transformer(transformer, X, y, verbose=False, idx=None, transformer = transformer.fit(X, y) step_time_elapsed = time.time() - step_start_time if verbose: - _pretty_print('[FeatureUnion] (step %d of %d) %s ... 
%.5fs' % - (idx + 1, total_steps, name, step_time_elapsed)) + message = '(step %d of %d) %s' % (idx + 1, total_steps, name) + print(message_with_time('FeatureUnion', message, step_time_elapsed)) return transformer @@ -658,8 +642,8 @@ def _fit_transform_one(transformer, weight, X, y, verbose=False, idx=None, res = transformer.fit(X, y, **fit_params).transform(X) step_time_elapsed = time.time() - step_start_time if verbose: - _pretty_print('[FeatureUnion] (step %d of %d) %s ... %.5fs' % - (idx + 1, total_steps, name, step_time_elapsed)) + message = '(step %d of %d) %s' % (idx + 1, total_steps, name) + print(message_with_time('FeatureUnion', message, step_time_elapsed)) # if we have a weight for this transformer, multiply output if weight is None: return res, transformer @@ -803,8 +787,9 @@ def fit(self, X, y=None): for idx, (name, transformer, _) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: - _pretty_print( - '[FeatureUnion] Total time elapsed: ... %.5fs' % time_elapsed) + print(message_with_time( + 'FeatureUnion', 'Total time elapsed:', time_elapsed)) + self._update_transformer_list(transformers) return self @@ -837,8 +822,8 @@ def fit_transform(self, X, y=None, **fit_params): for idx, (name, transformer, weight) in enumerate(all_transformers)) time_elapsed = time.time() - start_time if self.verbose: - _pretty_print( - '[FeatureUnion] Total time elapsed: ... %.5fs' % time_elapsed) + print(message_with_time( + 'FeatureUnion', 'Total time elapsed:', time_elapsed)) if not result: # All transformers are None diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 45454345cb9f1..76d3fe66634fe 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -983,10 +983,11 @@ def check_pipeline_verbosity_fit_predict(pipe_method): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[Pipeline] (step 1 of 2) transf ...' 
in lines[0]) - assert_true('[Pipeline] (step 2 of 2) clf ...' in lines[1]) - assert_true('[Pipeline] Total time elapsed: ' in lines[2]) - + assert_true('(step 1 of 2) transf' in lines[0]) + assert_true('(step 2 of 2) clf' in lines[1]) + assert_true('Total time elapsed' in lines[2]) + for line in lines: + assert line.startswith('[Pipeline]') def test_pipeline_fit_verbosity(): pipe = Pipeline([('transf', Transf()), ('clf', FitParamT())], verbose=True) @@ -1007,12 +1008,13 @@ def check_pipeline_verbosity_fit_transform(pipe_method, last_was_none=False): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[Pipeline] (step 1 of 2) mult1 ...' in lines[0]) + assert_true('(step 1 of 2) mult1' in lines[0]) + assert_true(lines[0].startswith('[Pipeline]')) if last_was_none: - assert_true('[Pipeline] Step mult2 is NoneType ...' in lines[1]) + assert_true('Step mult2 is NoneType' in lines[1]) else: - assert_true('[Pipeline] (step 2 of 2) mult2 ...' in lines[1]) - assert_true('[Pipeline] Total time elapsed: ' in lines[2]) + assert_true('(step 2 of 2) mult2' in lines[1]) + assert_true('Total time elapsed' in lines[2]) def test_pipeline_verbosity_fit_transform(): @@ -1037,9 +1039,12 @@ def check_feature_union_verbosity(feature_union_method): # check output verbose_output.seek(0) lines = verbose_output.readlines() - assert_true('[FeatureUnion] (step 1 of 2) mult1 ...' in lines[0]) - assert_true('[FeatureUnion] (step 2 of 2) mult2 ...' 
in lines[1]) - assert_true('[FeatureUnion] Total time elapsed: ' in lines[2]) + assert_true('(step 1 of 2) mult1' in lines[0]) + assert_true('(step 2 of 2) mult2' in lines[1]) + assert_true('Total time elapsed' in lines[2]) + assert_true(lines[0].startswith('[FeatureUnion]')) + assert_true(lines[1].startswith('[FeatureUnion]')) + assert_true(lines[2].startswith('[FeatureUnion]')) def test_feature_union_verbosity(): diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 4b2665cdd4f77..f4c2a1791663f 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -14,7 +14,7 @@ check_consistent_length, check_X_y, indexable, check_symmetric) from .class_weight import compute_class_weight, compute_sample_weight -from ..externals.joblib import cpu_count +from ..externals.joblib import cpu_count, logger from ..exceptions import DataConversionWarning from .deprecation import deprecated @@ -506,3 +506,23 @@ def indices_to_mask(indices, mask_length): mask[indices] = True return mask + + +def message_with_time(source, message, time_): + """Create a one-line message for logging purposes + + Parameters + ---------- + source : str + String indicating the source or the reference of the message + + message : str + Short message + + time_ : float + Time in seconds + """ + start_message = '[%s]' % (source,) + end_message = "%s, total=%s" % (message, logger.short_format_time(time_)) + dots_len = (68 - len(start_message) - len(end_message)) + return ("%s %s %s" % (start_message, dots_len * '.', end_message))