feature-engine
diff --git a/‎feature_engine/__init__.py
Lines changed: 3 additions & 3 deletions b/‎feature_engine/__init__.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎feature_engine/creation/__init__.py
Lines changed: 3 additions & 5 deletions b/‎feature_engine/creation/__init__.py
Lines changed: 3 additions & 5 deletions
diff --git a/‎feature_engine/creation/mathematical_combination.py
Lines changed: 40 additions & 26 deletions b/‎feature_engine/creation/mathematical_combination.py
Lines changed: 40 additions & 26 deletions
diff --git a/‎feature_engine/dataframe_checks.py
Lines changed: 8 additions & 4 deletions b/‎feature_engine/dataframe_checks.py
Lines changed: 8 additions & 4 deletions
diff --git a/‎feature_engine/discretisation/__init__.py
Lines changed: 8 additions & 7 deletions b/‎feature_engine/discretisation/__init__.py
Lines changed: 8 additions & 7 deletions
diff --git a/‎feature_engine/discretisation/arbitrary.py
Lines changed: 22 additions & 12 deletions b/‎feature_engine/discretisation/arbitrary.py
Lines changed: 22 additions & 12 deletions
@@ -2,9 +2,9 @@
 import feature_engine
 
 PACKAGE_ROOT = pathlib.Path(feature_engine.__file__).resolve().parent
-VERSION_PATH = PACKAGE_ROOT / 'VERSION'
+VERSION_PATH = PACKAGE_ROOT / "VERSION"
 
 name = "feature_engine"
 
-with open(VERSION_PATH, 'r') as version_file:
-    __version__ = version_file.read().strip()
+with open(VERSION_PATH, "r") as version_file:
+    __version__ = version_file.read().strip()
@@ -1,10 +1,8 @@
 """
-The module creation includes classes to create new variables by combination of existing variables in the
-dataframe.
+The module creation includes classes to create new variables by combination of existing
+variables in the dataframe.
 """
 
 from .mathematical_combination import MathematicalCombination
 
-__all__ = [
-    'MathematicalCombination'
-]
+__all__ = ["MathematicalCombination"]
@@ -6,9 +6,10 @@ class MathematicalCombination(BaseNumericalTransformer):
     MathematicalCombination() applies basic mathematical operations across features,
     returning 1 or more additional features as a result.
 
-    For example, if we have the variables number_payments_first_quarter, number_payments_second_quarter,
-    number_payments_third_quarter and number_payments_fourth_quarter, we can use MathematicalCombination()
-    to calculate the total number of payments and mean number of payments as follows:
+    For example, if we have the variables number_payments_first_quarter,
+    number_payments_second_quarter, number_payments_third_quarter and
+    number_payments_fourth_quarter, we can use MathematicalCombination() to calculate
+    the total number of payments and mean number of payments as follows:
 
     .. code-block:: python
 
@@ -31,8 +32,8 @@ class MathematicalCombination(BaseNumericalTransformer):
 
         transformer.fit_transform(X)
 
-    The transformed X will contain the additional features total_number_payments and mean_number_payments,
-    plus the original set of variables.
+    The transformed X will contain the additional features total_number_payments and
+    mean_number_payments, plus the original set of variables.
 
     Parameters
     ----------
@@ -51,7 +52,8 @@ class MathematicalCombination(BaseNumericalTransformer):
         Each operation should be a string and must be one of the elements
         from the list: ['sum', 'prod', 'mean', 'std', 'max', 'min']
 
-        Each operation will result in a new variable that will be added to the transformed dataset.
+        Each operation will result in a new variable that will be added to the
+        transformed dataset.
 
     new_variables_names: list, default=None
         Names of the newly created variables. The user can enter a name or a list
@@ -64,47 +66,57 @@ class MathematicalCombination(BaseNumericalTransformer):
 
         The name of the variables indicated by the user should coincide with the order
         in which the mathematical operations are initialised in the transformer.
-        That is, if you set math_operations = ['mean', 'prod'], the first new variable name
-        will be assigned to the mean of the variables and the second variable name
+        That is, if you set math_operations = ['mean', 'prod'], the first new variable
+        name will be assigned to the mean of the variables and the second variable name
         to the product of the variables.
 
         If new_variables_names=None, the transformer will assign an arbitrary name
-        to the newly created features starting by the name of the mathematical operation,
-        followed by the variables combined separated by -.
+        to the newly created features starting with the name of the mathematical
+        operation, followed by the variables combined, separated by -.
 
     """
 
     def __init__(self, variables=None, math_operations=None, new_variables_names=None):
 
         if math_operations is None:
-            math_operations = ['sum', 'prod', 'mean', 'std', 'max', 'min']
+            math_operations = ["sum", "prod", "mean", "std", "max", "min"]
 
         self.variables = variables
         self.new_variables_names = new_variables_names
-        self._math_operations_permitted = ['sum', 'prod', 'mean', 'std', 'max', 'min']
+        self._math_operations_permitted = ["sum", "prod", "mean", "std", "max", "min"]
 
         if not isinstance(math_operations, list):
             raise KeyError("math_operations parameter must be a list or None")
 
-        if any(operation not in self._math_operations_permitted for operation in math_operations):
-            raise KeyError("At least one of math_operations is not found in permitted operations set. "
-                           "Choose one of ['sum', 'prod', 'mean', 'std', 'max', 'min']")
+        if any(
+            operation not in self._math_operations_permitted
+            for operation in math_operations
+        ):
+            raise KeyError(
+                "At least one of math_operations is not permitted operation. "
+                "Choose one of ['sum', 'prod', 'mean', 'std', 'max', 'min']"
+            )
         else:
             self.math_operations = math_operations
 
         if self.variables and len(self.variables) <= 1:
             raise KeyError(
-                "MathematicalCombination requires two or more features to make proper transformations.")
+                "MathematicalCombination requires two or more features to make proper "
+                "transformations."
+            )
 
-        if self.new_variables_names and len(self.new_variables_names) != len(self.math_operations):
+        if self.new_variables_names and len(self.new_variables_names) != len(
+            self.math_operations
+        ):
             raise KeyError(
-                "Number of items in New_variables_names must be equal to number of items in math_operations."
+                "Number of items in New_variables_names must be equal to number of "
+                "items in math_operations."
             )
 
     def fit(self, X, y=None):
         """
-        Performs dataframe checks. Selects variables to transform if None were indicated by the user.
-        Creates dictionary of column to transformation mappings
+        Performs dataframe checks. Selects variables to transform if None were indicated
+        by the user. Creates dictionary of column to transformation mappings.
 
         X : pandas dataframe of shape = [n_samples, n_features]
             The training input samples.
@@ -118,12 +130,13 @@ def fit(self, X, y=None):
         self.input_shape_ = X.shape
 
         if self.new_variables_names:
-            self.combination_dict_ = dict(zip(self.new_variables_names, self.math_operations))
+            self.combination_dict_ = dict(
+                zip(self.new_variables_names, self.math_operations)
+            )
         else:
             self.combination_dict_ = {
                 f"{operation}({'-'.join(self.variables)})": operation
-                for operation
-                in self.math_operations
+                for operation in self.math_operations
             }
 
         return self
@@ -132,7 +145,8 @@ def transform(self, X):
         """
         Transforms source dataset.
 
-        Adds column for each operation with calculation based on variables and operation.
+        Adds a column for each operation with the calculation based on the variables
+        and operations indicated when setting up the transformer.
 
         Parameters
         ----------
@@ -143,8 +157,8 @@ def transform(self, X):
         Returns
         -------
 
-        X_transformed : pandas dataframe of shape = [n_samples, n_features + n_operations]
-            The dataframe with operations results added.
+        X_transformed : pandas dataframe, shape = [n_samples, n_features + n_operations]
+            The dataframe with the operations results added as columns.
         """
         X = super().transform(X)
 
 
@@ -16,12 +16,16 @@ def _check_input_matches_training_df(X, reference):
     # check that the dataframe to transform has the same number of columns
     # as the dataframe used during the fit method
     if X.shape[1] != reference:
-        raise ValueError('The number of columns in this data set is different from the one used to fit this '
-                         'transformer (when using the fit method)')
+        raise ValueError(
+            "The number of columns in this data set is different from the one used to "
+            "fit this transformer (when using the fit method)"
+        )
     return None
 
 
 def _check_contains_na(X, variables):
     if X[variables].isnull().values.any():
-        raise ValueError('Some of the variables to transform contain missing values. Check and remove those '
-                         'before using this transformer.')
+        raise ValueError(
+            "Some of the variables to transform contain missing values. Check and "
+            "remove those before using this transformer."
+        )
@@ -1,15 +1,16 @@
 """
-The module discretisation includes classes to sort continuous variables into bins / intervals.
+The module discretisation includes classes to sort continuous variables into bins or
+intervals.
 """
 
 from .decision_tree import DecisionTreeDiscretiser
-from . equal_frequency import EqualFrequencyDiscretiser
-from .equal_width import  EqualWidthDiscretiser
+from .equal_frequency import EqualFrequencyDiscretiser
+from .equal_width import EqualWidthDiscretiser
 from .arbitrary import ArbitraryDiscretiser
 
 __all__ = [
-    'DecisionTreeDiscretiser',
-    'EqualFrequencyDiscretiser',
-    'EqualWidthDiscretiser',
-    'ArbitraryDiscretiser'
+    "DecisionTreeDiscretiser",
+    "EqualFrequencyDiscretiser",
+    "EqualWidthDiscretiser",
+    "ArbitraryDiscretiser",
 ]
@@ -15,8 +15,9 @@ class ArbitraryDiscretiser(BaseNumericalTransformer):
     'var2':[5, 10, 15, 20]}.
 
     The ArbitraryDiscretiser() works only with numerical variables. The discretiser will
-    check if the dictionary entered by the user contains variables present in the training
-    set, and if these variables are cast as numerical, before doing any transformation.
+    check if the dictionary entered by the user contains variables present in the
+    training set, and if these variables are cast as numerical, before doing any
+    transformation.
 
     Then it transforms the variables, that is, it sorts the values into the intervals,
     when the transform method is called.
@@ -25,8 +26,10 @@ class ArbitraryDiscretiser(BaseNumericalTransformer):
     ----------
 
     binning_dict : dict
-        The dictionary with the variable : interval limits pairs, provided by the user. A
-        valid dictionary looks like this: {'var1':[0, 10, 100, 1000], 'var2':[5, 10, 15, 20]}.
+        The dictionary with the variable : interval limits pairs, provided by the user.
+        A valid dictionary looks like this:
+
+         binning_dict = {'var1':[0, 10, 100, 1000], 'var2':[5, 10, 15, 20]}.
 
     return_object : bool, default=False
         Whether the numbers in the discrete variable should be returned as
@@ -42,10 +45,12 @@ class ArbitraryDiscretiser(BaseNumericalTransformer):
     def __init__(self, binning_dict, return_object=False, return_boundaries=False):
 
         if not isinstance(binning_dict, dict):
-            raise ValueError("Please provide at a dictionary with the interval limits per variable")
+            raise ValueError(
+                "Please provide at a dictionary with the interval limits per variable"
+            )
 
         if not isinstance(return_object, bool):
-            raise ValueError('return_object must be True or False')
+            raise ValueError("return_object must be True or False")
 
         self.binning_dict = binning_dict
         self.variables = [x for x in binning_dict.keys()]
@@ -54,7 +59,8 @@ def __init__(self, binning_dict, return_object=False, return_boundaries=False):
 
     def fit(self, X, y=None):
         """
-        Checks that the user entered variables are in the train set and cast as numerical.
+        Checks that the user entered variables are in the train set and cast as
+        numerical.
 
         Parameters
         ----------
@@ -80,15 +86,17 @@ def fit(self, X, y=None):
         if all(variable in X.columns for variable in self.variables):
             self.binner_dict_ = self.binning_dict
         else:
-            raise ValueError('There are variables in the provided dictionary which are not present in the train set '
-                             'or not cast as numerical')
+            raise ValueError(
+                "There are variables in the provided dictionary which are not present "
+                "in the train set or not cast as numerical"
+            )
 
         self.input_shape_ = X.shape
 
         return self
 
     def transform(self, X):
-        """ Sorts the variable values into the intervals.
+        """Sorts the variable values into the intervals.
 
         Parameters
         ----------
@@ -112,10 +120,12 @@ def transform(self, X):
 
         else:
             for feature in self.variables:
-                X[feature] = pd.cut(X[feature], self.binner_dict_[feature], labels=False)
+                X[feature] = pd.cut(
+                    X[feature], self.binner_dict_[feature], labels=False
+                )
 
             # return object
             if self.return_object:
-                X[self.variables] = X[self.variables].astype('O')
+                X[self.variables] = X[self.variables].astype("O")
 
         return X