xhluca
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
Lines changed: 0 additions & 9 deletions
diff --git a/sklearn/impute.py b/sklearn/impute.py
Lines changed: 16 additions & 19 deletions
diff --git a/sklearn/tests/test_impute.py b/sklearn/tests/test_impute.py
Lines changed: 1 addition & 36 deletions
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
Lines changed: 4 additions & 11 deletions
@@ -36,15 +36,6 @@ Changelog
   threaded when `n_jobs > 1` or `n_jobs = -1`.
   :issue:`13005` by :user:`Prabakaran Kumaresshan <nixphix>`.
 
-:mod:`sklearn.impute`
-.....................
-
-- |Fix| add support for non-numeric data in
-  :class:`sklearn.impute.MissingIndicator` which was not supported while
-  :class:`sklearn.impute.SimpleImputer` was supporting this for some
-  imputation strategies.
-  :issue:`13046` by :user:`Guillaume Lemaitre <glemaitre>`.
-
 :mod:`sklearn.linear_model`
 ...........................
 
 
@@ -533,23 +533,6 @@ def _get_missing_features_info(self, X):
 
         return imputer_mask, features_with_missing
 
-    def _validate_input(self, X):
-        if not is_scalar_nan(self.missing_values):
-            force_all_finite = True
-        else:
-            force_all_finite = "allow-nan"
-        X = check_array(X, accept_sparse=('csc', 'csr'), dtype=None,
-                        force_all_finite=force_all_finite)
-        _check_inputs_dtype(X, self.missing_values)
-        if X.dtype.kind not in ("i", "u", "f", "O"):
-            raise ValueError("MissingIndicator does not support data with "
-                             "dtype {0}. Please provide either a numeric array"
-                             " (with a floating point or integer dtype) or "
-                             "categorical data represented either as an array "
-                             "with integer dtype or an array of string values "
-                             "with an object dtype.".format(X.dtype))
-        return X
-
     def fit(self, X, y=None):
         """Fit the transformer on X.
 
@@ -564,7 +547,14 @@ def fit(self, X, y=None):
         self : object
             Returns self.
         """
-        X = self._validate_input(X)
+        if not is_scalar_nan(self.missing_values):
+            force_all_finite = True
+        else:
+            force_all_finite = "allow-nan"
+        X = check_array(X, accept_sparse=('csc', 'csr'),
+                        force_all_finite=force_all_finite)
+        _check_inputs_dtype(X, self.missing_values)
+
         self._n_features = X.shape[1]
 
         if self.features not in ('missing-only', 'all'):
@@ -598,7 +588,14 @@ def transform(self, X):
 
         """
         check_is_fitted(self, "features_")
-        X = self._validate_input(X)
+
+        if not is_scalar_nan(self.missing_values):
+            force_all_finite = True
+        else:
+            force_all_finite = "allow-nan"
+        X = check_array(X, accept_sparse=('csc', 'csr'),
+                        force_all_finite=force_all_finite)
+        _check_inputs_dtype(X, self.missing_values)
 
         if X.shape[1] != self._n_features:
             raise ValueError("X has a different number of features "
 
@@ -13,7 +13,6 @@
 from sklearn.impute import MissingIndicator
 from sklearn.impute import SimpleImputer
 from sklearn.pipeline import Pipeline
-from sklearn.pipeline import make_union
 from sklearn.model_selection import GridSearchCV
 from sklearn import tree
 from sklearn.random_projection import sparse_random_matrix
@@ -510,10 +509,7 @@ def test_imputation_copy():
       "'features' has to be either 'missing-only' or 'all'"),
      (np.array([[-1, 1], [1, 2]]), np.array([[-1, 1], [1, 2]]),
       {'features': 'all', 'sparse': 'random'},
-      "'sparse' has to be a boolean or 'auto'"),
-     (np.array([['a', 'b'], ['c', 'a']], dtype=str),
-      np.array([['a', 'b'], ['c', 'a']], dtype=str),
-      {}, "MissingIndicator does not support data with dtype")]
+      "'sparse' has to be a boolean or 'auto'")]
 )
 def test_missing_indicator_error(X_fit, X_trans, params, msg_err):
     indicator = MissingIndicator(missing_values=-1)
@@ -618,37 +614,6 @@ def test_missing_indicator_sparse_param(arr_type, missing_values,
             assert isinstance(X_trans_mask, np.ndarray)
 
 
-def test_missing_indicator_string():
-    X = np.array([['a', 'b', 'c'], ['b', 'c', 'a']], dtype=object)
-    indicator = MissingIndicator(missing_values='a', features='all')
-    X_trans = indicator.fit_transform(X)
-    assert_array_equal(X_trans, np.array([[True, False, False],
-                                          [False, False, True]]))
-
-
-@pytest.mark.parametrize(
-    "X, missing_values, X_trans_exp",
-    [(np.array([['a', 'b'], ['b', 'a']], dtype=object), 'a',
-      np.array([['b', 'b', True, False], ['b', 'b', False, True]],
-               dtype=object)),
-     (np.array([[np.nan, 1.], [1., np.nan]]), np.nan,
-      np.array([[1., 1., True, False], [1., 1., False, True]])),
-     (np.array([[np.nan, 'b'], ['b', np.nan]], dtype=object), np.nan,
-      np.array([['b', 'b', True, False], ['b', 'b', False, True]],
-               dtype=object)),
-     (np.array([[None, 'b'], ['b', None]], dtype=object), None,
-      np.array([['b', 'b', True, False], ['b', 'b', False, True]],
-               dtype=object))]
-)
-def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
-    trans = make_union(
-        SimpleImputer(missing_values=missing_values, strategy='most_frequent'),
-        MissingIndicator(missing_values=missing_values)
-    )
-    X_trans = trans.fit_transform(X)
-    assert_array_equal(X_trans, X_trans_exp)
-
-
 @pytest.mark.parametrize("imputer_constructor",
                          [SimpleImputer])
 @pytest.mark.parametrize(
 
@@ -72,10 +72,10 @@
                 'OrthogonalMatchingPursuit', 'PLSCanonical', 'PLSRegression',
                 'RANSACRegressor', 'RadiusNeighborsRegressor',
                 'RandomForestRegressor', 'Ridge', 'RidgeCV']
+
 ALLOW_NAN = ['Imputer', 'SimpleImputer', 'MissingIndicator',
              'MaxAbsScaler', 'MinMaxScaler', 'RobustScaler', 'StandardScaler',
              'PowerTransformer', 'QuantileTransformer']
-SUPPORT_STRING = ['SimpleImputer', 'MissingIndicator']
 
 
 def _yield_non_meta_checks(name, estimator):
@@ -623,16 +623,9 @@ def check_dtype_object(name, estimator_orig):
         if "Unknown label type" not in str(e):
             raise
 
-    if name not in SUPPORT_STRING:
-        X[0, 0] = {'foo': 'bar'}
-        msg = "argument must be a string or a number"
-        assert_raises_regex(TypeError, msg, estimator.fit, X, y)
-    else:
-        # Estimators supporting string will not call np.asarray to convert the
-        # data to numeric and therefore, the error will not be raised.
-        # Checking for each element dtype in the input array will be costly.
-        # Refer to #11401 for full discussion.
-        estimator.fit(X, y)
+    X[0, 0] = {'foo': 'bar'}
+    msg = "argument must be a string or a number"
+    assert_raises_regex(TypeError, msg, estimator.fit, X, y)
 
 
 def check_complex_data(name, estimator_orig):