diff --git a/examples/missing_values.py b/examples/missing_values.py
index 8a0895f9a589f..53503c67dac01 100644
--- a/examples/missing_values.py
+++ b/examples/missing_values.py
@@ -1,11 +1,12 @@
 """
-======================================================
+====================================================
 Imputing missing values before building an estimator
-======================================================
+====================================================
 
-This example shows that imputing the missing values can give better results
-than discarding the samples containing any missing value.
-Imputing does not always improve the predictions, so please check via cross-validation.
+This example shows that imputing the missing values can give
+better results than discarding the samples containing any missing value.
+Imputing does not always improve the predictions, so please check via
+cross-validation; sometimes dropping incomplete rows or encoding
+missingness with a marker value works better.
 
 Missing values can be replaced by the mean, the median or the most frequent
@@ -13,38 +14,54 @@
 The median is a more robust estimator for data with high magnitude variables
 which could dominate results (otherwise known as a 'long tail').
 
-Script output::
+Another option is the MICE imputer. This uses round-robin linear regression,
+treating every variable as an output in turn. The simple version implemented
+assumes Gaussian output variables. If your output variables are obviously
+non-Gaussian, consider transforming them to improve performance.
 
-  Score with the entire dataset = 0.56
-  Score without the samples containing missing values = 0.48
-  Score after imputation of the missing values = 0.55
+Script output::
+
+  MSE with the entire dataset = 3354.15
+  MSE without the samples containing missing values = 2968.98
+  MSE after mean imputation of the missing values = 3507.77
+  MSE after MICE imputation of the missing values = 3340.39
+
+In this case, imputing helps the regressor match the original score.
+
+Note that MICE will not always be better than, e.g., simple mean imputation.
+To see an example of this, swap in ``boston`` for ``diabetes``.
 
-In this case, imputing helps the classifier get close to the original score.
- """ import numpy as np +from sklearn.datasets import load_diabetes from sklearn.datasets import load_boston from sklearn.ensemble import RandomForestRegressor from sklearn.pipeline import Pipeline from sklearn.preprocessing import Imputer +from sklearn.preprocessing import MICEImputer from sklearn.model_selection import cross_val_score rng = np.random.RandomState(0) -dataset = load_boston() +dataset_name = 'diabetes' # 'boston' for another examples +if dataset_name == 'boston': + dataset = load_boston() +elif dataset_name == 'diabetes': + dataset = load_diabetes() X_full, y_full = dataset.data, dataset.target n_samples = X_full.shape[0] n_features = X_full.shape[1] # Estimate the score on the entire dataset, with no missing values estimator = RandomForestRegressor(random_state=0, n_estimators=100) -score = cross_val_score(estimator, X_full, y_full).mean() -print("Score with the entire dataset = %.2f" % score) +score = cross_val_score(estimator, X_full, y_full, + scoring='neg_mean_squared_error').mean() * -1 +print("MSE with the entire dataset = %.2f" % score) # Add missing values in 75% of the lines missing_rate = 0.75 -n_missing_samples = np.floor(n_samples * missing_rate) +n_missing_samples = int(np.floor(n_samples * missing_rate)) missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples, dtype=np.bool), np.ones(n_missing_samples, @@ -56,10 +73,11 @@ X_filtered = X_full[~missing_samples, :] y_filtered = y_full[~missing_samples] estimator = RandomForestRegressor(random_state=0, n_estimators=100) -score = cross_val_score(estimator, X_filtered, y_filtered).mean() -print("Score without the samples containing missing values = %.2f" % score) +score = cross_val_score(estimator, X_filtered, y_filtered, + scoring='neg_mean_squared_error').mean() * -1 +print("MSE without the samples containing missing values = %.2f" % score) -# Estimate the score after imputation of the missing values +# Estimate the score after imputation (mean strategy) of the missing values X_missing = X_full.copy() X_missing[np.where(missing_samples)[0], missing_features] = 0 y_missing = y_full.copy() @@ -68,5 +86,14 @@ axis=0)), ("forest", RandomForestRegressor(random_state=0, n_estimators=100))]) -score = cross_val_score(estimator, X_missing, y_missing).mean() -print("Score after imputation of the missing values = %.2f" % score) +score = cross_val_score(estimator, X_missing, y_missing, + scoring='neg_mean_squared_error').mean() * -1 +print("MSE after mean imputation of the missing values = %.2f" % score) + +# Estimate the score after imputation (MICE strategy) of the missing values +estimator = Pipeline([("imputer", MICEImputer(missing_values=0)), + ("forest", RandomForestRegressor(random_state=0, + n_estimators=100))]) +score = cross_val_score(estimator, X_missing, y_missing, + scoring='neg_mean_squared_error').mean() * -1 +print("MSE after MICE imputation of the missing values = %.2f" % score) \ No newline at end of file diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 84d42e7177a0a..f6c7bf8ba9f95 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -449,7 +449,7 @@ def fit(self, X, y, sample_weight=None): self.constant_ = np.reshape(self.constant_, (1, -1)) return self - def predict(self, X): + def predict(self, X, return_std=False): """ Perform classification on test vectors X. @@ -459,18 +459,29 @@ def predict(self, X): Input vectors, where n_samples is the number of samples and n_features is the number of features. 
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index 84d42e7177a0a..f6c7bf8ba9f95 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -449,7 +449,7 @@ def fit(self, X, y, sample_weight=None):
             self.constant_ = np.reshape(self.constant_, (1, -1))
         return self
 
-    def predict(self, X):
+    def predict(self, X, return_std=False):
         """
         Perform classification on test vectors X.
 
@@ -459,18 +459,29 @@ def predict(self, X):
             Input vectors, where n_samples is the number of samples
             and n_features is the number of features.
 
+        return_std : boolean, optional
+            Whether to return the standard deviation of posterior prediction.
+
         Returns
         -------
         y : array, shape = [n_samples] or [n_samples, n_outputs]
             Predicted target values for X.
+
+        y_std : array, shape = [n_samples] or [n_samples, n_outputs]
+            Standard deviation of predictive distribution of query points.
         """
         check_is_fitted(self, "constant_")
         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
         n_samples = X.shape[0]
 
         y = np.ones((n_samples, 1)) * self.constant_
+        # the prediction is a constant, so its predictive std is zero
+        y_std = np.zeros((n_samples, 1))
 
         if self.n_outputs_ == 1 and not self.output_2d_:
             y = np.ravel(y)
+            y_std = np.ravel(y_std)
 
-        return y
+        if return_std:
+            return y, y_std
+        else:
+            return y
diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index cabbd469c10d4..6e3280e083dad 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -29,6 +29,7 @@
 from .label import MultiLabelBinarizer
 
 from .imputation import Imputer
+from .mice import MICEImputer
 
 
 __all__ = [
@@ -38,6 +39,7 @@
     'KernelCenterer',
     'LabelBinarizer',
     'LabelEncoder',
+    'MICEImputer',
     'MultiLabelBinarizer',
     'MinMaxScaler',
     'MaxAbsScaler',
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index e414e98f424df..2636375520489 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -166,6 +166,10 @@ def fit(self, X, y=None):
                                                 self.missing_values,
                                                 self.axis)
 
+        # cache the indexes of columns with at least one observed value;
+        # transform() and MICEImputer rely on these (statistics_ is only
+        # computed at fit time when axis == 0)
+        if self.axis == 0:
+            invalid_mask = np.isnan(self.statistics_)
+            valid_mask = np.logical_not(invalid_mask)
+            self._valid_statistics_indexes = np.where(valid_mask)[0]
+
         return self
 
     def _sparse_fit(self, X, strategy, missing_values, axis):
@@ -339,14 +343,13 @@ def transform(self, X):
         invalid_mask = np.isnan(statistics)
         valid_mask = np.logical_not(invalid_mask)
         valid_statistics = statistics[valid_mask]
-        valid_statistics_indexes = np.where(valid_mask)[0]
         missing = np.arange(X.shape[not self.axis])[invalid_mask]
 
         if self.axis == 0 and invalid_mask.any():
             if self.verbose:
                 warnings.warn("Deleting features without "
                               "observed values: %s" % missing)
-            X = X[:, valid_statistics_indexes]
+            X = X[:, self._valid_statistics_indexes]
         elif self.axis == 1 and invalid_mask.any():
             raise ValueError("Some rows only contain "
                              "missing values: %s" % missing)
diff --git a/sklearn/preprocessing/mice.py b/sklearn/preprocessing/mice.py
new file mode 100644
index 0000000000000..953f9ae399b7e
--- /dev/null
+++ b/sklearn/preprocessing/mice.py
@@ -0,0 +1,365 @@
+# Authors: Sergey Feldman
+# License: BSD 3 clause
+
+from copy import deepcopy
+from time import time
+
+import numpy as np
+
+from .imputation import _get_mask, Imputer
+from ..base import BaseEstimator, TransformerMixin
+from ..dummy import DummyRegressor
+from ..preprocessing import normalize
+from ..utils import check_array
+from ..utils.validation import check_is_fitted
+
+__all__ = [
+    'MICEImputer',
+]
+
+
+class MICEImputer(BaseEstimator, TransformerMixin):
+    """
+    Basic implementation of the MICE package from R.
+    This version assumes all of the columns are Gaussian.
+
+    Parameters
+    ----------
+    missing_values : integer or "NaN", optional (default="NaN")
+        The placeholder for the missing values. All occurrences of
+        missing_values will be imputed. For missing values encoded as
+        np.nan, use the string value "NaN".
+
+    imputation_order : str, optional (default="monotone")
+        The order in which the columns will be imputed.
+        "monotone" - From columns with fewest missing values to most.
+        "revmonotone" - From columns with most missing values to least.
+        "roman" - Left to right.
+        "arabic" - Right to left.
+        "random" - A random order for each round.
+
+    n_imputations : int, optional (default=100)
+        Number of MICE rounds to perform, the results of which will be
+        used in the final average.
+
+    n_burn_in : int, optional (default=10)
+        Number of initial MICE rounds to perform, the results of which
+        will not be returned.
+
+    n_nearest_columns : int, optional (default=np.infty)
+        Number of other columns to use to estimate the missing values of
+        the current column. Can provide significant speed-up
+        when the number of columns is huge.
+
+    initial_fill_method : str, optional (default="mean")
+        Valid values: {"mean", "median", "most_frequent"}.
+        Uses sklearn.preprocessing.Imputer.
+
+    min_value : float, optional (default=None)
+        Minimum possible imputed value.
+
+    max_value : float, optional (default=None)
+        Maximum possible imputed value.
+
+    verbose : boolean, optional (default=False)
+        Whether to provide updates on progress.
+
+    Notes
+    -----
+    - Columns which only contain missing values at `fit` are discarded upon
+      `transform`.
+
+    References
+    ----------
+
+    .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice:
+        Multivariate Imputation by Chained Equations in R". Journal of
+        Statistical Software 45: 1-67.
+        <https://www.jstatsoft.org/article/view/v045i03>`_
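+
+    Examples
+    --------
+    A minimal usage sketch on toy data (the imputed values depend on the
+    random draws, so the completed matrix is not shown)::
+
+        import numpy as np
+        from sklearn.preprocessing import MICEImputer
+
+        X = np.array([[1.0, 2.0], [3.0, np.nan], [5.0, 6.0]])
+        imputer = MICEImputer(n_imputations=20, n_burn_in=5)
+        X_completed = imputer.fit_transform(X)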
+    """
+
+    def __init__(
+            self,
+            missing_values='NaN',
+            imputation_order='monotone',
+            n_imputations=100,
+            n_burn_in=10,
+            n_nearest_columns=np.infty,
+            initial_fill_method="mean",
+            min_value=None,
+            max_value=None,
+            verbose=False):
+        from ..linear_model import BayesianRidge  # avoiding circular import
+        self.model = BayesianRidge()
+        self.missing_values = missing_values
+        self.imputation_order = imputation_order
+        self.n_imputations = n_imputations
+        self.n_burn_in = n_burn_in
+        self.n_nearest_columns = n_nearest_columns
+        self.initial_fill_method = initial_fill_method
+        self.min_value = min_value
+        self.max_value = max_value
+        self.verbose = verbose
+        self.trained_model_triplets = []
+
+    def _fill_in_one_column(self,
+                            X_filled,
+                            mask_missing_values,
+                            this_column,
+                            other_columns,
+                            model=None,
+                            min_std=1e-6):
+        """Impute one column, training a model for it if none is given.
+
+        Missing entries are drawn from the approximate predictive
+        posterior: a normal distribution with the model's predicted
+        mean and standard deviation.
+        """
+        # if nothing is missing, just return the default
+        if mask_missing_values[:, this_column].sum() == 0:
+            return X_filled, model
+        missing_row_mask = mask_missing_values[:, this_column]
+        if model is None:
+            X_train = X_filled[:, other_columns][~missing_row_mask]
+            y_train = X_filled[:, this_column][~missing_row_mask]
+            if np.std(y_train) > 0:
+                model = deepcopy(self.model)
+                model.fit(X_train, y_train)
+            else:
+                # constant column: fall back to a dummy model whose
+                # predictive std is zero
+                model = DummyRegressor()
+                model.fit(X_train, y_train)
+        X_test = X_filled[:, other_columns][missing_row_mask]
+        mus, sigmas = model.predict(X_test, return_std=True)
+        sigmas = np.maximum(sigmas, min_std)
+        imputed_values = np.random.normal(mus, sigmas)
+        imputed_values = self._clip(imputed_values)
+        X_filled[missing_row_mask, this_column] = imputed_values
+        return X_filled, model
+
+    def _get_other_columns(self,
+                           n_features,
+                           this_column,
+                           abs_correlation_matrix):
+        """
+        Get a list of other columns to predict this_column.
+
+        If self.n_nearest_columns is less than the total number of
+        columns, then use a probability proportional to the absolute
+        correlation between this_column and each other column to randomly
+        choose a subsample of the other columns (without replacement).
+
+        Parameters
+        ----------
+        n_features : integer
+            Number of columns in X.
+
+        this_column : integer
+            Index of the column currently being imputed.
+
+        abs_correlation_matrix : array-like, shape (n_features, n_features)
+            Absolute correlation matrix of X at the beginning of the current
+            round. The diagonal has been zeroed out and each column has been
+            normalized to sum to 1.
+        """
+        ordered_columns = np.arange(n_features)
+        if self.n_nearest_columns <= n_features - 1:
+            p = abs_correlation_matrix[:, this_column]
+            other_columns = np.random.choice(ordered_columns,
+                                             self.n_nearest_columns,
+                                             replace=False,
+                                             p=p)
+        else:
+            other_columns = np.concatenate([ordered_columns[:this_column],
+                                            ordered_columns[this_column + 1:]])
+        return other_columns
+
+    def _clip(self, X):
+        """
+        Clip values to fall within min/max constraints.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Input data, where "n_samples" is the number of samples and
+            "n_features" is the number of features.
+        """
+        X = np.asarray(X)
+        if self.min_value is not None:
+            X[X < self.min_value] = self.min_value
+        if self.max_value is not None:
+            X[X > self.max_value] = self.max_value
+        return X
+
+    def _get_ordered_indices(self, mask_missing_values):
+        """
+        Decide what order we will update the columns.
+
+        As a homage to the MICE R package, we offer four main options for
+        ordering the updates, and use a random order if anything else is
+        specified.
+
+        Also, this function filters out columns which have no missing values.
+
+        Parameters
+        ----------
+        mask_missing_values : array-like, shape (n_samples, n_features)
+            Input data's missing indicator matrix, where "n_samples" is the
+            number of samples and "n_features" is the number of features.
+ """ + n_samples, n_features = mask_missing_values.shape + fraction_of_missing_values = mask_missing_values.mean(axis=0) + all_features_indices = np.arange(n_features) + if self.imputation_order == 'roman': + ordered_indices = all_features_indices + elif self.imputation_order == 'arabic': + ordered_indices = all_features_indices[::-1] + elif self.imputation_order == 'monotone': + ordered_indices = np.argsort(fraction_of_missing_values)[::-1] + elif self.imputation_order == 'revmonotone': + ordered_indices = np.argsort(fraction_of_missing_values) + else: + ordered_indices = np.arange(n_features) + np.random.shuffle(ordered_indices) + + # filter out indices for which we have no missing values + valid_features = all_features_indices[fraction_of_missing_values > 0] + valid_features = set(valid_features) + ordered_indices = [i for i in ordered_indices if i in valid_features] + return ordered_indices + + def _get_abs_correlation_matrix(self, X_filled, eps=1e-6): + # at each stage all but one of the features is used as input + n_features = X_filled.shape[1] + if self.n_nearest_columns > n_features - 1: + return None + abs_correlation_matrix = np.abs(np.corrcoef(X_filled.T)) + # np.corrcoef is not defined for constant columns + abs_correlation_matrix[np.isnan(abs_correlation_matrix)] = eps + np.fill_diagonal(abs_correlation_matrix, 0) + abs_correlation_matrix = normalize(abs_correlation_matrix, norm='l1', + axis=0) + return abs_correlation_matrix + + def fit_transform(self, X, y=None): + """ + Fit the imputer on X and return the transformed X. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Input data, where "n_samples" is the number of samples and + "n_features" is the number of features. + """ + X = check_array(X, dtype=np.float64, force_all_finite=False) + X = np.asarray(X, order="F") + mask_missing_values = _get_mask(X, self.missing_values) + + # initial imputation + self.initial_imputer_ = Imputer(missing_values=self.missing_values, + strategy=self.initial_fill_method, + axis=0) + X_filled = self.initial_imputer_.fit_transform(X) + self._val_inds = self.initial_imputer_._valid_statistics_indexes + X = X[:, self._val_inds] + mask_missing_values = mask_missing_values[:, self._val_inds] + + # perform imputations + n_samples, n_features = X_filled.shape + total_rounds = self.n_burn_in + self.n_imputations + results_list = [] + if self.verbose: + print("[MICE] Completing matrix with shape %s" % (X.shape,)) + start_t = time() + mice_msg = '[MICE] Ending imputation round ' + for m in range(total_rounds): + # order in which to impute + ordered_indices = self._get_ordered_indices(mask_missing_values) + + # abs_correlation matrix is used to choose a subset of other + # features to impute from f + abs_corr_mat = self._get_abs_correlation_matrix(X_filled) + + # Fill in each column in the order of ordered_indices + for this_column in ordered_indices: + other_columns = self._get_other_columns(n_features, + this_column, + abs_corr_mat) + X_filled, model = self._fill_in_one_column(X_filled, + mask_missing_values, + this_column, + other_columns) + model_triplet = (this_column, other_columns, model) + self.trained_model_triplets.append(model_triplet) + + if m >= self.n_burn_in: + results_list.append(X_filled[mask_missing_values]) + if self.verbose: + print(mice_msg + 'round %d/%d, elapsed time %0.2f' + % (m + 1, total_rounds, time() - start_t)) + + if len(results_list) > 0: + X[mask_missing_values] = np.array(results_list).mean(axis=0) + else: + X[mask_missing_values] = 
+
+    def transform(self, X):
+        """
+        Impute all missing values in X.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The input data to complete.
+        """
+        check_is_fitted(self, 'initial_imputer_')
+        X = check_array(X, dtype=np.float64, force_all_finite=False)
+        X = np.asarray(X, order="F")
+        mask_missing_values = _get_mask(X, self.missing_values)
+
+        # initial imputation
+        X_filled = self.initial_imputer_.transform(X)
+        X = X[:, self._val_inds]
+        mask_missing_values = mask_missing_values[:, self._val_inds]
+
+        # perform imputations by replaying the models trained during fit
+        n_samples, n_features = X_filled.shape
+        total_rounds = self.n_burn_in + self.n_imputations
+        if total_rounds > 0:
+            total_iterations = len(self.trained_model_triplets)
+            # integer division so the modulo check below is exact; one model
+            # is trained per incomplete column per round
+            imputations_per_round = total_iterations // total_rounds
+            round_index = 0
+        results_list = []
+        if self.verbose:
+            print("[MICE] Completing matrix with shape %s" % (X.shape,))
+            start_t = time()
+            mice_msg = '[MICE] Ending imputation round '
+        for i, model_triplet in enumerate(self.trained_model_triplets):
+            this_column, other_columns, model = model_triplet
+            X_filled, _ = self._fill_in_one_column(X_filled,
+                                                   mask_missing_values,
+                                                   this_column,
+                                                   other_columns,
+                                                   model)
+            if (i + 1) % imputations_per_round == 0:
+                round_index += 1
+                # rounds 1..n_burn_in are burn-in; collect the rest,
+                # matching fit_transform
+                if round_index > self.n_burn_in:
+                    results_list.append(X_filled[mask_missing_values])
+                    if self.verbose:
+                        print(mice_msg + '%d/%d, elapsed time %0.2f'
+                              % (round_index, total_rounds, time() - start_t))
+
+        if total_rounds > 0 and len(results_list) > 0:
+            X[mask_missing_values] = np.array(results_list).mean(axis=0)
+        else:
+            X[mask_missing_values] = X_filled[mask_missing_values]
+
+        return X
+
+    def fit(self, X, y=None):
+        """
+        Fit the imputer on X and return self.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Input data, where "n_samples" is the number of samples and
+            "n_features" is the number of features.
+        """
+        self.fit_transform(X)
+        return self
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index cb23e0ba8a315..dd574d619b421 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -85,7 +85,7 @@ def _yield_non_meta_checks(name, Estimator):
     # cross-decomposition's "transform" returns X and Y
     yield check_pipeline_consistency
 
-    if name not in ['Imputer']:
+    if name not in ['Imputer', 'MICEImputer']:
         # Test that all estimators check their input for NaN's and infs
         yield check_estimators_nan_inf
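For reference, the column subsampling enabled by ``n_nearest_columns`` draws
neighbor columns with probability proportional to absolute correlation, as
``_get_other_columns`` documents. A standalone sketch of that sampling step
on toy data (not the estimator's internals)::

    import numpy as np
    from sklearn.preprocessing import normalize

    rng = np.random.RandomState(0)
    X_filled = rng.rand(50, 5)
    abs_corr = np.abs(np.corrcoef(X_filled.T))
    np.fill_diagonal(abs_corr, 0)            # a column never predicts itself
    abs_corr = normalize(abs_corr, norm='l1', axis=0)  # columns sum to 1
    this_column = 3
    neighbors = rng.choice(np.arange(5), 2, replace=False,
                           p=abs_corr[:, this_column])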