From 686d7581496fde08dc0fdcba59e46d47c89580d6 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Wed, 27 Jun 2018 15:21:12 +0200 Subject: [PATCH 001/163] add example multiple imputation --- examples/plot_multiple_imputation.py | 389 +++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 examples/plot_multiple_imputation.py diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py new file mode 100644 index 0000000000000..ac3c8bf1d6d08 --- /dev/null +++ b/examples/plot_multiple_imputation.py @@ -0,0 +1,389 @@ +""" +================================================= +Imputing missing values using multiple imputation +================================================= + +By default, the ChainedImputer performs single imputation: a method where every +missing value is replaced with one imputed value. The strength of the method is +that it allows for finding unbiased statistical estimates due to its chained +character. However, the disadvantage is that every imputed value is treated as +if the value was observed, leading to an imputed dataset that does not reflect +the uncertainty that occurs due to the presence of missing values. This makes it +hard to find valid statistical inferences because the variance (and standard error) +of statistical estimates become too small. + +An alternative is using the ChainedImputer to perform multiple imputation: a method +where every missing value is imputed multiple times. The procedure results in +multiple datasets where the observed data is similar in every dataset, but the imputed +data is different. All desired steps after imputation are performed on every dataset, +including the analysis. Then, Rubin's pooling rules are used to combine the estimates +into one final result. + +In this example we will show how to use the ChainedImputer to perform multiple imputation, +what the effect is on the standard error of beta coefficients and how to set up a prediction +model using multiple imputation. 
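+
+As a brief sketch of the pooling step (the notation below is introduced only
+for this explanation and follows the multiple imputation literature): if the
+m imputed datasets yield estimates Q_1, ..., Q_m with variances U_1, ..., U_m,
+Rubin's rules pool them as Qbar = (1/m) * sum(Q_i), with within-imputation
+variance Ubar = (1/m) * sum(U_i), between-imputation variance
+B = (1/(m-1)) * sum((Q_i - Qbar)^2), and total variance T = Ubar + B + B/m.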
+""" + +import numpy as np +from scipy import stats +import matplotlib.pyplot as plt + +from sklearn.datasets import load_boston +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.impute import SimpleImputer, ChainedImputer +from sklearn.metrics import mean_squared_error as mse + +rng = np.random.RandomState(0) + +def ampute(X, missing_rate = 0.75, mech = "MCAR"): + + n_samples = X.shape[0] + n_features = X.shape[1] + X_incomplete = X.copy() + + # MCAR mechanism + if mech == 'MCAR': + for i in np.arange(n_features): + dropped_indices = np.array(np.random.choice(np.arange(n_samples), size=int(missing_rate * n_samples), replace=False)) + X_incomplete[dropped_indices[:, None], i] = None + + # MNAR mechanism + if mech == "MNAR": + for i in np.arange(n_features): + data_values = -np.mean(X[:, i]) + X[:, i] + weights = list(map(lambda x: math.exp(x) / (1 + math.exp(x)), data_values)) + probs = np.array(weights) / np.sum(np.array(weights)) + dropped_indices = np.array(np.random.choice(np.arange(n_samples), size=int(missing_rate * n_samples), p=probs, replace=False)) + X_incomplete[dropped_indices[:, None], i] = None + + return X_incomplete + +def calculate_variance_of_beta_estimates(y_true, y_pred, X): + + residuals = np.sum((y_true - y_pred)**2) + sigma_hat_squared = (1 / (len(y_true) - 2)) * residuals + X_prime_X = np.dot(X.T, X) + covariance_matrix = sigma_hat_squared / X_prime_X + vars = np.diag(covariance_matrix) + + return vars + +### EXAMPLE 1. +### COMPARE STATISTICAL ESTIMATES AND THEIR VARIANCE FOR LINEAR REGRESSION MODEL + +def get_results_full_dataset(X, y): + + # Perform linear regression on full data as a way of comparison + estimator = LinearRegression() + estimator.fit(X, y) + y_predict = estimator.predict(X) + + # Save the beta estimates + # The variance of these estimates + # And 1.96 * standard error of the estimates (useful to know the 95% confidence interval) + full_coefs = estimator.coef_ + full_vars = calculate_variance_of_beta_estimates(y, y_predict, X) + full_errorbar = 1.96 * np.sqrt(full_vars) + + return full_coefs, full_vars, full_errorbar + +def get_results_chained_imputation(X_incomplete, y): + + # Impute incomplete data with ChainedImputer + # Setting burnin at 99 and using only the last imputation + imputer = ChainedImputer(n_burn_in=99, n_imputations=1) + imputer.fit(X_incomplete) + X_imputed = imputer.transform(X_incomplete) + + # Perform linear regression on chained single imputed data + # Estimate beta estimates and their variances + estimator = LinearRegression() + estimator.fit(X_imputed, y) + y_predict = estimator.predict(X_imputed) + + # Save the beta estimates + # The variance of these estimates + # And 1.96 * standard error of the estimates + chained_coefs = estimator.coef_ + chained_vars = calculate_variance_of_beta_estimates(y, y_predict, X_imputed) + chained_errorbar = 1.96 * np.sqrt(chained_vars) + + return chained_coefs, chained_vars, chained_errorbar + +def get_results_mice_imputation(X_incomplete, y): + + # Impute incomplete data using the ChainedImputer as a MICEImputer + # Setting burnin at 99, using only last imputation and loop this procedure m times + m = 5 + multiple_imputations = [] + + for i in range(m): + + imputer = ChainedImputer(n_burn_in=99, n_imputations=1,random_state=i) + imputer.fit(X_incomplete) + X_imputed = imputer.transform(X_incomplete) + multiple_imputations.append(X_imputed) + + # Perform a model on each of 
the m imputed datasets + # Estimate the estimates for each model/dataset + m_coefs = [] + m_vars = [] + for i in range(m): + + estimator = LinearRegression() + estimator.fit(multiple_imputations[i], y) + y_predict = estimator.predict(multiple_imputations[i]) + + m_coefs.append(estimator.coef_) + m_vars.append(calculate_variance_of_beta_estimates(y, y_predict, multiple_imputations[i])) + + # Calculate the end estimates by applying Rubin's rules + # Rubin's rules can be slightly different for different types of estimates + # In case of linear regression, these are the rules: + # The value of every estimate is the mean of estimates in each of the m datasets + # The variance of these estimates is a combination of the variance of each of the m estimates (Ubar) + # And the variance between the m estimates (B) + + Qbar = np.mean(m_coefs, axis = 0) + Ubar = np.mean(m_vars, axis = 0) + B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) + T = Ubar + B + (B/m) + + # The final 1.96 * standard error is then the sqrt of the variance + mice_errorbar = 1.96 * np.sqrt(T) + + return Qbar, T, mice_errorbar + +# The original MICE procedure includes all variables inluding the output variable in the imputation +# process. The idea is that the imputation model should at least contain the analysis model to +# result in unbiased estimates +def get_results_mice_imputation_includingy(X_incomplete, y): + + # Impute incomplete data using the ChainedImputer as a MICEImputer + # Now using the output variable in the imputation loop + m = 5 + multiple_imputations = [] + + for i in range(m): + + Xy = np.column_stack((X_incomplete, y)) + imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) + imputer.fit(Xy) + data_imputed = imputer.transform(Xy) + + # We save only the X imputed data because we don't want to use y to predict y later on + X_imputed = data_imputed[:, :-1] + multiple_imputations.append(X_imputed) + + # Perform linear regression on mice multiple imputed data + # Estimate beta estimates and their variances + m_coefs = [] + m_vars = [] + for i in range(m): + + estimator = LinearRegression() + estimator.fit(multiple_imputations[i], y) + y_predict = estimator.predict(multiple_imputations[i]) + + m_coefs.append(estimator.coef_) + m_vars.append(calculate_variance_of_beta_estimates(y, y_predict, multiple_imputations[i])) + + # Calculate the end results by applying Rubin's rules + # The value of every estimate is the mean of the values over the m datasets + # The variance of these estimates is a combination of the variance of each of the m estimates (Ubar) + # And the variance between the m estimates (B) + + Qbar = np.mean(m_coefs, axis = 0) + Ubar = np.mean(m_vars, axis = 0) + B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) + T = Ubar + B + (B/m) + + # The final 1.96 * standard error is then the sqrt of the variance + mice_errorbar = 1.96 * np.sqrt(T) + + return Qbar, T, mice_errorbar + +# Now lets run these imputation procedures +# We use the Boston dataset and analyze the outcomes of the beta coefficients and their standard errors +# We standardize the data before running the procedure to be able to compare the coefficients +# We run the procedure for 3 missingness mechanisms (MCAR, MAR and MNAR) + +dataset = load_boston() +X_full, y = dataset.data, dataset.target + +scaler = StandardScaler() +X_scaled = scaler.fit_transform(X_full) +y_scaled = stats.zscore(y) + +print("Executing Example 1 MCAR Missingness") +Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") + 
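+# A quick sanity check, added for illustration (not strictly needed): ampute
+# assigns None into a float array, which numpy stores as np.nan, so the
+# realized missing rate per feature should be close to the requested 0.75.
+print(np.isnan(Boston_X_incomplete_MCAR).mean(axis=0))
+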
+full_coefs, full_vars, full_errorbar = get_results_full_dataset(X_scaled, y_scaled) +chained_coefs, chained_vars, chained_errorbar = get_results_chained_imputation(Boston_X_incomplete_MCAR, y_scaled) +mice_coefs, mice_vars, mice_errorbar = get_results_mice_imputation(Boston_X_incomplete_MCAR, y_scaled) +mice_y_coefs, mice_y_vars, mice_y_errorbar = get_results_mice_imputation_includingy(Boston_X_incomplete_MCAR, y_scaled) + +coefs = (full_coefs, chained_coefs, mice_coefs, mice_y_coefs) +vars = (full_vars, chained_vars, mice_vars, mice_y_vars) +errorbars = (full_errorbar, chained_errorbar, mice_errorbar, mice_y_errorbar) + +# We plot the results +n_situations = 4 +n = np.arange(n_situations) +n_labels = ['Full Data', 'Chained Imputer', 'Mice Imputer', 'Mice Imputer with y'] +colors = ['r', 'orange', 'b', 'purple'] +width = 0.3 +plt.figure(figsize=(24, 16)) + +plt1 = plt.subplot(211) +for j in n: + plt1.bar(np.arange(len(coefs[j])) + (3*j*(width/n_situations)), coefs[j], width = width, color = colors[j]) +plt.legend(n_labels) + +plt2 = plt.subplot(212) +for j in n: + plt2.bar(np.arange(len(errorbars[j])) + (3*j*(width/n_situations)), errorbars[j], width = width, color = colors[j]) + +plt1.set_title("MCAR Missingness") +plt1.set_ylabel("Beta Coefficients") +plt2.set_ylabel("Standard Errors") +plt1.set_xlabel("Features") +plt2.set_xlabel("Features") + +plt.show() + +### EXAMPLE 2. ### +### SHOW MULTIPLE IMPUTATION IN PREDICTION CONTEXT ### + +# In this example, we show how to apply the imputer in a train/test situation +# There are two approaches to get the end result of the prediction model +# In approach 1 you calculate the evaluation metric for every i in m and later average these values +# In approach 2 you average the predictions of every i in m and then calculate the evaluation metric + +def get_results_full_data(X_train, X_test, y_train, y_test): + + scaler = StandardScaler() + X_train_scaled = scaler.fit_transform(X_train) + X_test_scaled = scaler.transform(X_test) + + estimator = LinearRegression() + estimator.fit(X_train_scaled, y_train) + y_predict = estimator.predict(X_test_scaled) + mse_full = mse(y_test, y_predict) + + return mse_full + +# Perform pipeline for i in m +# Approach 1: pool the mse values of the m datasets +def get_results_multiple_imputation_approach1(X_train, X_test, y_train, y_test): + + m = 5 + multiple_mses = [] + + for i in range(m): + + # Fit the imputer for every i in im + # Be aware that you fit the imputer on the train data + # And apply to the test data + imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) + X_train_imputed = imputer.fit_transform(X_train) + X_test_imputed = imputer.transform(X_test) + + # Perform the steps you wish to take before fitting the estimator + scaler = StandardScaler() + X_train_scaled = scaler.fit_transform(X_train_imputed) + X_test_scaled = scaler.transform(X_test_imputed) + + # Finally fit the estimator and calculate the error metric for every i in m + estimator = LinearRegression() + estimator.fit(X_train_scaled, y_train) + y_predict = estimator.predict(X_test_scaled) + mse_approach1 = mse(y_test, y_predict) + multiple_mses.append(mse_approach1) + + # Average the error metric over the m loops to get a final result + mse_approach1 = np.mean(multiple_mses, axis=0) + + return mse_approach1 + +# Approach 2: average the predictions of the m datasets and then calculate the mse +def get_results_multiple_imputation_approach2(X_train, X_test, y_train, y_test): + + m = 5 + multiple_predictions = [] + + for i in 
range(m): + + # Fit the imputer for every i in im + # Be aware that you fit the imputer on the train data + # And apply to the test data + imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) + X_train_imputed = imputer.fit_transform(X_train) + X_test_imputed = imputer.transform(X_test) + + # Perform the steps you wish to take before fitting the estimator + scaler = StandardScaler() + X_train_scaled = scaler.fit_transform(X_train_imputed) + X_test_scaled = scaler.transform(X_test_imputed) + + # Finally fit the estimator and calculate the predictions for every i in m + estimator = LinearRegression() + estimator.fit(X_train_scaled, y_train) + y_predict = estimator.predict(X_test_scaled) + multiple_predictions.append(y_predict) + + # Average the predictions over the m loops + # Then calculate the error metric + predictions_average = np.mean(multiple_predictions, axis=0) + mse_approach2 = mse(y_test, predictions_average) + + return mse_approach2 + +def perform_simulation(dataset, X_incomplete, nsim = 10): + + X_full, y = dataset.data, dataset.target + outcome = [] + + for j in np.arange(nsim): + + train_indices, test_indices = train_test_split(np.arange(X_full.shape[0])) + + X_incomplete_train = X_incomplete[train_indices] + X_full_train = X_full[train_indices] + X_incomplete_test = X_incomplete[test_indices] + X_full_test = X_full[test_indices] + y_train = y[train_indices] + y_test = y[test_indices] + + mse_full = get_results_full_data(X_full_train, X_full_test, y_train, y_test) + mse_approach1 = get_results_multiple_imputation_approach1(X_incomplete_train, X_incomplete_test, y_train, y_test) + mse_approach2 = get_results_multiple_imputation_approach2(X_incomplete_train, X_incomplete_test, y_train, y_test) + + outcome.append((mse_full, mse_approach1, mse_approach2)) + + return np.mean(outcome, axis = 0), np.std(outcome, axis = 0) + +# Execute +print("Executing Example 1 MCAR Missingness") +Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") +mse_means, mse_std = perform_simulation(load_boston(), Boston_X_incomplete_MCAR, nsim=10) + +# Plot results +n_situations = 3 +n = np.arange(n_situations) +n_labels = ['Full Data', 'Average MSE', 'Average Predictions'] +colors = ['r', 'green', 'yellow'] +width = 0.3 +plt.figure(figsize=(6, 6)) + +plt1 = plt.subplot(111) +for j in n: + plt1.bar(j, mse_means[j], yerr = mse_std[j], + width = width, color = colors[j]) + +plt1.set_title("MCAR Missingness") +plt1.set_ylabel("Mean Squared Error") +plt.legend(n_labels) +plt.show() From 965ae8ef42b6870a85eaa3f596a266fdca424ae0 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Wed, 27 Jun 2018 17:36:17 +0200 Subject: [PATCH 002/163] adjust figure widths and legends --- examples/plot_multiple_imputation.py | 48 ++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py index ac3c8bf1d6d08..9528ee9a017ee 100644 --- a/examples/plot_multiple_imputation.py +++ b/examples/plot_multiple_imputation.py @@ -235,7 +235,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): n_labels = ['Full Data', 'Chained Imputer', 'Mice Imputer', 'Mice Imputer with y'] colors = ['r', 'orange', 'b', 'purple'] width = 0.3 -plt.figure(figsize=(24, 16)) +plt.figure(figsize=(12, 16)) plt1 = plt.subplot(211) for j in n: @@ -275,6 +275,23 @@ def get_results_full_data(X_train, X_test, y_train, y_test): return mse_full +def get_results_single_imputation(X_train, X_test, y_train, y_test): + + imputer = 
ChainedImputer(n_burn_in=99, n_imputations=1, random_state=0) + X_train_imputed = imputer.fit_transform(X_train) + X_test_imputed = imputer.transform(X_test) + + scaler = StandardScaler() + X_train_scaled = scaler.fit_transform(X_train_imputed) + X_test_scaled = scaler.transform(X_test_imputed) + + estimator = LinearRegression() + estimator.fit(X_train_scaled, y_train) + y_predict = estimator.predict(X_test_scaled) + mse_single = mse(y_test, y_predict) + + return mse_single + # Perform pipeline for i in m # Approach 1: pool the mse values of the m datasets def get_results_multiple_imputation_approach1(X_train, X_test, y_train, y_test): @@ -358,32 +375,35 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): y_test = y[test_indices] mse_full = get_results_full_data(X_full_train, X_full_test, y_train, y_test) + mse_single = get_results_single_imputation(X_incomplete_train, X_incomplete_test, y_train, y_test) mse_approach1 = get_results_multiple_imputation_approach1(X_incomplete_train, X_incomplete_test, y_train, y_test) mse_approach2 = get_results_multiple_imputation_approach2(X_incomplete_train, X_incomplete_test, y_train, y_test) - outcome.append((mse_full, mse_approach1, mse_approach2)) + outcome.append((mse_full, mse_single, mse_approach1, mse_approach2)) return np.mean(outcome, axis = 0), np.std(outcome, axis = 0) # Execute -print("Executing Example 1 MCAR Missingness") +print("Executing Example 2 MCAR Missingness") Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") mse_means, mse_std = perform_simulation(load_boston(), Boston_X_incomplete_MCAR, nsim=10) # Plot results -n_situations = 3 +n_situations = 4 n = np.arange(n_situations) -n_labels = ['Full Data', 'Average MSE', 'Average Predictions'] -colors = ['r', 'green', 'yellow'] -width = 0.3 -plt.figure(figsize=(6, 6)) +n_labels = ['Full Data', 'Single Imputation', 'MI Average MSE', 'MI Average Predictions'] +colors = ['r', 'orange', 'green', 'yellow'] -plt1 = plt.subplot(111) +plt.figure(figsize=(12, 6)) +ax1 = plt.subplot(111) for j in n: - plt1.bar(j, mse_means[j], yerr = mse_std[j], - width = width, color = colors[j]) + ax1.barh(j, mse_means[j], xerr=mse_std[j], + color=colors[j], alpha=0.6, align='center') + +ax1.set_title('MCAR Missingness') +ax1.set_yticks(n) +ax1.set_xlabel('Mean Squared Error') +ax1.invert_yaxis() +ax1.set_yticklabels(n_labels) -plt1.set_title("MCAR Missingness") -plt1.set_ylabel("Mean Squared Error") -plt.legend(n_labels) plt.show() From fa082de111fd68d211a4e8473264938a1e1db00b Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Thu, 28 Jun 2018 12:53:44 +0200 Subject: [PATCH 003/163] changed code according pep rules and increased figure size --- examples/plot_multiple_imputation.py | 276 ++++++++++++++++----------- 1 file changed, 160 insertions(+), 116 deletions(-) diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py index 9528ee9a017ee..2384fe00d49fb 100644 --- a/examples/plot_multiple_imputation.py +++ b/examples/plot_multiple_imputation.py @@ -8,25 +8,26 @@ that it allows for finding unbiased statistical estimates due to its chained character. However, the disadvantage is that every imputed value is treated as if the value was observed, leading to an imputed dataset that does not reflect -the uncertainty that occurs due to the presence of missing values. This makes it -hard to find valid statistical inferences because the variance (and standard error) -of statistical estimates become too small. 
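+Intuitively, a regression-based single imputation adds no new information for
+the imputed entries, yet the analysis afterwards treats them as fully observed
+data points, so standard errors are computed as if the sample carried more
+information than it really does.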
- -An alternative is using the ChainedImputer to perform multiple imputation: a method -where every missing value is imputed multiple times. The procedure results in -multiple datasets where the observed data is similar in every dataset, but the imputed -data is different. All desired steps after imputation are performed on every dataset, -including the analysis. Then, Rubin's pooling rules are used to combine the estimates -into one final result. - -In this example we will show how to use the ChainedImputer to perform multiple imputation, -what the effect is on the standard error of beta coefficients and how to set up a prediction -model using multiple imputation. +the uncertainty that occurs due to the presence of missing values. This makes +it hard to find valid statistical inferences because the variance (and standard +error) of statistical estimates become too small. + +An alternative is using the ChainedImputer to perform multiple imputation: a +method where every missing value is imputed multiple times. The procedure +results in multiple datasets where the observed data is similar in every +dataset, but the imputed data is different. All desired steps after imputation +are performed on every dataset, including the analysis. Then, Rubin's pooling +rules are used to combine the estimates into one final result. + +In this example we will show how to use the ChainedImputer to perform multiple +imputation, what the effect is on the standard error of beta coefficients and +how to set up a prediction model using multiple imputation. """ +import math import numpy as np -from scipy import stats import matplotlib.pyplot as plt +from scipy import stats from sklearn.datasets import load_boston from sklearn.linear_model import LinearRegression @@ -38,7 +39,6 @@ rng = np.random.RandomState(0) def ampute(X, missing_rate = 0.75, mech = "MCAR"): - n_samples = X.shape[0] n_features = X.shape[1] X_incomplete = X.copy() @@ -46,22 +46,29 @@ def ampute(X, missing_rate = 0.75, mech = "MCAR"): # MCAR mechanism if mech == 'MCAR': for i in np.arange(n_features): - dropped_indices = np.array(np.random.choice(np.arange(n_samples), size=int(missing_rate * n_samples), replace=False)) + dropped_indices = np.array(np.random.choice(np.arange(n_samples), + size=int(missing_rate + * n_samples), + replace=False)) X_incomplete[dropped_indices[:, None], i] = None # MNAR mechanism if mech == "MNAR": for i in np.arange(n_features): data_values = -np.mean(X[:, i]) + X[:, i] - weights = list(map(lambda x: math.exp(x) / (1 + math.exp(x)), data_values)) + weights = list(map(lambda x: math.exp(x) / (1 + math.exp(x)), + data_values)) probs = np.array(weights) / np.sum(np.array(weights)) - dropped_indices = np.array(np.random.choice(np.arange(n_samples), size=int(missing_rate * n_samples), p=probs, replace=False)) + dropped_indices = np.array(np.random.choice(np.arange(n_samples), + size=int(missing_rate + * n_samples), + p=probs, + replace=False)) X_incomplete[dropped_indices[:, None], i] = None return X_incomplete def calculate_variance_of_beta_estimates(y_true, y_pred, X): - residuals = np.sum((y_true - y_pred)**2) sigma_hat_squared = (1 / (len(y_true) - 2)) * residuals X_prime_X = np.dot(X.T, X) @@ -70,19 +77,22 @@ def calculate_variance_of_beta_estimates(y_true, y_pred, X): return vars -### EXAMPLE 1. -### COMPARE STATISTICAL ESTIMATES AND THEIR VARIANCE FOR LINEAR REGRESSION MODEL +############################################################################### -def get_results_full_dataset(X, y): +# EXAMPLE 1. 
COMPARE STATISTICAL ESTIMATES AND THEIR VARIANCE USING MULTIPLE +# IMPUTATION IN A LINEAR REGRESSION MODEL. + +############################################################################### +def get_results_full_dataset(X, y): # Perform linear regression on full data as a way of comparison estimator = LinearRegression() estimator.fit(X, y) y_predict = estimator.predict(X) - # Save the beta estimates - # The variance of these estimates - # And 1.96 * standard error of the estimates (useful to know the 95% confidence interval) + # Save the beta estimates, the variance of these estimates and 1.96 * + # standard error of the estimates. The latter is useful to know the 95% + # confidence interval. full_coefs = estimator.coef_ full_vars = calculate_variance_of_beta_estimates(y, y_predict, X) full_errorbar = 1.96 * np.sqrt(full_vars) @@ -90,9 +100,8 @@ def get_results_full_dataset(X, y): return full_coefs, full_vars, full_errorbar def get_results_chained_imputation(X_incomplete, y): - # Impute incomplete data with ChainedImputer - # Setting burnin at 99 and using only the last imputation + # Setting n_burn_in at 99 and using only the last imputation imputer = ChainedImputer(n_burn_in=99, n_imputations=1) imputer.fit(X_incomplete) X_imputed = imputer.transform(X_incomplete) @@ -103,24 +112,22 @@ def get_results_chained_imputation(X_incomplete, y): estimator.fit(X_imputed, y) y_predict = estimator.predict(X_imputed) - # Save the beta estimates - # The variance of these estimates - # And 1.96 * standard error of the estimates + # Save the beta estimates, the variance of these estimates and 1.96 * + # standard error of the estimates chained_coefs = estimator.coef_ - chained_vars = calculate_variance_of_beta_estimates(y, y_predict, X_imputed) + chained_vars = calculate_variance_of_beta_estimates( + y, y_predict, X_imputed) chained_errorbar = 1.96 * np.sqrt(chained_vars) return chained_coefs, chained_vars, chained_errorbar def get_results_mice_imputation(X_incomplete, y): - # Impute incomplete data using the ChainedImputer as a MICEImputer - # Setting burnin at 99, using only last imputation and loop this procedure m times + # Setting n_burn_in at 99 and using only last imputation and loop this + # procedure m times. m = 5 multiple_imputations = [] - for i in range(m): - imputer = ChainedImputer(n_burn_in=99, n_imputations=1,random_state=i) imputer.fit(X_incomplete) X_imputed = imputer.transform(X_incomplete) @@ -131,49 +138,46 @@ def get_results_mice_imputation(X_incomplete, y): m_coefs = [] m_vars = [] for i in range(m): - estimator = LinearRegression() estimator.fit(multiple_imputations[i], y) y_predict = estimator.predict(multiple_imputations[i]) - m_coefs.append(estimator.coef_) - m_vars.append(calculate_variance_of_beta_estimates(y, y_predict, multiple_imputations[i])) + m_vars.append(calculate_variance_of_beta_estimates( + y, y_predict, multiple_imputations[i])) - # Calculate the end estimates by applying Rubin's rules + # Calculate the end estimates by applying Rubin's rules. # Rubin's rules can be slightly different for different types of estimates # In case of linear regression, these are the rules: - # The value of every estimate is the mean of estimates in each of the m datasets - # The variance of these estimates is a combination of the variance of each of the m estimates (Ubar) - # And the variance between the m estimates (B) - + # + # The value of every estimate is the mean of the estimates in each of the m + # datasets. 
The variance of these estimates is a combination of the + # variance of each of the m estimates (Ubar) and the variance between the m + # estimates (B). The standard error is the sqrt of the variance. Qbar = np.mean(m_coefs, axis = 0) Ubar = np.mean(m_vars, axis = 0) B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) T = Ubar + B + (B/m) - - # The final 1.96 * standard error is then the sqrt of the variance mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar -# The original MICE procedure includes all variables inluding the output variable in the imputation -# process. The idea is that the imputation model should at least contain the analysis model to -# result in unbiased estimates +# The original MICE procedure includes all variables inluding the output +# variable in the imputation process. The idea is that the imputation model +# should at least contain the analysis model to result in unbiased estimates. +# In this function, we will also include y in the imputation process. def get_results_mice_imputation_includingy(X_incomplete, y): - # Impute incomplete data using the ChainedImputer as a MICEImputer # Now using the output variable in the imputation loop m = 5 multiple_imputations = [] - for i in range(m): - Xy = np.column_stack((X_incomplete, y)) imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) imputer.fit(Xy) data_imputed = imputer.transform(Xy) - # We save only the X imputed data because we don't want to use y to predict y later on + # We save only the X imputed data because we do not want to use y to + # predict y later on. X_imputed = data_imputed[:, :-1] multiple_imputations.append(X_imputed) @@ -182,92 +186,115 @@ def get_results_mice_imputation_includingy(X_incomplete, y): m_coefs = [] m_vars = [] for i in range(m): - estimator = LinearRegression() estimator.fit(multiple_imputations[i], y) y_predict = estimator.predict(multiple_imputations[i]) - m_coefs.append(estimator.coef_) - m_vars.append(calculate_variance_of_beta_estimates(y, y_predict, multiple_imputations[i])) - - # Calculate the end results by applying Rubin's rules - # The value of every estimate is the mean of the values over the m datasets - # The variance of these estimates is a combination of the variance of each of the m estimates (Ubar) - # And the variance between the m estimates (B) + m_vars.append(calculate_variance_of_beta_estimates( + y, y_predict, multiple_imputations[i])) + # Calculate the end estimates by applying Rubin's rules. + # Rubin's rules can be slightly different for different types of estimates + # In case of linear regression, these are the rules: + # + # The value of every estimate is the mean of the estimates in each of the m + # datasets. The variance of these estimates is a combination of the + # variance of each of the m estimates (Ubar) and the variance between the m + # estimates (B). The standard error is the sqrt of the variance. 
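+    # As an illustrative numeric check (numbers invented for this comment,
+    # with Rubin's between-imputation variance B = sum((Q_i - Qbar)**2) /
+    # (m - 1)): three estimates 0.5, 0.6 and 0.7, each with variance 0.01,
+    # pool to Qbar = 0.6, Ubar = 0.01 and B = 0.01, so T = 0.01 + 0.01
+    # + 0.01/3, clearly larger than the single-imputation variance alone.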
Qbar = np.mean(m_coefs, axis = 0) Ubar = np.mean(m_vars, axis = 0) B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) T = Ubar + B + (B/m) - - # The final 1.96 * standard error is then the sqrt of the variance mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar -# Now lets run these imputation procedures -# We use the Boston dataset and analyze the outcomes of the beta coefficients and their standard errors -# We standardize the data before running the procedure to be able to compare the coefficients -# We run the procedure for 3 missingness mechanisms (MCAR, MAR and MNAR) - +# Now lets run all these imputation procedures. +# We use the Boston dataset and analyze the outcomes of the beta coefficients +# and their standard errors. We standardize the data before running the +# procedure to be able to compare the coefficients. We run the procedure for +# MCAR missingness only. This can easily be changed to MNAR by setting the +# `mech` argument. +# +# Loading the data dataset = load_boston() X_full, y = dataset.data, dataset.target +# Standardizing the data scaler = StandardScaler() X_scaled = scaler.fit_transform(X_full) y_scaled = stats.zscore(y) +# Start the procedure print("Executing Example 1 MCAR Missingness") -Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") -full_coefs, full_vars, full_errorbar = get_results_full_dataset(X_scaled, y_scaled) -chained_coefs, chained_vars, chained_errorbar = get_results_chained_imputation(Boston_X_incomplete_MCAR, y_scaled) -mice_coefs, mice_vars, mice_errorbar = get_results_mice_imputation(Boston_X_incomplete_MCAR, y_scaled) -mice_y_coefs, mice_y_vars, mice_y_errorbar = get_results_mice_imputation_includingy(Boston_X_incomplete_MCAR, y_scaled) +# First, make the data incomplete with a MCAR mechanism. +Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") +# Second, run all the imputation procedures as described above. +full_coefs, full_vars, full_errorbar = get_results_full_dataset( + X_scaled, y_scaled) +chained_coefs, chained_vars, chained_errorbar = get_results_chained_imputation( + Boston_X_incomplete_MCAR, y_scaled) +mice_coefs, mice_vars, mice_errorbar = get_results_mice_imputation( + Boston_X_incomplete_MCAR, y_scaled) +mice_y_coefs, mice_y_vars, mice_y_errorbar = \ + get_results_mice_imputation_includingy( + Boston_X_incomplete_MCAR, y_scaled) + +# Combine the results from the four imputation procedures. 
coefs = (full_coefs, chained_coefs, mice_coefs, mice_y_coefs) vars = (full_vars, chained_vars, mice_vars, mice_y_vars) errorbars = (full_errorbar, chained_errorbar, mice_errorbar, mice_y_errorbar) -# We plot the results +# And plot the results n_situations = 4 n = np.arange(n_situations) -n_labels = ['Full Data', 'Chained Imputer', 'Mice Imputer', 'Mice Imputer with y'] +n_labels = ['Full Data', 'Chained Imputer', + 'Mice Imputer', 'Mice Imputer with y'] colors = ['r', 'orange', 'b', 'purple'] width = 0.3 -plt.figure(figsize=(12, 16)) +plt.figure(figsize=(24, 32)) plt1 = plt.subplot(211) for j in n: - plt1.bar(np.arange(len(coefs[j])) + (3*j*(width/n_situations)), coefs[j], width = width, color = colors[j]) + plt1.bar(np.arange(len(coefs[j])) + (3*j*(width/n_situations)), + coefs[j], width = width, color = colors[j]) plt.legend(n_labels) plt2 = plt.subplot(212) for j in n: - plt2.bar(np.arange(len(errorbars[j])) + (3*j*(width/n_situations)), errorbars[j], width = width, color = colors[j]) + plt2.bar(np.arange(len(errorbars[j])) + (3*j*(width/n_situations)), + errorbars[j], width = width, color = colors[j]) plt1.set_title("MCAR Missingness") plt1.set_ylabel("Beta Coefficients") plt2.set_ylabel("Standard Errors") plt1.set_xlabel("Features") plt2.set_xlabel("Features") - plt.show() -### EXAMPLE 2. ### -### SHOW MULTIPLE IMPUTATION IN PREDICTION CONTEXT ### +############################################################################### -# In this example, we show how to apply the imputer in a train/test situation -# There are two approaches to get the end result of the prediction model -# In approach 1 you calculate the evaluation metric for every i in m and later average these values -# In approach 2 you average the predictions of every i in m and then calculate the evaluation metric +# EXAMPLE 2. SHOW MULTIPLE IMPUTATION IN A PREDICTION CONTEXT. -def get_results_full_data(X_train, X_test, y_train, y_test): +############################################################################### +# In this example, we show how to apply MICE imputation in a train/test +# situation. There are two approaches to get the end result of the prediction +# model. In approach 1 you calculate the evaluation metric for every i in m and +# later average these values. In approach 2 you average the predictions of +# every i in m and then calculate the evaluation metric. We test both +# approaches. +# +# Apply the regression model on the full dataset as a way of comparison. +def get_results_full_data(X_train, X_test, y_train, y_test): + # Standardize data scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train) X_test_scaled = scaler.transform(X_test) + # Perform estimation and prediction estimator = LinearRegression() estimator.fit(X_train_scaled, y_train) y_predict = estimator.predict(X_test_scaled) @@ -275,16 +302,19 @@ def get_results_full_data(X_train, X_test, y_train, y_test): return mse_full +# Use the ChainedImputer as a single imputation procedure. 
def get_results_single_imputation(X_train, X_test, y_train, y_test): - + # Apply imputation imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=0) X_train_imputed = imputer.fit_transform(X_train) X_test_imputed = imputer.transform(X_test) + # Standardize data scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train_imputed) X_test_scaled = scaler.transform(X_test_imputed) + # Perform estimation and prediction estimator = LinearRegression() estimator.fit(X_train_scaled, y_train) y_predict = estimator.predict(X_test_scaled) @@ -292,15 +322,13 @@ def get_results_single_imputation(X_train, X_test, y_train, y_test): return mse_single -# Perform pipeline for i in m -# Approach 1: pool the mse values of the m datasets -def get_results_multiple_imputation_approach1(X_train, X_test, y_train, y_test): - +# Now use the IterativeImputer as a MICE Imputer by looping over i in m. +# Approach 1: pool the mse values of the m datasets. +def get_results_multiple_imputation_approach1(X_train, X_test, + y_train, y_test): m = 5 multiple_mses = [] - for i in range(m): - # Fit the imputer for every i in im # Be aware that you fit the imputer on the train data # And apply to the test data @@ -309,31 +337,32 @@ def get_results_multiple_imputation_approach1(X_train, X_test, y_train, y_test): X_test_imputed = imputer.transform(X_test) # Perform the steps you wish to take before fitting the estimator + # Such as standardization. scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train_imputed) X_test_scaled = scaler.transform(X_test_imputed) - # Finally fit the estimator and calculate the error metric for every i in m + # Finally fit the estimator and calculate the error metric for every i + # in m. Save all error metric values. estimator = LinearRegression() estimator.fit(X_train_scaled, y_train) y_predict = estimator.predict(X_test_scaled) mse_approach1 = mse(y_test, y_predict) multiple_mses.append(mse_approach1) - # Average the error metric over the m loops to get a final result + # Average the error metric values over the m loops to get a final result. mse_approach1 = np.mean(multiple_mses, axis=0) return mse_approach1 -# Approach 2: average the predictions of the m datasets and then calculate the mse -def get_results_multiple_imputation_approach2(X_train, X_test, y_train, y_test): - +# Approach 2: average the predictions of the m datasets and then calculate the +# error metric. +def get_results_multiple_imputation_approach2(X_train, X_test, + y_train, y_test): m = 5 multiple_predictions = [] - for i in range(m): - - # Fit the imputer for every i in im + # Fit the imputer for every i in m # Be aware that you fit the imputer on the train data # And apply to the test data imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) @@ -341,32 +370,34 @@ def get_results_multiple_imputation_approach2(X_train, X_test, y_train, y_test): X_test_imputed = imputer.transform(X_test) # Perform the steps you wish to take before fitting the estimator + # Such as standardization scaler = StandardScaler() X_train_scaled = scaler.fit_transform(X_train_imputed) X_test_scaled = scaler.transform(X_test_imputed) - # Finally fit the estimator and calculate the predictions for every i in m + # Finally fit the estimator and calculate the predictions for every i + # in m. Save the predictions. 
estimator = LinearRegression() estimator.fit(X_train_scaled, y_train) y_predict = estimator.predict(X_test_scaled) multiple_predictions.append(y_predict) # Average the predictions over the m loops - # Then calculate the error metric + # Then calculate the error metric. predictions_average = np.mean(multiple_predictions, axis=0) mse_approach2 = mse(y_test, predictions_average) return mse_approach2 def perform_simulation(dataset, X_incomplete, nsim = 10): - X_full, y = dataset.data, dataset.target outcome = [] + # Start a simulation process that executes the process nsim times. for j in np.arange(nsim): - - train_indices, test_indices = train_test_split(np.arange(X_full.shape[0])) - + # First, split the data in train and test dataset. + train_indices, test_indices = train_test_split( + np.arange(X_full.shape[0])) X_incomplete_train = X_incomplete[train_indices] X_full_train = X_full[train_indices] X_incomplete_test = X_incomplete[test_indices] @@ -374,19 +405,33 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): y_train = y[train_indices] y_test = y[test_indices] - mse_full = get_results_full_data(X_full_train, X_full_test, y_train, y_test) - mse_single = get_results_single_imputation(X_incomplete_train, X_incomplete_test, y_train, y_test) - mse_approach1 = get_results_multiple_imputation_approach1(X_incomplete_train, X_incomplete_test, y_train, y_test) - mse_approach2 = get_results_multiple_imputation_approach2(X_incomplete_train, X_incomplete_test, y_train, y_test) - + # Second, perform the imputation procedures and calculation of the + # error metric for every one of the four situations. + mse_full = get_results_full_data( + X_full_train, X_full_test, y_train, y_test) + mse_single = get_results_single_imputation( + X_incomplete_train, X_incomplete_test, y_train, y_test) + mse_approach1 = get_results_multiple_imputation_approach1( + X_incomplete_train, X_incomplete_test, y_train, y_test) + mse_approach2 = get_results_multiple_imputation_approach2( + X_incomplete_train, X_incomplete_test, y_train, y_test) + + # Save the outcome of every simulation round outcome.append((mse_full, mse_single, mse_approach1, mse_approach2)) + # Return the mean and standard deviation of the nsim outcome values return np.mean(outcome, axis = 0), np.std(outcome, axis = 0) -# Execute +# Execute the simulation print("Executing Example 2 MCAR Missingness") + +# Generate missing values with a MCAR mechanism Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") -mse_means, mse_std = perform_simulation(load_boston(), Boston_X_incomplete_MCAR, nsim=10) + +# Perform the simulation +mse_means, mse_std = perform_simulation(load_boston(), + Boston_X_incomplete_MCAR, + nsim=10) # Plot results n_situations = 4 @@ -394,7 +439,7 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): n_labels = ['Full Data', 'Single Imputation', 'MI Average MSE', 'MI Average Predictions'] colors = ['r', 'orange', 'green', 'yellow'] -plt.figure(figsize=(12, 6)) +plt.figure(figsize=(24, 12)) ax1 = plt.subplot(111) for j in n: ax1.barh(j, mse_means[j], xerr=mse_std[j], @@ -405,5 +450,4 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): ax1.set_xlabel('Mean Squared Error') ax1.invert_yaxis() ax1.set_yticklabels(n_labels) - plt.show() From e3e2465fc10bf6859c5a5f758017de832ea1dd57 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Thu, 28 Jun 2018 13:07:25 +0200 Subject: [PATCH 004/163] solve two issues from lgtm and improve introduction text --- examples/plot_multiple_imputation.py | 72 +++++++++++++++------------- 1 
file changed, 39 insertions(+), 33 deletions(-)

diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py
index 2384fe00d49fb..bb2b6cb225560 100644
--- a/examples/plot_multiple_imputation.py
+++ b/examples/plot_multiple_imputation.py
@@ -3,25 +3,26 @@
 Imputing missing values using multiple imputation
 =================================================
 
-By default, the ChainedImputer performs single imputation: a method where every
-missing value is replaced with one imputed value. The strength of the method is
-that it allows for finding unbiased statistical estimates due to its chained
-character. However, the disadvantage is that every imputed value is treated as
-if the value was observed, leading to an imputed dataset that does not reflect
-the uncertainty that occurs due to the presence of missing values. This makes
-it hard to find valid statistical inferences because the variance (and standard
-error) of statistical estimates become too small.
-
-An alternative is using the ChainedImputer to perform multiple imputation: a
+By default, the IterativeImputer performs single imputation: a method where
+every missing value is replaced with one imputed value. The chained character
+of the method and the possibility to draw imputation values from the posterior
+distribution of a Bayesian imputation model allows for finding unbiased
+statistical estimates. However, the disadvantage is that every imputed value is
+treated as if the value was observed, leading to an imputed dataset that does
+not reflect the uncertainty that occurs due to the presence of missing values.
+This makes it hard to draw valid statistical inferences because the variance
+(and standard error) of statistical estimates becomes too small.
+
+An alternative is using the IterativeImputer to perform multiple imputation: a
 method where every missing value is imputed multiple times. The procedure
 results in multiple datasets where the observed data is similar in every
 dataset, but the imputed data is different. All desired steps after imputation
 are performed on every dataset, including the analysis. Then, Rubin's pooling
 rules are used to combine the estimates into one final result.
 
-In this example we will show how to use the ChainedImputer to perform multiple
-imputation, what the effect is on the standard error of beta coefficients and
-how to set up a prediction model using multiple imputation.
+In this example we will show how to use the ITerativeImputer to perform
+multiple imputation, what the effect is on the standard error of beta
+coefficients and how to set up a prediction model using multiple imputation.
 """
 
 import math
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy import stats
@@ -33,11 +34,12 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
-from sklearn.impute import SimpleImputer, ChainedImputer
+from sklearn.impute import ChainedImputer
 from sklearn.metrics import mean_squared_error as mse
 
 rng = np.random.RandomState(0)
 
+# Start by defining a basic amputation function
 def ampute(X, missing_rate = 0.75, mech = "MCAR"):
     n_samples = X.shape[0]
     n_features = X.shape[1]
     X_incomplete = X.copy()
@@ -68,6 +70,9 @@ def ampute(X, missing_rate = 0.75, mech = "MCAR"):
 
     return X_incomplete
 
+# Make a function that calculates the variance of the beta estimates. This is
+# necessary because the linear regression model from sklearn does not provide
+# these values.
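+# For reference, the textbook OLS result is Var(beta_hat) = sigma^2 (X'X)^-1;
+# the per-coefficient variances would then be read off the diagonal, e.g.
+# np.diag(sigma_hat_squared * np.linalg.inv(np.dot(X.T, X))) (a sketch of
+# that alternative, not what the function below computes).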
def calculate_variance_of_beta_estimates(y_true, y_pred, X): residuals = np.sum((y_true - y_pred)**2) sigma_hat_squared = (1 / (len(y_true) - 2)) * residuals @@ -100,8 +105,8 @@ def get_results_full_dataset(X, y): return full_coefs, full_vars, full_errorbar def get_results_chained_imputation(X_incomplete, y): - # Impute incomplete data with ChainedImputer - # Setting n_burn_in at 99 and using only the last imputation + # Impute incomplete data with IterativeImputer using single imputation + # We set n_burn_in at 99 and use only the last imputation imputer = ChainedImputer(n_burn_in=99, n_imputations=1) imputer.fit(X_incomplete) X_imputed = imputer.transform(X_incomplete) @@ -122,9 +127,9 @@ def get_results_chained_imputation(X_incomplete, y): return chained_coefs, chained_vars, chained_errorbar def get_results_mice_imputation(X_incomplete, y): - # Impute incomplete data using the ChainedImputer as a MICEImputer - # Setting n_burn_in at 99 and using only last imputation and loop this - # procedure m times. + # Impute incomplete data using the IterativeImputer to perform multiple + # imputation. We set n_burn_in at 99 and use only last imputation and + # loop this procedure m times. m = 5 multiple_imputations = [] for i in range(m): @@ -161,12 +166,13 @@ def get_results_mice_imputation(X_incomplete, y): return Qbar, T, mice_errorbar -# The original MICE procedure includes all variables inluding the output -# variable in the imputation process. The idea is that the imputation model -# should at least contain the analysis model to result in unbiased estimates. -# In this function, we will also include y in the imputation process. +# The original multiple imputation procedure as developed under the name +# MICE includes all variables in the imputation process; including the output +# variable. The reason to do this is that the imputation model should at least +# contain the analysis model to result in unbiased estimates. In this function, +# we will also include y in the imputation process. def get_results_mice_imputation_includingy(X_incomplete, y): - # Impute incomplete data using the ChainedImputer as a MICEImputer + # Impute incomplete data using the IterativeImputer as a MICEImputer # Now using the output variable in the imputation loop m = 5 multiple_imputations = [] @@ -213,8 +219,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): # We use the Boston dataset and analyze the outcomes of the beta coefficients # and their standard errors. We standardize the data before running the # procedure to be able to compare the coefficients. We run the procedure for -# MCAR missingness only. This can easily be changed to MNAR by setting the -# `mech` argument. +# MCAR missingness only. # # Loading the data dataset = load_boston() @@ -280,7 +285,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): ############################################################################### -# In this example, we show how to apply MICE imputation in a train/test +# In this example, we show how to apply multiple imputation in a train/test # situation. There are two approaches to get the end result of the prediction # model. In approach 1 you calculate the evaluation metric for every i in m and # later average these values. In approach 2 you average the predictions of @@ -322,8 +327,8 @@ def get_results_single_imputation(X_train, X_test, y_train, y_test): return mse_single -# Now use the IterativeImputer as a MICE Imputer by looping over i in m. 
-# Approach 1: pool the mse values of the m datasets. +# Now use the IterativeImputer to perform multiple imputation by looping over +# i in m. Approach 1: pool the mse values of the m datasets. def get_results_multiple_imputation_approach1(X_train, X_test, y_train, y_test): m = 5 @@ -355,8 +360,8 @@ def get_results_multiple_imputation_approach1(X_train, X_test, return mse_approach1 -# Approach 2: average the predictions of the m datasets and then calculate the -# error metric. +# Approach 2: We average the predictions of the m datasets and then calculate +# the error metric. def get_results_multiple_imputation_approach2(X_train, X_test, y_train, y_test): m = 5 @@ -397,7 +402,7 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): for j in np.arange(nsim): # First, split the data in train and test dataset. train_indices, test_indices = train_test_split( - np.arange(X_full.shape[0])) + np.arange(X_full.shape[0]), random_state=j) X_incomplete_train = X_incomplete[train_indices] X_full_train = X_full[train_indices] X_incomplete_test = X_incomplete[test_indices] @@ -436,7 +441,8 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): # Plot results n_situations = 4 n = np.arange(n_situations) -n_labels = ['Full Data', 'Single Imputation', 'MI Average MSE', 'MI Average Predictions'] +n_labels = ['Full Data', 'Single Imputation', + 'MI Average MSE', 'MI Average Predictions'] colors = ['r', 'orange', 'green', 'yellow'] plt.figure(figsize=(24, 12)) From 15b3b914fe893d79f5fa2e162d80e6f639d146f4 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Thu, 28 Jun 2018 14:41:22 +0200 Subject: [PATCH 005/163] remove spaces in arguments and add lines for definitions --- examples/plot_multiple_imputation.py | 41 ++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py index bb2b6cb225560..2bcc8c9fd2c27 100644 --- a/examples/plot_multiple_imputation.py +++ b/examples/plot_multiple_imputation.py @@ -39,8 +39,9 @@ rng = np.random.RandomState(0) + # Start by defining a basic amputation function -def ampute(X, missing_rate = 0.75, mech = "MCAR"): +def ampute(X, missing_rate=0.75, mech="MCAR"): n_samples = X.shape[0] n_features = X.shape[1] X_incomplete = X.copy() @@ -70,6 +71,7 @@ def ampute(X, missing_rate = 0.75, mech = "MCAR"): return X_incomplete + # Make a function that calculates the variance of the beta estimates. This is # necessary because the linear regression model from sklearn does not provide # these values. @@ -89,6 +91,7 @@ def calculate_variance_of_beta_estimates(y_true, y_pred, X): ############################################################################### + def get_results_full_dataset(X, y): # Perform linear regression on full data as a way of comparison estimator = LinearRegression() @@ -104,6 +107,7 @@ def get_results_full_dataset(X, y): return full_coefs, full_vars, full_errorbar + def get_results_chained_imputation(X_incomplete, y): # Impute incomplete data with IterativeImputer using single imputation # We set n_burn_in at 99 and use only the last imputation @@ -126,6 +130,7 @@ def get_results_chained_imputation(X_incomplete, y): return chained_coefs, chained_vars, chained_errorbar + def get_results_mice_imputation(X_incomplete, y): # Impute incomplete data using the IterativeImputer to perform multiple # imputation. 
We set n_burn_in at 99 and use only last imputation and @@ -133,7 +138,7 @@ def get_results_mice_imputation(X_incomplete, y): m = 5 multiple_imputations = [] for i in range(m): - imputer = ChainedImputer(n_burn_in=99, n_imputations=1,random_state=i) + imputer = ChainedImputer(n_burn_in=99, n_imputations=1, random_state=i) imputer.fit(X_incomplete) X_imputed = imputer.transform(X_incomplete) multiple_imputations.append(X_imputed) @@ -158,14 +163,15 @@ def get_results_mice_imputation(X_incomplete, y): # datasets. The variance of these estimates is a combination of the # variance of each of the m estimates (Ubar) and the variance between the m # estimates (B). The standard error is the sqrt of the variance. - Qbar = np.mean(m_coefs, axis = 0) - Ubar = np.mean(m_vars, axis = 0) - B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) + Qbar = np.mean(m_coefs, axis=0) + Ubar = np.mean(m_vars, axis=0) + B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis=0) T = Ubar + B + (B/m) mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar + # The original multiple imputation procedure as developed under the name # MICE includes all variables in the imputation process; including the output # variable. The reason to do this is that the imputation model should at least @@ -207,14 +213,15 @@ def get_results_mice_imputation_includingy(X_incomplete, y): # datasets. The variance of these estimates is a combination of the # variance of each of the m estimates (Ubar) and the variance between the m # estimates (B). The standard error is the sqrt of the variance. - Qbar = np.mean(m_coefs, axis = 0) - Ubar = np.mean(m_vars, axis = 0) - B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis = 0) + Qbar = np.mean(m_coefs, axis=0) + Ubar = np.mean(m_vars, axis=0) + B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis=0) T = Ubar + B + (B/m) mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar + # Now lets run all these imputation procedures. # We use the Boston dataset and analyze the outcomes of the beta coefficients # and their standard errors. We standardize the data before running the @@ -234,7 +241,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): print("Executing Example 1 MCAR Missingness") # First, make the data incomplete with a MCAR mechanism. -Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") +Boston_X_incomplete_MCAR = ampute(X_scaled, mech="MCAR") # Second, run all the imputation procedures as described above. full_coefs, full_vars, full_errorbar = get_results_full_dataset( @@ -264,13 +271,13 @@ def get_results_mice_imputation_includingy(X_incomplete, y): plt1 = plt.subplot(211) for j in n: plt1.bar(np.arange(len(coefs[j])) + (3*j*(width/n_situations)), - coefs[j], width = width, color = colors[j]) + coefs[j], width=width, color=colors[j]) plt.legend(n_labels) plt2 = plt.subplot(212) for j in n: plt2.bar(np.arange(len(errorbars[j])) + (3*j*(width/n_situations)), - errorbars[j], width = width, color = colors[j]) + errorbars[j], width=width, color=colors[j]) plt1.set_title("MCAR Missingness") plt1.set_ylabel("Beta Coefficients") @@ -285,6 +292,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): ############################################################################### + # In this example, we show how to apply multiple imputation in a train/test # situation. There are two approaches to get the end result of the prediction # model. 
In approach 1 you calculate the evaluation metric for every i in m and @@ -307,6 +315,7 @@ def get_results_full_data(X_train, X_test, y_train, y_test): return mse_full + # Use the ChainedImputer as a single imputation procedure. def get_results_single_imputation(X_train, X_test, y_train, y_test): # Apply imputation @@ -327,6 +336,7 @@ def get_results_single_imputation(X_train, X_test, y_train, y_test): return mse_single + # Now use the IterativeImputer to perform multiple imputation by looping over # i in m. Approach 1: pool the mse values of the m datasets. def get_results_multiple_imputation_approach1(X_train, X_test, @@ -360,6 +370,7 @@ def get_results_multiple_imputation_approach1(X_train, X_test, return mse_approach1 + # Approach 2: We average the predictions of the m datasets and then calculate # the error metric. def get_results_multiple_imputation_approach2(X_train, X_test, @@ -394,7 +405,8 @@ def get_results_multiple_imputation_approach2(X_train, X_test, return mse_approach2 -def perform_simulation(dataset, X_incomplete, nsim = 10): + +def perform_simulation(dataset, X_incomplete, nsim=10): X_full, y = dataset.data, dataset.target outcome = [] @@ -425,13 +437,14 @@ def perform_simulation(dataset, X_incomplete, nsim = 10): outcome.append((mse_full, mse_single, mse_approach1, mse_approach2)) # Return the mean and standard deviation of the nsim outcome values - return np.mean(outcome, axis = 0), np.std(outcome, axis = 0) + return np.mean(outcome, axis=0), np.std(outcome, axis=0) + # Execute the simulation print("Executing Example 2 MCAR Missingness") # Generate missing values with a MCAR mechanism -Boston_X_incomplete_MCAR = ampute(X_scaled, mech = "MCAR") +Boston_X_incomplete_MCAR = ampute(X_scaled, mech="MCAR") # Perform the simulation mse_means, mse_std = perform_simulation(load_boston(), From ed1db8bb6d5e33c50e354de2246324d7c7f19210 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Fri, 29 Jun 2018 16:19:29 +0200 Subject: [PATCH 006/163] put rules in separate functions and include explanation --- examples/plot_multiple_imputation.py | 106 ++++++++++++++++++--------- 1 file changed, 71 insertions(+), 35 deletions(-) diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py index 2bcc8c9fd2c27..901781cb294bd 100644 --- a/examples/plot_multiple_imputation.py +++ b/examples/plot_multiple_imputation.py @@ -17,15 +17,44 @@ method where every missing value is imputed multiple times. The procedure results in multiple datasets where the observed data is similar in every dataset, but the imputed data is different. All desired steps after imputation -are performed on every dataset, including the analysis. Then, Rubin's pooling -rules are used to combine the estimates into one final result. - -In this example we will show how to use the ITerativeImputer to perform -multiple imputation, what the effect is on the standard error of beta -coefficients and how to set up a prediction model using multiple imputation. +are performed on every dataset, such as standardization and other feature +engineering steps. The estimation model is also fitted on each of the datasets. + +One final model is obtained by combining the estimates of each model with +Rubin's pooling rules. These rules assume that the parameters of interest are +normally distributed which is the case with, for example, estimates of the mean +and regression coefficients. Other parameters, such as correlation +coefficients need transformation to suit the assumption of normality. 
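+A correlation coefficient, for instance, is commonly pooled on the Fisher
+z scale (z = arctanh(r)) and transformed back with tanh afterwards.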
+If it is not possible to approximate a normal distribution, it is better to use +robust summary measures such as medians or ranges instead of using Rubin’s +pooling rules. This applies to an estimate like explained variance. + +In sum, Rubin’s pooling rules are as follows. The overall point estimate after +multiple imputation (denoted by Qbar) is the average of all the m point +estimates. The variance of the overall point estimate is a combination of +so-called within imputation variance (Ubar) and between imputation +variance (B). Ubar is the average of the m variances of the m point estimates. +Both Qbar and Ubar are corrected with a factor 1 / m to account for sampling +variance. The between imputation variance (B) is the sum of the squared +difference between Qbar and the m point estimates, corrected with a factor +1 / (m – 1). Then, the total variance (T) of the MI overall point estimate is +Ubar + B + B/m. + +In this document we will show how to use the IterativeImputer to perform +multiple imputation. In example 1 we show the effect of Rubin’s pooling +rules on the variance of regression estimates. Due to the between imputation +variance, the standard errors of all regression coefficients are larger with +multiple imputation than with single imputation. This allows for valid +statistical inference making. + +In example 2 we show how to set up a prediction model using multiple imputation. +We compare two approaches. In one approach, we make predictions for each of the +m datasets and combine the m evaluation error metrics into one overall value. +In the other approach, we combine the predictions and calculate one evaluation +error metric over the averaged predictions. A short simulation study shows that +the second approach results in the smallest Mean Squared Error. """ -import math import numpy as np import matplotlib.pyplot as plt from scipy import stats @@ -59,8 +88,7 @@ def ampute(X, missing_rate=0.75, mech="MCAR"): if mech == "MNAR": for i in np.arange(n_features): data_values = -np.mean(X[:, i]) + X[:, i] - weights = list(map(lambda x: math.exp(x) / (1 + math.exp(x)), - data_values)) + weights = 1 / (1 + np.exp(-data_values)) probs = np.array(weights) / np.sum(np.array(weights)) dropped_indices = np.array(np.random.choice(np.arange(n_samples), size=int(missing_rate @@ -84,6 +112,31 @@ def calculate_variance_of_beta_estimates(y_true, y_pred, X): return vars + +# Apply Rubin's pooling rules as follows. +# The value of every estimate is the mean of the estimates in each of the m +# datasets (Qbar). The variance of these estimates is a combination of the +# variance of each of the m estimates (Ubar) and the variance between the m +# estimates (B). +# +# Make a function that calculates Qbar from m estimates +def calculate_Qbar(m_estimates): + m = len(m_estimates) + Qbar = 1/m * np.sum(m_estimates, axis=0) + + return Qbar + + +# Make a function that calculates T from m estimates and their variances +def calculate_T(m_estimates, m_variances, Qbar): + m = len(m_estimates) + Ubar = 1/m * np.sum(m_variances, axis=0) + B = 1/(m - 1) * np.sum((Qbar - m_estimates) ** 2, axis=0) + T = Ubar + B + (B/m) + + return T + + ############################################################################### # EXAMPLE 1. COMPARE STATISTICAL ESTIMATES AND THEIR VARIANCE USING MULTIPLE @@ -156,17 +209,8 @@ def get_results_mice_imputation(X_incomplete, y): y, y_predict, multiple_imputations[i])) # Calculate the end estimates by applying Rubin's rules. 
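# A minimal numeric sketch of the pooling rules wired up above, using
# made-up values for m = 3 analyses (illustrative only; it mirrors
# calculate_Qbar and calculate_T as defined in this patch):
import numpy as np

m_estimates = np.array([0.52, 0.48, 0.55])       # hypothetical point estimates
m_variances = np.array([0.010, 0.012, 0.011])    # their estimated variances

Qbar = np.mean(m_estimates)                      # pooled point estimate
Ubar = np.mean(m_variances)                      # within-imputation variance
B = np.sum((Qbar - m_estimates) ** 2) / (3 - 1)  # between-imputation variance
T = Ubar + B + B / 3                             # total variance; SE = sqrt(T)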
- # Rubin's rules can be slightly different for different types of estimates - # In case of linear regression, these are the rules: - # - # The value of every estimate is the mean of the estimates in each of the m - # datasets. The variance of these estimates is a combination of the - # variance of each of the m estimates (Ubar) and the variance between the m - # estimates (B). The standard error is the sqrt of the variance. - Qbar = np.mean(m_coefs, axis=0) - Ubar = np.mean(m_vars, axis=0) - B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis=0) - T = Ubar + B + (B/m) + Qbar = calculate_Qbar(m_coefs) + T = calculate_T(m_coefs, m_vars, Qbar) mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar @@ -206,17 +250,8 @@ def get_results_mice_imputation_includingy(X_incomplete, y): y, y_predict, multiple_imputations[i])) # Calculate the end estimates by applying Rubin's rules. - # Rubin's rules can be slightly different for different types of estimates - # In case of linear regression, these are the rules: - # - # The value of every estimate is the mean of the estimates in each of the m - # datasets. The variance of these estimates is a combination of the - # variance of each of the m estimates (Ubar) and the variance between the m - # estimates (B). The standard error is the sqrt of the variance. - Qbar = np.mean(m_coefs, axis=0) - Ubar = np.mean(m_vars, axis=0) - B = (1 / (m-1)) * np.mean((Qbar - m_coefs) ** 2, axis=0) - T = Ubar + B + (B/m) + Qbar = calculate_Qbar(m_coefs) + T = calculate_T(m_coefs, m_vars, Qbar) mice_errorbar = 1.96 * np.sqrt(T) return Qbar, T, mice_errorbar @@ -238,7 +273,7 @@ def get_results_mice_imputation_includingy(X_incomplete, y): y_scaled = stats.zscore(y) # Start the procedure -print("Executing Example 1 MCAR Missingness") +print("Executing Example 1 MCAR Missingness...") # First, make the data incomplete with a MCAR mechanism. Boston_X_incomplete_MCAR = ampute(X_scaled, mech="MCAR") @@ -434,14 +469,15 @@ def perform_simulation(dataset, X_incomplete, nsim=10): X_incomplete_train, X_incomplete_test, y_train, y_test) # Save the outcome of every simulation round - outcome.append((mse_full, mse_single, mse_approach1, mse_approach2)) + outcome.append((mse_full, mse_single, mse_approach1, + mse_approach2)) # Return the mean and standard deviation of the nsim outcome values return np.mean(outcome, axis=0), np.std(outcome, axis=0) # Execute the simulation -print("Executing Example 2 MCAR Missingness") +print("Executing Example 2 MCAR Missingness...") # Generate missing values with a MCAR mechanism Boston_X_incomplete_MCAR = ampute(X_scaled, mech="MCAR") @@ -449,7 +485,7 @@ def perform_simulation(dataset, X_incomplete, nsim=10): # Perform the simulation mse_means, mse_std = perform_simulation(load_boston(), Boston_X_incomplete_MCAR, - nsim=10) + nsim=2) # Plot results n_situations = 4 From 7c3fb9705a21f34bd392361233111ea6976054d8 Mon Sep 17 00:00:00 2001 From: RianneSchouten Date: Fri, 29 Jun 2018 17:01:27 +0200 Subject: [PATCH 007/163] line from 80 to 79 characters --- examples/plot_multiple_imputation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/plot_multiple_imputation.py b/examples/plot_multiple_imputation.py index 901781cb294bd..781d29db0fd4b 100644 --- a/examples/plot_multiple_imputation.py +++ b/examples/plot_multiple_imputation.py @@ -47,12 +47,13 @@ multiple imputation than with single imputation. This allows for valid statistical inference making. 
-In example 2 we show how to set up a prediction model using multiple imputation. -We compare two approaches. In one approach, we make predictions for each of the -m datasets and combine the m evaluation error metrics into one overall value. -In the other approach, we combine the predictions and calculate one evaluation -error metric over the averaged predictions. A short simulation study shows that -the second approach results in the smallest Mean Squared Error. +In example 2 we show how to set up a prediction model using multiple +imputation. We compare two approaches. In one approach, we make predictions for +each of the m datasets and combine the m evaluation error metrics into one +overall value. In the other approach, we combine the predictions and calculate +one evaluation error metric over the averaged predictions. A short simulation +study shows that the second approach results in the smallest Mean Squared +Error. """ import numpy as np From 40dd0bf6b62ef3824ff68af5d3c51d4412fd4e67 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Mon, 3 Sep 2018 16:05:59 +0800 Subject: [PATCH 008/163] DOC Format in DBSCAN --- sklearn/cluster/dbscan_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py index f10890e10f2c8..c1239b1388dce 100644 --- a/sklearn/cluster/dbscan_.py +++ b/sklearn/cluster/dbscan_.py @@ -233,7 +233,7 @@ class DBSCAN(BaseEstimator, ClusterMixin): n_jobs : int or None, optional (default=None) The number of parallel jobs to run. - ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. From 721ebaece0a1829eecaf0ab0a597b52de97b0d8c Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Mon, 3 Sep 2018 16:36:40 +0200 Subject: [PATCH 009/163] MNT Change max_bound -> max_eps in OPTICS (#11984) --- doc/modules/clustering.rst | 10 +++---- sklearn/cluster/optics_.py | 40 ++++++++++++++-------------- sklearn/cluster/tests/test_optics.py | 10 +++---- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index 968a66e67fdcf..1f8210f35ffb4 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -838,9 +838,9 @@ algorithm builds a *reachability* graph, which assigns each sample both a ``reachability_`` distance, and a spot within the cluster ``ordering_`` attribute; these two attributes are assigned when the model is fitted, and are used to determine cluster membership. If OPTICS is run with the default value -of *inf* set for ``max_bound``, then DBSCAN style cluster extraction can be +of *inf* set for ``max_eps``, then DBSCAN style cluster extraction can be performed in linear time for any given ``eps`` value using the -``extract_dbscan`` method. Setting ``max_bound`` to a lower value will result +``extract_dbscan`` method. Setting ``max_eps`` to a lower value will result in shorter run times, and can be thought of as the maximum cluster object size (in diameter) that OPTICS will be able to extract. @@ -892,10 +892,10 @@ larger parent cluster. shorter run time than OPTICS; however, for repeated runs at varying ``eps`` values, a single run of OPTICS may require less cumulative runtime than DBSCAN. It is also important to note that OPTICS output can be unstable at - ``eps`` values very close to the initial ``max_bound`` value. 
OPTICS seems + ``eps`` values very close to the initial ``max_eps`` value. OPTICS seems to produce near identical results to DBSCAN provided that ``eps`` passed to ``extract_dbscan`` is a half order of magnitude less than the inital - ``max_bound`` that was used to fit; using a value close to ``max_bound`` + ``max_eps`` that was used to fit; using a value close to ``max_eps`` will throw a warning, and using a value larger will result in an exception. .. topic:: Computational Complexity @@ -909,7 +909,7 @@ larger parent cluster. multithreaded, and has better algorithmic runtime complexity than OPTICS-- at the cost of worse memory scaling. For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS will maintain *n* (as opposed - to *n^2* memory scaling); however, tuning of the ``max_bound`` parameter + to *n^2* memory scaling); however, tuning of the ``max_eps`` parameter will likely need to be used to give a solution in a reasonable amount of wall time. diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index e10a92a7590e6..bc0fe5bfe7ceb 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -21,7 +21,7 @@ from ._optics_inner import quick_scan -def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean', +def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', p=2, metric_params=None, maxima_ratio=.75, rejection_ratio=.7, similarity_threshold=0.4, significant_min=.003, min_cluster_size_ratio=.005, @@ -45,11 +45,11 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean', The number of samples in a neighborhood for a point to be considered as a core point. - max_bound : float, optional + max_eps : float, optional The maximum distance between two samples for them to be considered as in the same neighborhood. This is also the largest object size expected within the dataset. Default value of "np.inf" will identify - clusters across all scales; reducing `max_bound` will result in + clusters across all scales; reducing `max_eps` will result in shorter run times. metric : string or callable, optional @@ -147,7 +147,7 @@ def optics(X, min_samples=5, max_bound=np.inf, metric='euclidean', Record 28, no. 2 (1999): 49-60. """ - clust = OPTICS(min_samples, max_bound, metric, p, metric_params, + clust = OPTICS(min_samples, max_eps, metric, p, metric_params, maxima_ratio, rejection_ratio, similarity_threshold, significant_min, min_cluster_size_ratio, min_maxima_ratio, @@ -172,11 +172,11 @@ class OPTICS(BaseEstimator, ClusterMixin): The number of samples in a neighborhood for a point to be considered as a core point. - max_bound : float, optional + max_eps : float, optional The maximum distance between two samples for them to be considered as in the same neighborhood. This is also the largest object size expected within the dataset. Default value of "np.inf" will identify - clusters across all scales; reducing `max_bound` will result in + clusters across all scales; reducing `max_eps` will result in shorter run times. metric : string or callable, optional @@ -284,14 +284,14 @@ class OPTICS(BaseEstimator, ClusterMixin): Record 28, no. 2 (1999): 49-60. 
""" - def __init__(self, min_samples=5, max_bound=np.inf, metric='euclidean', + def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean', p=2, metric_params=None, maxima_ratio=.75, rejection_ratio=.7, similarity_threshold=0.4, significant_min=.003, min_cluster_size_ratio=.005, min_maxima_ratio=0.001, algorithm='ball_tree', leaf_size=30, n_jobs=None): - self.max_bound = max_bound + self.max_eps = max_eps self.min_samples = min_samples self.maxima_ratio = maxima_ratio self.rejection_ratio = rejection_ratio @@ -310,7 +310,7 @@ def fit(self, X, y=None): """Perform OPTICS clustering Extracts an ordered list of points and reachability distances, and - performs initial clustering using `max_bound` distance specified at + performs initial clustering using `max_eps` distance specified at OPTICS object instantiation. Parameters @@ -378,7 +378,7 @@ def fit(self, X, y=None): def _expand_cluster_order(self, point, X, nbrs): # As above, not parallelizable. Parallelizing would allow items in # the 'unprocessed' list to switch to 'processed' - if self.core_distances_[point] <= self.max_bound: + if self.core_distances_[point] <= self.max_eps: while not self._processed[point]: self._processed[point] = True self.ordering_.append(point) @@ -389,7 +389,7 @@ def _expand_cluster_order(self, point, X, nbrs): def _set_reach_dist(self, point_index, X, nbrs): P = np.array(X[point_index]).reshape(1, -1) - indices = nbrs.radius_neighbors(P, radius=self.max_bound, + indices = nbrs.radius_neighbors(P, radius=self.max_eps, return_distance=False)[0] # Getting indices of neighbors that have not been processed @@ -416,17 +416,17 @@ def _set_reach_dist(self, point_index, X, nbrs): def extract_dbscan(self, eps): """Performs DBSCAN extraction for an arbitrary epsilon. - Extraction runs in linear time. Note that if the `max_bound` OPTICS + Extraction runs in linear time. Note that if the `max_eps` OPTICS parameter was set to < inf for extracting reachability and ordering arrays, DBSCAN extractions will be unstable for `eps` values close to - `max_bound`. Setting `eps` < (`max_bound` / 5.0) will guarantee + `max_eps`. Setting `eps` < (`max_eps` / 5.0) will guarantee extraction parity with DBSCAN. Parameters ---------- eps : float or int, required - DBSCAN `eps` parameter. Must be set to < `max_bound`. Equivalence - with DBSCAN algorithm is achieved if `eps` is < (`max_bound` / 5) + DBSCAN `eps` parameter. Must be set to < `max_eps`. Equivalence + with DBSCAN algorithm is achieved if `eps` is < (`max_eps` / 5) Returns ------- @@ -438,14 +438,14 @@ def extract_dbscan(self, eps): """ check_is_fitted(self, 'reachability_') - if eps > self.max_bound: + if eps > self.max_eps: raise ValueError('Specify an epsilon smaller than %s. Got %s.' - % (self.max_bound, eps)) + % (self.max_eps, eps)) - if eps * 5.0 > (self.max_bound * 1.05): + if eps * 5.0 > (self.max_eps * 1.05): warnings.warn( - "Warning, max_bound (%s) is close to eps (%s): " - "Output may be unstable." % (self.max_bound, eps), + "Warning, max_eps (%s) is close to eps (%s): " + "Output may be unstable." % (self.max_eps, eps), RuntimeWarning, stacklevel=2) # Stability warning is documented in _extract_dbscan method... 
diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 2116e75bf4a54..5a89cb7a0c439 100755 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -27,7 +27,7 @@ def test_correct_number_of_clusters(): X = generate_clustered_data(n_clusters=n_clusters) # Parameters chosen specifically for this task. # Compute OPTICS - clust = OPTICS(max_bound=5.0 * 6.0, min_samples=4, metric='euclidean') + clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, metric='euclidean') clust.fit(X) # number of clusters, ignoring noise if present n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_) @@ -41,7 +41,7 @@ def test_minimum_number_of_sample_check(): # Compute OPTICS X = [[1, 1]] - clust = OPTICS(max_bound=5.0 * 0.3, min_samples=10) + clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10) # Run the fit assert_raise_message(ValueError, msg, clust.fit, X) @@ -51,7 +51,7 @@ def test_empty_extract(): # Test extract where fit() has not yet been run. msg = ("This OPTICS instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") - clust = OPTICS(max_bound=5.0 * 0.3, min_samples=10) + clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10) assert_raise_message(ValueError, msg, clust.extract_dbscan, 0.01) @@ -63,7 +63,7 @@ def test_bad_extract(): cluster_std=0.4, random_state=0) # Compute OPTICS - clust = OPTICS(max_bound=5.0 * 0.003, min_samples=10) + clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10) clust2 = clust.fit(X) assert_raise_message(ValueError, msg, clust2.extract_dbscan, 0.3) @@ -76,7 +76,7 @@ def test_close_extract(): cluster_std=0.4, random_state=0) # Compute OPTICS - clust = OPTICS(max_bound=1.0, min_samples=10) + clust = OPTICS(max_eps=1.0, min_samples=10) clust3 = clust.fit(X) # check warning when centers are passed assert_warns(RuntimeWarning, clust3.extract_dbscan, .3) From 84c4e544a0496bf382232e5c5bc7abf7eb699d70 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Tue, 4 Sep 2018 01:39:18 +1000 Subject: [PATCH 010/163] COSMIT remove unnecessary _TreeNode methods (#11983) --- sklearn/cluster/optics_.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index bc0fe5bfe7ceb..306fec73939e5 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -612,12 +612,6 @@ def __init__(self, points, start, end, parent_node): self.children = [] self.split_point = -1 - def assign_split_point(self, split_point): - self.split_point = split_point - - def add_child(self, child): - self.children.append(child) - def _is_local_maxima(index, reachability_plot, neighborhood_size): right_idx = slice(index + 1, index + neighborhood_size + 1) @@ -661,7 +655,7 @@ def _cluster_tree(node, parent_node, local_maxima_points, # take largest local maximum as possible separation between clusters s = local_maxima_points[0] - node.assign_split_point(s) + node.split_point = s local_maxima_points = local_maxima_points[1:] # create two new nodes and add to list of nodes @@ -683,7 +677,7 @@ def _cluster_tree(node, parent_node, local_maxima_points, node_list.append((node_2, local_max_2)) if reachability_plot[s] < significant_min: - node.assign_split_point(-1) + node.split_point = -1 # if split_point is not significant, ignore this split and continue _cluster_tree(node, parent_node, local_maxima_points, reachability_plot, reachability_ordering, @@ -715,7 +709,7 @@ def _cluster_tree(node, parent_node, local_maxima_points, 
(avg_reach2 / reachability_plot[s]) >= rejection_ratio): # since split_point is not significant, # ignore this split and continue (reject both child nodes) - node.assign_split_point(-1) + node.split_point = -1 _cluster_tree(node, parent_node, local_maxima_points, reachability_plot, reachability_ordering, min_cluster_size, maxima_ratio, rejection_ratio, @@ -733,7 +727,7 @@ def _cluster_tree(node, parent_node, local_maxima_points, node_list.remove((node_2, local_max_2)) if not node_list: # parent_node will be a leaf - node.assign_split_point(-1) + node.split_point = -1 return # Check if nodes can be moved up one level - the new cluster created @@ -748,13 +742,13 @@ def _cluster_tree(node, parent_node, local_maxima_points, for nl in node_list: if bypass_node == 1: - parent_node.add_child(nl[0]) + parent_node.children.append(nl[0]) _cluster_tree(nl[0], parent_node, nl[1], reachability_plot, reachability_ordering, min_cluster_size, maxima_ratio, rejection_ratio, similarity_threshold, significant_min) else: - node.add_child(nl[0]) + node.children.append(nl[0]) _cluster_tree(nl[0], node, nl[1], reachability_plot, reachability_ordering, min_cluster_size, maxima_ratio, rejection_ratio, From 07051bc04cc6746fb3370cb7ba0e246784372014 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Tue, 4 Sep 2018 00:23:16 +0200 Subject: [PATCH 011/163] DOC OPTICS: improve docstring and add default values. (#11987) --- sklearn/cluster/optics_.py | 54 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 306fec73939e5..272f987dc9177 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -41,20 +41,19 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', X : array, shape (n_samples, n_features) The data. - min_samples : int + min_samples : int (default=5) The number of samples in a neighborhood for a point to be considered as a core point. - max_eps : float, optional + max_eps : float, optional (default=np.inf) The maximum distance between two samples for them to be considered - as in the same neighborhood. This is also the largest object size - expected within the dataset. Default value of "np.inf" will identify + as in the same neighborhood. Default value of "np.inf" will identify clusters across all scales; reducing `max_eps` will result in shorter run times. - metric : string or callable, optional + metric : string or callable, optional (default='euclidean') The distance metric to use for neighborhood lookups. Default is - "minkowski". Other options include "euclidean", "manhattan", + "euclidean". Other options include "minkowski", "manhattan", "chebyshev", "haversine", "seuclidean", "hamming", "canberra", and "braycurtis". The "wminkowski" and "mahalanobis" metrics are also valid with an additional argument. @@ -68,20 +67,20 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', metric_params : dict, optional (default=None) Additional keyword arguments for the metric function. - maxima_ratio : float, optional + maxima_ratio : float, optional (default=.75) The maximum ratio we allow of average height of clusters on the right and left to the local maxima in question. The higher the ratio, the more generous the algorithm is to preserving local minima, and the more cuts the resulting tree will have. - rejection_ratio : float, optional + rejection_ratio : float, optional (default=.7) Adjusts the fitness of the clustering. 
When the maxima_ratio is exceeded, determine which of the clusters to the left and right to reject based on rejection_ratio. Higher values will result in points being more readily classified as noise; conversely, lower values will result in more points being clustered. - similarity_threshold : float, optional + similarity_threshold : float, optional (default=.4) Used to check if nodes can be moved up one level, that is, if the new cluster created is too "similar" to its parent, given the similarity threshold. Similarity can be determined by 1) the size @@ -91,19 +90,21 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', node. A lower value for the similarity threshold means less levels in the tree. - significant_min : float, optional + significant_min : float, optional (default=.003) Sets a lower threshold on how small a significant maxima can be. - min_cluster_size_ratio : float, optional + min_cluster_size_ratio : float, optional (default=.005) Minimum percentage of dataset expected for cluster membership. - min_maxima_ratio : float, optional + min_maxima_ratio : float, optional (default=.001) Used to determine neighborhood size for minimum cluster membership. + Each local maxima should be a largest value in a neighborhood + of the `size min_maxima_ratio * len(X)` from left and right. algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`BallTree` + - 'ball_tree' will use :class:`BallTree` (default) - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm @@ -168,20 +169,19 @@ class OPTICS(BaseEstimator, ClusterMixin): Parameters ---------- - min_samples : int + min_samples : int (default=5) The number of samples in a neighborhood for a point to be considered as a core point. - max_eps : float, optional + max_eps : float, optional (default=np.inf) The maximum distance between two samples for them to be considered - as in the same neighborhood. This is also the largest object size - expected within the dataset. Default value of "np.inf" will identify + as in the same neighborhood. Default value of "np.inf" will identify clusters across all scales; reducing `max_eps` will result in shorter run times. - metric : string or callable, optional + metric : string or callable, optional (default='euclidean') The distance metric to use for neighborhood lookups. Default is - "minkowski". Other options include "euclidean", "manhattan", + "euclidean". Other options include "minkowski", "manhattan", "chebyshev", "haversine", "seuclidean", "hamming", "canberra", and "braycurtis". The "wminkowski" and "mahalanobis" metrics are also valid with an additional argument. @@ -195,20 +195,20 @@ class OPTICS(BaseEstimator, ClusterMixin): metric_params : dict, optional (default=None) Additional keyword arguments for the metric function. - maxima_ratio : float, optional + maxima_ratio : float, optional (default=.75) The maximum ratio we allow of average height of clusters on the right and left to the local maxima in question. The higher the ratio, the more generous the algorithm is to preserving local minima, and the more cuts the resulting tree will have. - rejection_ratio : float, optional + rejection_ratio : float, optional (default=.7) Adjusts the fitness of the clustering. When the maxima_ratio is exceeded, determine which of the clusters to the left and right to reject based on rejection_ratio. 
Higher values will result in points being more readily classified as noise; conversely, lower values will result in more points being clustered. - similarity_threshold : float, optional + similarity_threshold : float, optional (default=.4) Used to check if nodes can be moved up one level, that is, if the new cluster created is too "similar" to its parent, given the similarity threshold. Similarity can be determined by 1) the size @@ -218,19 +218,21 @@ class OPTICS(BaseEstimator, ClusterMixin): node. A lower value for the similarity threshold means less levels in the tree. - significant_min : float, optional + significant_min : float, optional (default=.003) Sets a lower threshold on how small a significant maxima can be. - min_cluster_size_ratio : float, optional + min_cluster_size_ratio : float, optional (default=.005) Minimum percentage of dataset expected for cluster membership. - min_maxima_ratio : float, optional + min_maxima_ratio : float, optional (default=.001) Used to determine neighborhood size for minimum cluster membership. + Each local maxima should be a largest value in a neighborhood + of the `size min_maxima_ratio * len(X)` from left and right. algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`BallTree` + - 'ball_tree' will use :class:`BallTree` (default) - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm From ddf37c75c7b912104df56e1325363cd94a4fdd5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Szyma=C5=84ski?= Date: Tue, 4 Sep 2018 01:49:42 +0200 Subject: [PATCH 012/163] DOC adding scikit-multilearn to related projects list (#11988) --- doc/related_projects.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 9e5d5a32c0575..ce5f5c24dbf3a 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -183,7 +183,10 @@ and tasks. - `multiisotonic `_ Isotonic regression on multidimensional features. - + +- `scikit-multilearn `_ Multi-label classification with + focus on label space manipulation. + - `seglearn `_ Time series and sequence learning using sliding window segmentation. From a0418215fe34b68d96f611de08ba2558c4d791fb Mon Sep 17 00:00:00 2001 From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com> Date: Tue, 4 Sep 2018 23:36:15 +0200 Subject: [PATCH 013/163] TST FIX use match rather than message in pytest.raises (#12001) Previously these assertions would pass without matching. 
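To illustrate the difference (a hypothetical test, not from this patch; note
that regex metacharacters such as parentheses must be escaped, hence the
`\(0\)` changes below):

    import pytest

    def test_match_checks_exception_text():
        # `match` is re.search-ed against str(exc), so this fails if the
        # raised exception's text does not match the pattern
        with pytest.raises(TypeError,
                           match=r"Parameter grid is not a dict \(0\)"):
            raise TypeError("Parameter grid is not a dict (0)")

With `message=...`, pytest only customizes the failure report shown when no
exception is raised at all, so the exception text was never actually checked.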
--- sklearn/model_selection/tests/test_search.py | 8 ++++---- sklearn/utils/tests/test_validation.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 0409794bf08eb..969b6288a71e8 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -133,13 +133,13 @@ def assert_grid_iter_equals_getitem(grid): @pytest.mark.parametrize( "input, error_type, error_message", - [(0, TypeError, 'Parameter grid is not a dict or a list (0)'), - ([{'foo': [0]}, 0], TypeError, 'Parameter grid is not a dict (0)'), + [(0, TypeError, 'Parameter grid is not a dict or a list \(0\)'), + ([{'foo': [0]}, 0], TypeError, 'Parameter grid is not a dict \(0\)'), ({'foo': 0}, TypeError, "Parameter grid value is not iterable " - "(key='foo', value=0)")] + "\(key='foo', value=0\)")] ) def test_validate_parameter_grid_input(input, error_type, error_message): - with pytest.raises(error_type, message=error_message): + with pytest.raises(error_type, match=error_message): ParameterGrid(input) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 3e577ebaa8eec..5b32d9e2115d3 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -172,7 +172,7 @@ def test_check_array_force_all_finite_valid(value, force_all_finite, retype): (np.inf, 'allow-nan', 'Input contains infinity'), (np.nan, True, 'Input contains NaN, infinity'), (np.nan, 'allow-inf', 'force_all_finite should be a bool or "allow-nan"'), - (np.nan, 1, 'force_all_finite should be a bool or "allow-nan"')] + (np.nan, 1, 'Input contains NaN, infinity')] ) @pytest.mark.parametrize( "retype", @@ -182,7 +182,7 @@ def test_check_array_force_all_finiteinvalid(value, force_all_finite, match_msg, retype): X = retype(np.arange(4).reshape(2, 2).astype(np.float)) X[0, 0] = value - with pytest.raises(ValueError, message=match_msg): + with pytest.raises(ValueError, match=match_msg): check_array(X, force_all_finite=force_all_finite, accept_sparse=True) From efeb23dbba80d03ba96d39c262f49c9cf9e279e4 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 5 Sep 2018 19:02:23 +1000 Subject: [PATCH 014/163] DOC note controversy on multiclass balanced accuracy definition (#11994) --- sklearn/metrics/classification.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 52a07df9aea29..60f47980d6a17 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1403,6 +1403,13 @@ def balanced_accuracy_score(y_true, y_pred, sample_weight=None, -------- recall_score, roc_auc_score + Notes + ----- + Some literature promotes alternative definitions of balanced accuracy. Our + definition is equivalent to :func:`accuracy_score` with class-balanced + sample weights, and shares desirable properties with the binary case. + See the :ref:`User Guide `. + References ---------- .. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010). From dff84c81949374ca49eae2b1b2d267c5b5f12505 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Wed, 5 Sep 2018 05:11:00 -0400 Subject: [PATCH 015/163] MNT Use `fmax` when finding the maximum (#12005) Instead of adding an `if` to check for values that become the new max, simply use `fmax` to get the maximum and update the value. This improves readability. 
It may improve performance as `fmax` can be a single assembly instruction. Though most compilers can probably figure this out anyways. --- sklearn/linear_model/cd_fast.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/cd_fast.pyx b/sklearn/linear_model/cd_fast.pyx index cd044824b4b7a..c75ad0f667d46 100644 --- a/sklearn/linear_model/cd_fast.pyx +++ b/sklearn/linear_model/cd_fast.pyx @@ -251,11 +251,9 @@ def enet_coordinate_descent(floating[::1] w, # update the maximum absolute coefficient update d_w_ii = fabs(w[ii] - w_ii) - if d_w_ii > d_w_max: - d_w_max = d_w_ii + d_w_max = fmax(d_w_max, d_w_ii) - if fabs(w[ii]) > w_max: - w_max = fabs(w[ii]) + w_max = fmax(w_max, fabs(w[ii])) if (w_max == 0.0 or d_w_max / w_max < d_w_tol or From 8d5b08d94db2e19382f67edaff2a4b80ab6605de Mon Sep 17 00:00:00 2001 From: Albert Thomas Date: Wed, 5 Sep 2018 11:15:35 +0200 Subject: [PATCH 016/163] DOC small changes in outlier detection documentation (#12003) --- doc/modules/outlier_detection.rst | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 9dbe013bef5d7..3482d4246cda7 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -8,9 +8,9 @@ Novelty and Outlier Detection Many applications require being able to decide whether a new observation belongs to the same distribution as existing observations (it is an -`inlier`), or should be considered as different (it is an outlier). +*inlier*), or should be considered as different (it is an *outlier*). Often, this ability is used to clean real data sets. Two important -distinction must be made: +distinctions must be made: :outlier detection: The training data contains outliers which are defined as observations that @@ -35,7 +35,7 @@ a low density region of the training data, considered as normal in this context. The scikit-learn project provides a set of machine learning tools that -can be used both for novelty or outliers detection. This strategy is +can be used both for novelty or outlier detection. This strategy is implemented with objects learning in an unsupervised way from the data:: estimator.fit(X_train) @@ -77,6 +77,18 @@ not available. The scores of abnormality of the training samples are always accessible through the ``negative_outlier_factor_`` attribute. +The behavior of :class:`neighbors.LocalOutlierFactor` is summarized in the +following table. + +===================== ================================ ===================== +Method Outlier detection Novelty detection +===================== ================================ ===================== +``fit_predict`` OK Not available +``predict`` Not available Use only on new data +``decision_function`` Not available Use only on new data +``score_samples`` Use ``negative_outlier_factor_`` Use only on new data +===================== ================================ ===================== + Overview of outlier detection methods ===================================== @@ -162,7 +174,7 @@ Outlier Detection Outlier detection is similar to novelty detection in the sense that the goal is to separate a core of regular observations from some -polluting ones, called "outliers". Yet, in the case of outlier +polluting ones, called *outliers*. Yet, in the case of outlier detection, we don't have a clean data set representing the population of regular observations that can be used to train any tool. 
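A minimal sketch of the novelty workflow summarized in the table above
(assuming `LocalOutlierFactor` with the `novelty` parameter as described in
this document):

    import numpy as np
    from sklearn.neighbors import LocalOutlierFactor

    rng = np.random.RandomState(42)
    X_train = rng.randn(100, 2)                  # clean training data
    X_new = np.array([[0.0, 0.0], [6.0, 6.0]])   # new observations to score

    lof = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(X_train)
    pred = lof.predict(X_new)              # +1 for inliers, -1 for outliers
    scores = lof.decision_function(X_new)  # use only on new data
    train_scores = lof.negative_outlier_factor_  # training-sample scores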
@@ -341,19 +353,7 @@ Note that ``fit_predict`` is not available in this case. The scores of abnormality of the training samples are always accessible through the ``negative_outlier_factor_`` attribute. -The behavior of LOF is summarized in the following table. - -==================== ================================ ===================== -Method Outlier detection Novelty detection -==================== ================================ ===================== -`fit_predict` OK Not available -`predict` Not available Use only on test data -`decision_function` Not available Use only on test data -`score_samples` Use `negative_outlier_factor_` Use only on test data -==================== ================================ ===================== - - -This strategy is illustrated below. +Novelty detection with Local Outlier Factor is illustrated below. .. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png :target: ../auto_examples/neighbors/sphx_glr_plot_lof_novelty_detection.html From c2682309206a9e6b298a00479af8b82b80e444f4 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Wed, 5 Sep 2018 17:51:49 +0800 Subject: [PATCH 017/163] MNT Remove n_clusters_ in OPTICS (#11981) --- sklearn/cluster/optics_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 272f987dc9177..5c20ddb421845 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -372,7 +372,6 @@ def fit(self, X, y=None): self.min_cluster_size_ratio, self.min_maxima_ratio) self.core_sample_indices_ = indices_ - self.n_clusters_ = np.max(self.labels_) return self # OPTICS helper functions; these should not be public # From 5e101a2a07ea3586fe663598495cdc3893cb7665 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 5 Sep 2018 13:02:48 +0200 Subject: [PATCH 018/163] Joblib 0.12.4 (#12007) This should fix #11971 (fixed PyPy support, pypy3 is now part of the joblib build matrix on travis). It should also be backported to 0.20.X. --- sklearn/externals/copy_joblib.sh | 2 +- sklearn/externals/joblib/__init__.py | 2 +- .../joblib/externals/loky/__init__.py | 20 +- .../externals/joblib/externals/loky/_base.py | 1055 +++++++++-------- .../joblib/externals/loky/backend/compat.py | 4 +- .../joblib/externals/loky/backend/context.py | 24 +- .../externals/loky/backend/reduction.py | 11 +- .../joblib/externals/loky/process_executor.py | 30 +- sklearn/externals/joblib/memory.py | 34 +- 9 files changed, 617 insertions(+), 565 deletions(-) diff --git a/sklearn/externals/copy_joblib.sh b/sklearn/externals/copy_joblib.sh index 878413297759f..f2c4ab3ed359b 100755 --- a/sklearn/externals/copy_joblib.sh +++ b/sklearn/externals/copy_joblib.sh @@ -11,7 +11,7 @@ else JOBLIB=$1 fi -pip install $JOBLIB --target $INSTALL_FOLDER +pip install --no-cache $JOBLIB --target $INSTALL_FOLDER cp -r $INSTALL_FOLDER/joblib joblib rm -rf $INSTALL_FOLDER diff --git a/sklearn/externals/joblib/__init__.py b/sklearn/externals/joblib/__init__.py index 5953feeb92a52..a42646eb4c754 100644 --- a/sklearn/externals/joblib/__init__.py +++ b/sklearn/externals/joblib/__init__.py @@ -106,7 +106,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = '0.12.3' +__version__ = '0.12.4' from .memory import Memory, MemorizedResult, register_store_backend diff --git a/sklearn/externals/joblib/externals/loky/__init__.py b/sklearn/externals/joblib/externals/loky/__init__.py index 6c5296210e427..18c01d0a6aa04 100644 --- a/sklearn/externals/joblib/externals/loky/__init__.py +++ b/sklearn/externals/joblib/externals/loky/__init__.py @@ -3,10 +3,20 @@ :class:`ProcessPoolExecutor` and a function :func:`get_reusable_executor` which hide the pool management under the hood. """ -from .reusable_executor import get_reusable_executor # noqa: F401 -from .process_executor import ProcessPoolExecutor # noqa: F401 -from .process_executor import BrokenProcessPool # noqa: F401 +from ._base import Executor, Future +from ._base import wait, as_completed +from ._base import TimeoutError, CancelledError +from ._base import ALL_COMPLETED, FIRST_COMPLETED, FIRST_EXCEPTION -from .backend.context import cpu_count # noqa: F401 +from .backend.context import cpu_count +from .reusable_executor import get_reusable_executor +from .process_executor import BrokenProcessPool, ProcessPoolExecutor -__version__ = '2.2.2' + +__all__ = ["get_reusable_executor", "cpu_count", "wait", "as_completed", + "Future", "Executor", "ProcessPoolExecutor", + "BrokenProcessPool", "CancelledError", "TimeoutError", + "FIRST_COMPLETED", "FIRST_EXCEPTION", "ALL_COMPLETED", ] + + +__version__ = '2.3.0' diff --git a/sklearn/externals/joblib/externals/loky/_base.py b/sklearn/externals/joblib/externals/loky/_base.py index ff4ac92cf402d..92422bbf3f2a4 100644 --- a/sklearn/externals/joblib/externals/loky/_base.py +++ b/sklearn/externals/joblib/externals/loky/_base.py @@ -11,46 +11,58 @@ # Licensed to PSF under a Contributor Agreement. import sys -import collections +import time import logging import threading -import time +import collections + + +if sys.version_info[:2] >= (3, 3): + + from concurrent.futures import wait, as_completed + from concurrent.futures import TimeoutError, CancelledError + from concurrent.futures import Executor, Future as _BaseFuture + + from concurrent.futures import FIRST_EXCEPTION + from concurrent.futures import ALL_COMPLETED, FIRST_COMPLETED + + from concurrent.futures._base import LOGGER + from concurrent.futures._base import PENDING, RUNNING, CANCELLED + from concurrent.futures._base import CANCELLED_AND_NOTIFIED, FINISHED +else: -FIRST_COMPLETED = 'FIRST_COMPLETED' -FIRST_EXCEPTION = 'FIRST_EXCEPTION' -ALL_COMPLETED = 'ALL_COMPLETED' -_AS_COMPLETED = '_AS_COMPLETED' - -# Possible future states (for internal use by the futures package). -PENDING = 'PENDING' -RUNNING = 'RUNNING' -# The future was cancelled by the user... -CANCELLED = 'CANCELLED' -# ...and _Waiter.add_cancelled() was called by a worker. -CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' -FINISHED = 'FINISHED' - -_FUTURE_STATES = [ - PENDING, - RUNNING, - CANCELLED, - CANCELLED_AND_NOTIFIED, - FINISHED -] - -_STATE_TO_DESCRIPTION_MAP = { - PENDING: "pending", - RUNNING: "running", - CANCELLED: "cancelled", - CANCELLED_AND_NOTIFIED: "cancelled", - FINISHED: "finished" -} - -# Logger for internal use by the futures package. -LOGGER = logging.getLogger("concurrent.futures") - - -if sys.version_info[:2] < (3, 3): + FIRST_COMPLETED = 'FIRST_COMPLETED' + FIRST_EXCEPTION = 'FIRST_EXCEPTION' + ALL_COMPLETED = 'ALL_COMPLETED' + _AS_COMPLETED = '_AS_COMPLETED' + + # Possible future states (for internal use by the futures package). 
+ PENDING = 'PENDING' + RUNNING = 'RUNNING' + # The future was cancelled by the user... + CANCELLED = 'CANCELLED' + # ...and _Waiter.add_cancelled() was called by a worker. + CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' + FINISHED = 'FINISHED' + + _FUTURE_STATES = [ + PENDING, + RUNNING, + CANCELLED, + CANCELLED_AND_NOTIFIED, + FINISHED + ] + + _STATE_TO_DESCRIPTION_MAP = { + PENDING: "pending", + RUNNING: "running", + CANCELLED: "cancelled", + CANCELLED_AND_NOTIFIED: "cancelled", + FINISHED: "finished" + } + + # Logger for internal use by the futures package. + LOGGER = logging.getLogger("concurrent.futures") class Error(Exception): """Base class for all future-related exceptions.""" @@ -63,548 +75,553 @@ class CancelledError(Error): class TimeoutError(Error): """The operation exceeded the given deadline.""" pass -else: - from concurrent.futures import CancelledError, TimeoutError + class _Waiter(object): + """Provides the event that wait() and as_completed() block on.""" + def __init__(self): + self.event = threading.Event() + self.finished_futures = [] -class _Waiter(object): - """Provides the event that wait() and as_completed() block on.""" - def __init__(self): - self.event = threading.Event() - self.finished_futures = [] + def add_result(self, future): + self.finished_futures.append(future) - def add_result(self, future): - self.finished_futures.append(future) + def add_exception(self, future): + self.finished_futures.append(future) - def add_exception(self, future): - self.finished_futures.append(future) + def add_cancelled(self, future): + self.finished_futures.append(future) - def add_cancelled(self, future): - self.finished_futures.append(future) + class _AsCompletedWaiter(_Waiter): + """Used by as_completed().""" + def __init__(self): + super(_AsCompletedWaiter, self).__init__() + self.lock = threading.Lock() -class _AsCompletedWaiter(_Waiter): - """Used by as_completed().""" + def add_result(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_result(future) + self.event.set() - def __init__(self): - super(_AsCompletedWaiter, self).__init__() - self.lock = threading.Lock() + def add_exception(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_exception(future) + self.event.set() - def add_result(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_result(future) - self.event.set() + def add_cancelled(self, future): + with self.lock: + super(_AsCompletedWaiter, self).add_cancelled(future) + self.event.set() - def add_exception(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_exception(future) - self.event.set() + class _FirstCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_COMPLETED).""" - def add_cancelled(self, future): - with self.lock: - super(_AsCompletedWaiter, self).add_cancelled(future) + def add_result(self, future): + super(_FirstCompletedWaiter, self).add_result(future) self.event.set() + def add_exception(self, future): + super(_FirstCompletedWaiter, self).add_exception(future) + self.event.set() -class _FirstCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_COMPLETED).""" - - def add_result(self, future): - super(_FirstCompletedWaiter, self).add_result(future) - self.event.set() - - def add_exception(self, future): - super(_FirstCompletedWaiter, self).add_exception(future) - self.event.set() + def add_cancelled(self, future): + super(_FirstCompletedWaiter, self).add_cancelled(future) + self.event.set() - def add_cancelled(self, future): - 
super(_FirstCompletedWaiter, self).add_cancelled(future) - self.event.set() + class _AllCompletedWaiter(_Waiter): + """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" + def __init__(self, num_pending_calls, stop_on_exception): + self.num_pending_calls = num_pending_calls + self.stop_on_exception = stop_on_exception + self.lock = threading.Lock() + super(_AllCompletedWaiter, self).__init__() -class _AllCompletedWaiter(_Waiter): - """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED).""" + def _decrement_pending_calls(self): + with self.lock: + self.num_pending_calls -= 1 + if not self.num_pending_calls: + self.event.set() - def __init__(self, num_pending_calls, stop_on_exception): - self.num_pending_calls = num_pending_calls - self.stop_on_exception = stop_on_exception - self.lock = threading.Lock() - super(_AllCompletedWaiter, self).__init__() + def add_result(self, future): + super(_AllCompletedWaiter, self).add_result(future) + self._decrement_pending_calls() - def _decrement_pending_calls(self): - with self.lock: - self.num_pending_calls -= 1 - if not self.num_pending_calls: + def add_exception(self, future): + super(_AllCompletedWaiter, self).add_exception(future) + if self.stop_on_exception: self.event.set() + else: + self._decrement_pending_calls() - def add_result(self, future): - super(_AllCompletedWaiter, self).add_result(future) - self._decrement_pending_calls() - - def add_exception(self, future): - super(_AllCompletedWaiter, self).add_exception(future) - if self.stop_on_exception: - self.event.set() - else: + def add_cancelled(self, future): + super(_AllCompletedWaiter, self).add_cancelled(future) self._decrement_pending_calls() - def add_cancelled(self, future): - super(_AllCompletedWaiter, self).add_cancelled(future) - self._decrement_pending_calls() - - -class _AcquireFutures(object): - """A context manager that does an ordered acquire of Future conditions.""" - - def __init__(self, futures): - self.futures = sorted(futures, key=id) - - def __enter__(self): - for future in self.futures: - future._condition.acquire() + class _AcquireFutures(object): + """A context manager that does an ordered acquire of Future conditions. + """ - def __exit__(self, *args): - for future in self.futures: - future._condition.release() + def __init__(self, futures): + self.futures = sorted(futures, key=id) + def __enter__(self): + for future in self.futures: + future._condition.acquire() -def _create_and_install_waiters(fs, return_when): - if return_when == _AS_COMPLETED: - waiter = _AsCompletedWaiter() - elif return_when == FIRST_COMPLETED: - waiter = _FirstCompletedWaiter() - else: - pending_count = sum( - f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs) + def __exit__(self, *args): + for future in self.futures: + future._condition.release() - if return_when == FIRST_EXCEPTION: - waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True) - elif return_when == ALL_COMPLETED: - waiter = _AllCompletedWaiter(pending_count, - stop_on_exception=False) + def _create_and_install_waiters(fs, return_when): + if return_when == _AS_COMPLETED: + waiter = _AsCompletedWaiter() + elif return_when == FIRST_COMPLETED: + waiter = _FirstCompletedWaiter() else: - raise ValueError("Invalid return condition: %r" % return_when) - - for f in fs: - f._waiters.append(waiter) - - return waiter - - -def as_completed(fs, timeout=None): - """An iterator over the given futures that yields each as it completes. 
- - Args: - fs: The sequence of Futures (possibly created by different Executors) - to iterate over. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - - Returns: - An iterator that yields the given Futures as they complete (finished or - cancelled). If any given Futures are duplicated, they will be returned - once. - - Raises: - TimeoutError: If the entire result iterator could not be generated - before the given timeout. - """ - if timeout is not None: - end_time = timeout + time.time() - - fs = set(fs) - with _AcquireFutures(fs): - finished = set( - f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - pending = fs - finished - waiter = _create_and_install_waiters(fs, _AS_COMPLETED) - - try: - for future in finished: - yield future - - while pending: - if timeout is None: - wait_timeout = None + pending_count = sum( + f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] + for f in fs) + + if return_when == FIRST_EXCEPTION: + waiter = _AllCompletedWaiter(pending_count, + stop_on_exception=True) + elif return_when == ALL_COMPLETED: + waiter = _AllCompletedWaiter(pending_count, + stop_on_exception=False) else: - wait_timeout = end_time - time.time() - if wait_timeout < 0: - raise TimeoutError('%d (of %d) futures unfinished' % ( - len(pending), len(fs))) - - waiter.event.wait(wait_timeout) - - with waiter.lock: - finished = waiter.finished_futures - waiter.finished_futures = [] - waiter.event.clear() + raise ValueError("Invalid return condition: %r" % return_when) - for future in finished: - yield future - pending.remove(future) - - finally: for f in fs: - with f._condition: - f._waiters.remove(waiter) - - -DoneAndNotDoneFutures = collections.namedtuple( - 'DoneAndNotDoneFutures', 'done not_done') - - -def wait(fs, timeout=None, return_when=ALL_COMPLETED): - """Wait for the futures in the given sequence to complete. - - Args: - fs: The sequence of Futures (possibly created by different Executors) - to wait upon. - timeout: The maximum number of seconds to wait. If None, then there - is no limit on the wait time. - return_when: Indicates when this function should return. The options - are: - - FIRST_COMPLETED - Return when any future finishes or is - cancelled. - FIRST_EXCEPTION - Return when any future finishes by raising an - exception. If no future raises an exception - then it is equivalent to ALL_COMPLETED. - ALL_COMPLETED - Return when all futures finish or are cancelled. - - Returns: - A named 2-tuple of sets. The first set, named 'done', contains the - futures that completed (is finished or cancelled) before the wait - completed. The second set, named 'not_done', contains uncompleted - futures. 
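For illustration, a minimal sketch of this API through the loky package
vendored here (a hypothetical script; the names are those exported by the
loky `__init__.py` shown above):

    from sklearn.externals.joblib.externals.loky import (
        FIRST_COMPLETED, get_reusable_executor, wait)

    executor = get_reusable_executor(max_workers=2)
    futures = [executor.submit(pow, 2, i) for i in range(4)]
    done, not_done = wait(futures, return_when=FIRST_COMPLETED)
    # 'done' holds at least one finished future; fetch values via f.result()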
- """ - with _AcquireFutures(fs): - done = set(f for f in fs - if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) - not_done = set(fs) - done - - if (return_when == FIRST_COMPLETED) and done: - return DoneAndNotDoneFutures(done, not_done) - elif (return_when == FIRST_EXCEPTION) and done: - if any(f for f in done - if not f.cancelled() and f.exception() is not None): - return DoneAndNotDoneFutures(done, not_done) - - if len(done) == len(fs): - return DoneAndNotDoneFutures(done, not_done) - - waiter = _create_and_install_waiters(fs, return_when) - - waiter.event.wait(timeout) - for f in fs: - with f._condition: - f._waiters.remove(waiter) - - done.update(waiter.finished_futures) - return DoneAndNotDoneFutures(done, set(fs) - done) - - -class Future(object): - """Represents the result of an asynchronous computation.""" - - def __init__(self): - """Initializes the future. Should not be called by clients.""" - self._condition = threading.Condition() - self._state = PENDING - self._result = None - self._exception = None - self._waiters = [] - self._done_callbacks = [] - - def _invoke_callbacks(self): - for callback in self._done_callbacks: - try: - callback(self) - except BaseException: - LOGGER.exception('exception calling callback for %r', self) - - def __repr__(self): - with self._condition: - if self._state == FINISHED: - if self._exception: - return '<%s at %#x state=%s raised %s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._exception.__class__.__name__) - else: - return '<%s at %#x state=%s returned %s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state], - self._result.__class__.__name__) - return '<%s at %#x state=%s>' % ( - self.__class__.__name__, - id(self), - _STATE_TO_DESCRIPTION_MAP[self._state]) - - def cancel(self): - """Cancel the future if possible. - - Returns True if the future was cancelled, False otherwise. A future - cannot be cancelled if it is running or has already completed. - """ - with self._condition: - if self._state in [RUNNING, FINISHED]: - return False - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - return True - - self._state = CANCELLED - self._condition.notify_all() - - self._invoke_callbacks() - return True - - def cancelled(self): - """Return True if the future was cancelled.""" - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED] - - def running(self): - """Return True if the future is currently executing.""" - with self._condition: - return self._state == RUNNING - - def done(self): - """Return True of the future was cancelled or finished executing.""" - with self._condition: - return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] - - def __get_result(self): - if self._exception: - raise self._exception - else: - return self._result - - def add_done_callback(self, fn): - """Attaches a callable that will be called when the future finishes. - - Args: - fn: A callable that will be called with this future as its only - argument when the future completes or is cancelled. The - callable will always be called by a thread in the same process - in which it was added. If the future has already completed or - been cancelled then the callable will be called immediately. - These callables are called in the order that they were added. 
- """ - with self._condition: - if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, - FINISHED]: - self._done_callbacks.append(fn) - return - fn(self) - - def result(self, timeout=None): - """Return the result of the call that the future represents. - - Args: - timeout: The number of seconds to wait for the result if the future - isn't done. If None, then there is no limit on the wait time. + f._waiters.append(waiter) - Returns: - The result of the call that the future represents. - - Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the - given timeout. - Exception: If the call raised then that exception will be raised. - """ - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self.__get_result() - else: - raise TimeoutError() + return waiter - def exception(self, timeout=None): - """Return the exception raised by the call that the future represents. + def as_completed(fs, timeout=None): + """An iterator over the given futures that yields each as it completes. Args: - timeout: The number of seconds to wait for the exception if the - future isn't done. If None, then there is no limit on the wait - time. + fs: The sequence of Futures (possibly created by different + Executors) to iterate over. + timeout: The maximum number of seconds to wait. If None, then there + is no limit on the wait time. Returns: - The exception raised by the call that the future represents or None - if the call completed without raising. + An iterator that yields the given Futures as they complete + (finished or cancelled). If any given Futures are duplicated, they + will be returned once. Raises: - CancelledError: If the future was cancelled. - TimeoutError: If the future didn't finish executing before the - given timeout. + TimeoutError: If the entire result iterator could not be generated + before the given timeout. """ + if timeout is not None: + end_time = timeout + time.time() - with self._condition: - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception - - self._condition.wait(timeout) - - if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: - raise CancelledError() - elif self._state == FINISHED: - return self._exception - else: - raise TimeoutError() - - # The following methods should only be used by Executors and in tests. - def set_running_or_notify_cancel(self): - """Mark the future as running or process any cancel notifications. - - Should only be used by Executor implementations and unit tests. - - If the future has been cancelled (cancel() was called and returned - True) then any threads waiting on the future completing (though calls - to as_completed() or wait()) are notified and False is returned. - - If the future was not cancelled then it is put in the running state - (future calls to running() will return True) and True is returned. - - This method should be called by Executor implementations before - executing the work associated with this future. If this method returns - False then the work should not be executed. 
+ fs = set(fs) + with _AcquireFutures(fs): + finished = set( + f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + pending = fs - finished + waiter = _create_and_install_waiters(fs, _AS_COMPLETED) - Returns: - False if the Future was cancelled, True otherwise. + try: + for future in finished: + yield future - Raises: - RuntimeError: if this method was already called or if set_result() - or set_exception() was called. - """ - with self._condition: - if self._state == CANCELLED: - self._state = CANCELLED_AND_NOTIFIED - for waiter in self._waiters: - waiter.add_cancelled(self) - # self._condition.notify_all() is not necessary because - # self.cancel() triggers a notification. - return False - elif self._state == PENDING: - self._state = RUNNING - return True - else: - LOGGER.critical('Future %s in unexpected state: %s', - id(self), - self._state) - raise RuntimeError('Future in unexpected state') + while pending: + if timeout is None: + wait_timeout = None + else: + wait_timeout = end_time - time.time() + if wait_timeout < 0: + raise TimeoutError('%d (of %d) futures unfinished' % ( + len(pending), len(fs))) - def set_result(self, result): - """Sets the return value of work associated with the future. + waiter.event.wait(wait_timeout) - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._result = result - self._state = FINISHED - for waiter in self._waiters: - waiter.add_result(self) - self._condition.notify_all() - self._invoke_callbacks() - - def set_exception(self, exception): - """Sets the result of the future as being the given exception. - - Should only be used by Executor implementations and unit tests. - """ - with self._condition: - self._exception = exception - self._state = FINISHED - for waiter in self._waiters: - waiter.add_exception(self) - self._condition.notify_all() - self._invoke_callbacks() + with waiter.lock: + finished = waiter.finished_futures + waiter.finished_futures = [] + waiter.event.clear() + for future in finished: + yield future + pending.remove(future) -class Executor(object): - """This is an abstract base class for concrete asynchronous executors.""" + finally: + for f in fs: + with f._condition: + f._waiters.remove(waiter) - def submit(self, fn, *args, **kwargs): - """Submits a callable to be executed with the given arguments. + DoneAndNotDoneFutures = collections.namedtuple( + 'DoneAndNotDoneFutures', 'done not_done') - Schedules the callable to be executed as fn(*args, **kwargs) and - returns a Future instance representing the execution of the callable. - - Returns: - A Future representing the given call. - """ - raise NotImplementedError() - - def map(self, fn, *iterables, **kwargs): - """Returns an iterator equivalent to map(fn, iter). + def wait(fs, timeout=None, return_when=ALL_COMPLETED): + """Wait for the futures in the given sequence to complete. Args: - fn: A callable that will take as many arguments as there are - passed iterables. + fs: The sequence of Futures (possibly created by different + Executors) to wait upon. timeout: The maximum number of seconds to wait. If None, then there is no limit on the wait time. - chunksize: The size of the chunks the iterable will be broken into - before being passed to a child process. This argument is only - used by ProcessPoolExecutor; it is ignored by - ThreadPoolExecutor. + return_when: Indicates when this function should return. 
The + options are: - Returns: - An iterator equivalent to: map(func, *iterables) but the calls may - be evaluated out-of-order. + FIRST_COMPLETED - Return when any future finishes or is + cancelled. + FIRST_EXCEPTION - Return when any future finishes by raising an + exception. If no future raises an exception + then it is equivalent to ALL_COMPLETED. + ALL_COMPLETED - Return when all futures finish or are + cancelled. - Raises: - TimeoutError: If the entire result iterator could not be generated - before the given timeout. - Exception: If fn(*args) raises for any values. + Returns: + A named 2-tuple of sets. The first set, named 'done', contains the + futures that completed (is finished or cancelled) before the wait + completed. The second set, named 'not_done', contains uncompleted + futures. """ - timeout = kwargs.get('timeout') - if timeout is not None: - end_time = timeout + time.time() + with _AcquireFutures(fs): + done = set(f for f in fs + if f._state in [CANCELLED_AND_NOTIFIED, FINISHED]) + not_done = set(fs) - done - fs = [self.submit(fn, *args) for args in zip(*iterables)] + if (return_when == FIRST_COMPLETED) and done: + return DoneAndNotDoneFutures(done, not_done) + elif (return_when == FIRST_EXCEPTION) and done: + if any(f for f in done + if not f.cancelled() and f.exception() is not None): + return DoneAndNotDoneFutures(done, not_done) - # Yield must be hidden in closure so that the futures are submitted - # before the first iterator value is required. - def result_iterator(): - try: - for future in fs: - if timeout is None: - yield future.result() - else: - yield future.result(end_time - time.time()) - finally: - for future in fs: - future.cancel() - return result_iterator() + if len(done) == len(fs): + return DoneAndNotDoneFutures(done, not_done) - def shutdown(self, wait=True): - """Clean-up the resources associated with the Executor. + waiter = _create_and_install_waiters(fs, return_when) - It is safe to call this method several times. Otherwise, no other - methods can be called after this one. + waiter.event.wait(timeout) + for f in fs: + with f._condition: + f._waiters.remove(waiter) - Args: - wait: If True then shutdown will not return until all running - futures have finished executing and the resources used by the - executor have been reclaimed. - """ - pass + done.update(waiter.finished_futures) + return DoneAndNotDoneFutures(done, set(fs) - done) + + class _BaseFuture(object): + """Represents the result of an asynchronous computation.""" + + def __init__(self): + """Initializes the future. Should not be called by clients.""" + self._condition = threading.Condition() + self._state = PENDING + self._result = None + self._exception = None + self._waiters = [] + self._done_callbacks = [] + + def __repr__(self): + with self._condition: + if self._state == FINISHED: + if self._exception: + return '<%s at %#x state=%s raised %s>' % ( + self.__class__.__name__, + id(self), + _STATE_TO_DESCRIPTION_MAP[self._state], + self._exception.__class__.__name__) + else: + return '<%s at %#x state=%s returned %s>' % ( + self.__class__.__name__, + id(self), + _STATE_TO_DESCRIPTION_MAP[self._state], + self._result.__class__.__name__) + return '<%s at %#x state=%s>' % ( + self.__class__.__name__, + id(self), + _STATE_TO_DESCRIPTION_MAP[self._state]) + + def cancel(self): + """Cancel the future if possible. + + Returns True if the future was cancelled, False otherwise. A future + cannot be cancelled if it is running or has already completed. 
+ """ + with self._condition: + if self._state in [RUNNING, FINISHED]: + return False + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + return True + + self._state = CANCELLED + self._condition.notify_all() + + self._invoke_callbacks() + return True + + def cancelled(self): + """Return True if the future was cancelled.""" + with self._condition: + return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED] + + def running(self): + """Return True if the future is currently executing.""" + with self._condition: + return self._state == RUNNING + + def done(self): + """Return True of the future was cancelled or finished executing. + """ + with self._condition: + return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, + FINISHED] + + def __get_result(self): + if self._exception: + raise self._exception + else: + return self._result + + def add_done_callback(self, fn): + """Attaches a callable that will be called when the future finishes. + + Args: + fn: A callable that will be called with this future as its only + argument when the future completes or is cancelled. The + callable will always be called by a thread in the same + process in which it was added. If the future has already + completed or been cancelled then the callable will be + called immediately. These callables are called in the order + that they were added. + """ + with self._condition: + if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, + FINISHED]: + self._done_callbacks.append(fn) + return + fn(self) + + def result(self, timeout=None): + """Return the result of the call that the future represents. + + Args: + timeout: The number of seconds to wait for the result if the + future isn't done. If None, then there is no limit on the + wait time. + + Returns: + The result of the call that the future represents. + + Raises: + CancelledError: If the future was cancelled. + TimeoutError: If the future didn't finish executing before the + given timeout. + Exception: If the call raised then that exception will be + raised. + """ + with self._condition: + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self.__get_result() + + self._condition.wait(timeout) + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self.__get_result() + else: + raise TimeoutError() + + def exception(self, timeout=None): + """Return the exception raised by the call that the future + represents. + + Args: + timeout: The number of seconds to wait for the exception if the + future isn't done. If None, then there is no limit on the + wait time. + + Returns: + The exception raised by the call that the future represents or + None if the call completed without raising. + + Raises: + CancelledError: If the future was cancelled. + TimeoutError: If the future didn't finish executing before the + given timeout. + """ + + with self._condition: + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self._exception + + self._condition.wait(timeout) + + if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: + raise CancelledError() + elif self._state == FINISHED: + return self._exception + else: + raise TimeoutError() + + # The following methods should only be used by Executors and in tests. + def set_running_or_notify_cancel(self): + """Mark the future as running or process any cancel notifications. 
+ + Should only be used by Executor implementations and unit tests. + + If the future has been cancelled (cancel() was called and returned + True) then any threads waiting on the future completing (though + calls to as_completed() or wait()) are notified and False is + returned. + + If the future was not cancelled then it is put in the running state + (future calls to running() will return True) and True is returned. + + This method should be called by Executor implementations before + executing the work associated with this future. If this method + returns False then the work should not be executed. + + Returns: + False if the Future was cancelled, True otherwise. + + Raises: + RuntimeError: if this method was already called or if + set_result() or set_exception() was called. + """ + with self._condition: + if self._state == CANCELLED: + self._state = CANCELLED_AND_NOTIFIED + for waiter in self._waiters: + waiter.add_cancelled(self) + # self._condition.notify_all() is not necessary because + # self.cancel() triggers a notification. + return False + elif self._state == PENDING: + self._state = RUNNING + return True + else: + LOGGER.critical('Future %s in unexpected state: %s', + id(self), + self._state) + raise RuntimeError('Future in unexpected state') + + def set_result(self, result): + """Sets the return value of work associated with the future. + + Should only be used by Executor implementations and unit tests. + """ + with self._condition: + self._result = result + self._state = FINISHED + for waiter in self._waiters: + waiter.add_result(self) + self._condition.notify_all() + self._invoke_callbacks() + + def set_exception(self, exception): + """Sets the result of the future as being the given exception. + + Should only be used by Executor implementations and unit tests. + """ + with self._condition: + self._exception = exception + self._state = FINISHED + for waiter in self._waiters: + waiter.add_exception(self) + self._condition.notify_all() + self._invoke_callbacks() - def __enter__(self): - return self + class Executor(object): + """This is an abstract base class for concrete asynchronous executors. + """ - def __exit__(self, exc_type, exc_val, exc_tb): - self.shutdown(wait=True) - return False + def submit(self, fn, *args, **kwargs): + """Submits a callable to be executed with the given arguments. + + Schedules the callable to be executed as fn(*args, **kwargs) and + returns a Future instance representing the execution of the + callable. + + Returns: + A Future representing the given call. + """ + raise NotImplementedError() + + def map(self, fn, *iterables, **kwargs): + """Returns an iterator equivalent to map(fn, iter). + + Args: + fn: A callable that will take as many arguments as there are + passed iterables. + timeout: The maximum number of seconds to wait. If None, then + there is no limit on the wait time. + chunksize: The size of the chunks the iterable will be broken + into before being passed to a child process. This argument + is only used by ProcessPoolExecutor; it is ignored by + ThreadPoolExecutor. + + Returns: + An iterator equivalent to: map(func, *iterables) but the calls + may be evaluated out-of-order. + + Raises: + TimeoutError: If the entire result iterator could not be + generated before the given timeout. + Exception: If fn(*args) raises for any values. 
+ """ + timeout = kwargs.get('timeout') + if timeout is not None: + end_time = timeout + time.time() + + fs = [self.submit(fn, *args) for args in zip(*iterables)] + + # Yield must be hidden in closure so that the futures are submitted + # before the first iterator value is required. + def result_iterator(): + try: + for future in fs: + if timeout is None: + yield future.result() + else: + yield future.result(end_time - time.time()) + finally: + for future in fs: + future.cancel() + return result_iterator() + + def shutdown(self, wait=True): + """Clean-up the resources associated with the Executor. + + It is safe to call this method several times. Otherwise, no other + methods can be called after this one. + + Args: + wait: If True then shutdown will not return until all running + futures have finished executing and the resources used by + the executor have been reclaimed. + """ + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.shutdown(wait=True) + return False + + +# To make loky._base.Future instances awaitable by concurrent.futures.wait, +# derive our custom Future class from _BaseFuture. _invoke_callback is the only +# modification made to this class in loky. +class Future(_BaseFuture): + def _invoke_callbacks(self): + for callback in self._done_callbacks: + try: + callback(self) + except BaseException: + LOGGER.exception('exception calling callback for %r', self) diff --git a/sklearn/externals/joblib/externals/loky/backend/compat.py b/sklearn/externals/joblib/externals/loky/backend/compat.py index 6366b23d9f380..729c77c7d9bca 100644 --- a/sklearn/externals/joblib/externals/loky/backend/compat.py +++ b/sklearn/externals/joblib/externals/loky/backend/compat.py @@ -9,10 +9,10 @@ if sys.version_info[:2] >= (3, 3): import queue - from _pickle import PicklingError else: import Queue as queue - from pickle import PicklingError + +from pickle import PicklingError if sys.version_info >= (3, 4): from multiprocessing.process import BaseProcess diff --git a/sklearn/externals/joblib/externals/loky/backend/context.py b/sklearn/externals/joblib/externals/loky/backend/context.py index b38787efb5d0d..0f744c5918b5c 100644 --- a/sklearn/externals/joblib/externals/loky/backend/context.py +++ b/sklearn/externals/joblib/externals/loky/backend/context.py @@ -106,12 +106,13 @@ def cpu_count(): The returned number of CPUs accounts for: * the number of CPUs in the system, as given by - ``multiprocessing.cpu_count`` + ``multiprocessing.cpu_count``; * the CPU affinity settings of the current process - (available with Python 3.4+ on some Unix systems) - * CFS scheduler CPU bandwidth limit - (available on Linux only) - and is given as the minimum of these three constraints. + (available with Python 3.4+ on some Unix systems); + * CFS scheduler CPU bandwidth limit (available on Linux only, typically + set by docker and similar container orchestration systems); + * the value of the LOKY_MAX_CPU_COUNT environment variable if defined. + and is given as the minimum of these constraints. It is also always larger or equal to 1. """ import math @@ -141,10 +142,15 @@ def cpu_count(): cfs_period_us = int(fh.read()) if cfs_quota_us > 0 and cfs_period_us > 0: - cpu_count_cfs = math.ceil(cfs_quota_us / cfs_period_us) - cpu_count_cfs = max(cpu_count_cfs, 1) - - return min(cpu_count_mp, cpu_count_affinity, cpu_count_cfs) + # Make sure this quantity is an int as math.ceil returns a + # float in python2.7. 
(See issue #165)
+            cpu_count_cfs = int(math.ceil(cfs_quota_us / cfs_period_us))
+
+    # User-defined soft limit passed as a loky-specific environment variable.
+    cpu_count_loky = int(os.environ.get('LOKY_MAX_CPU_COUNT', cpu_count_mp))
+    aggregate_cpu_count = min(cpu_count_mp, cpu_count_affinity, cpu_count_cfs,
+                              cpu_count_loky)
+    return max(aggregate_cpu_count, 1)
 
 
 class LokyContext(BaseContext):
diff --git a/sklearn/externals/joblib/externals/loky/backend/reduction.py b/sklearn/externals/joblib/externals/loky/backend/reduction.py
index 20eb581cbfce7..b621a92930c92 100644
--- a/sklearn/externals/joblib/externals/loky/backend/reduction.py
+++ b/sklearn/externals/joblib/externals/loky/backend/reduction.py
@@ -181,12 +181,13 @@ def h(cls):
 register(type(_C.h), _reduce_method)
 
 
-def _reduce_method_descriptor(m):
-    return getattr, (m.__objclass__, m.__name__)
+if not hasattr(sys, "pypy_version_info"):
+    # PyPy uses functions instead of method_descriptors and wrapper_descriptors
+    def _reduce_method_descriptor(m):
+        return getattr, (m.__objclass__, m.__name__)
 
-
-register(type(list.append), _reduce_method_descriptor)
-register(type(int.__add__), _reduce_method_descriptor)
+    register(type(list.append), _reduce_method_descriptor)
+    register(type(int.__add__), _reduce_method_descriptor)
 
 
 # Make partial func pickable
diff --git a/sklearn/externals/joblib/externals/loky/process_executor.py b/sklearn/externals/joblib/externals/loky/process_executor.py
index c3072453109d9..57a7617d9ab7e 100644
--- a/sklearn/externals/joblib/externals/loky/process_executor.py
+++ b/sklearn/externals/joblib/externals/loky/process_executor.py
@@ -117,14 +117,16 @@
 MAX_DEPTH = int(os.environ.get("LOKY_MAX_DEPTH", 10))
 _CURRENT_DEPTH = 0
 
-# Minimum time interval between two consecutive memory usage checks.
-_MEMORY_CHECK_DELAY = 1.
+# Minimum time interval between two consecutive memory leak protection checks.
+_MEMORY_LEAK_CHECK_DELAY = 1.
 
 # Number of bytes of memory usage allowed over the reference process size.
_MAX_MEMORY_LEAK_SIZE = int(1e8)
+
 
 try:
     from psutil import Process
+    _USE_PSUTIL = True
 
     def _get_memory_usage(pid, force_gc=False):
         if force_gc:
@@ -133,7 +135,7 @@ def _get_memory_usage(pid, force_gc=False):
         return Process(pid).memory_info().rss
 
 except ImportError:
-    _get_memory_usage = None
+    _USE_PSUTIL = False
 
 
 class _ThreadWakeup:
@@ -383,7 +385,7 @@ def _process_worker(call_queue, result_queue, initializer, initargs,
     global _CURRENT_DEPTH
     _CURRENT_DEPTH = current_depth
     _process_reference_size = None
-    _process_last_memory_check = None
+    _last_memory_leak_check = None
     pid = os.getpid()
 
     mp.util.debug('Worker started with timeout=%s' % timeout)
@@ -422,20 +424,21 @@ def _process_worker(call_queue, result_queue, initializer, initargs,
                 result_queue.put(_ResultItem(call_item.work_id,
                                              exception=exc))
             else:
                 _sendback_result(result_queue, call_item.work_id, result=r)
+                del r
 
             # Free the resource as soon as possible, to avoid holding onto
             # open files or shared memory that is not needed anymore
             del call_item
 
-            if _get_memory_usage is not None:
+            if _USE_PSUTIL:
                 if _process_reference_size is None:
                     # Make reference measurement after the first call
                     _process_reference_size = _get_memory_usage(pid,
                                                                 force_gc=True)
-                    _process_last_memory_check = time()
+                    _last_memory_leak_check = time()
                     continue
-                if time() - _process_last_memory_check > _MEMORY_CHECK_DELAY:
+                if time() - _last_memory_leak_check > _MEMORY_LEAK_CHECK_DELAY:
                     mem_usage = _get_memory_usage(pid)
-                    _process_last_memory_check = time()
+                    _last_memory_leak_check = time()
                     if mem_usage - _process_reference_size < _MAX_MEMORY_LEAK_SIZE:
                         # Memory usage stays within bounds: everything is fine.
                         continue
@@ -444,7 +447,7 @@ def _process_worker(call_queue, result_queue, initializer, initargs,
                     # after a forced garbage collection to break any reference
                     # cycles.
                     mem_usage = _get_memory_usage(pid, force_gc=True)
-                    _process_last_memory_check = time()
+                    _last_memory_leak_check = time()
                     if mem_usage - _process_reference_size < _MAX_MEMORY_LEAK_SIZE:
                         # The GC managed to free the memory: everything is fine.
                         continue
@@ -455,6 +458,14 @@ def _process_worker(call_queue, result_queue, initializer, initargs,
                     result_queue.put(pid)
                     with worker_exit_lock:
                         return
+            else:
+                # if psutil is not installed, trigger gc.collect events
+                # regularly to limit potential memory leaks due to reference cycles
+                if ((_last_memory_leak_check is None) or
+                        (time() - _last_memory_leak_check >
+                         _MEMORY_LEAK_CHECK_DELAY)):
+                    gc.collect()
+                    _last_memory_leak_check = time()
 
 
 def _add_call_item_to_queue(pending_work_items,
diff --git a/sklearn/externals/joblib/memory.py b/sklearn/externals/joblib/memory.py
index 5ae6940f91776..e31ba2edb72eb 100644
--- a/sklearn/externals/joblib/memory.py
+++ b/sklearn/externals/joblib/memory.py
@@ -454,12 +454,16 @@ def _cached_call(self, args, kwargs, shelving=False):
         metadata: dict
             Some metadata about wrapped function call (see _persist_input()).
""" - # Compare the function code with the previous to see if the - # function code has changed func_id, args_id = self._get_output_identifiers(*args, **kwargs) metadata = None msg = None + + # Wether or not the memorized function must be called + must_call = False + # FIXME: The statements below should be try/excepted + # Compare the function code with the previous to see if the + # function code has changed if not (self._check_previous_func_code(stacklevel=4) and self.store_backend.contains_item([func_id, args_id])): if self._verbose > 10: @@ -469,16 +473,7 @@ def _cached_call(self, args, kwargs, shelving=False): .format(name, args_id, self.store_backend. get_cached_func_info([func_id])['location'])) - out, metadata = self.call(*args, **kwargs) - if self.mmap_mode is not None: - # Memmap the output at the first call to be consistent with - # later calls - if self._verbose: - msg = _format_load_msg(func_id, args_id, - timestamp=self.timestamp, - metadata=metadata) - out = self.store_backend.load_item([func_id, args_id], msg=msg, - verbose=self._verbose) + must_call = True else: try: t0 = time.time() @@ -507,8 +502,19 @@ def _cached_call(self, args, kwargs, shelving=False): self.warn('Exception while loading results for ' '{}\n {}'.format(signature, traceback.format_exc())) - out, metadata = self.call(*args, **kwargs) - args_id = None + must_call = True + + if must_call: + out, metadata = self.call(*args, **kwargs) + if self.mmap_mode is not None: + # Memmap the output at the first call to be consistent with + # later calls + if self._verbose: + msg = _format_load_msg(func_id, args_id, + timestamp=self.timestamp, + metadata=metadata) + out = self.store_backend.load_item([func_id, args_id], msg=msg, + verbose=self._verbose) return (out, args_id, metadata) From 242410f7cd93e7af8786dd57d6537376b6c0b36d Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 5 Sep 2018 17:33:06 +0200 Subject: [PATCH 019/163] MAINT make pytest collection ignore folders with Python scripts (#12011) Some IDEs such as VS Code use the pytest command to collect all the tests of the workspace in the background. This can cause unexpected execution of arbitrary Python scripts in the workspace (examples, benchmarks...). The doc folder is also ignored because it has python scripts for sphinx along with copies of the examples. To safely run pytest in the doc folder, we need to used the find command to find all "*.rst" files as done in the project Makefile. 
--- setup.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.cfg b/setup.cfg index 09c5c9829ae21..93aca4a44f9e1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,6 +5,10 @@ test = pytest # disable-pytest-warnings should be removed once we rewrite tests # using yield with parametrize addopts = + --ignore build_tools + --ignore benchmarks + --ignore doc + --ignore examples --doctest-modules --disable-pytest-warnings -rs From a9c6ad9baf878015653569109091828ceaf2db8e Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Wed, 5 Sep 2018 17:55:07 +0200 Subject: [PATCH 020/163] [MRG+1] break the tie in Meanshift in case cluster intensities are the same (#11901) --- doc/whats_new/v0.20.rst | 6 ++++++ sklearn/cluster/mean_shift_.py | 8 +++++--- sklearn/cluster/tests/test_mean_shift.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 2ed336b782174..46b262896145c 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -63,6 +63,7 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. +- :class:`cluster.MeanShift` (bug fix) - :class:`decomposition.IncrementalPCA` in Python 2 (bug fix) - :class:`decomposition.SparsePCA` (bug fix) - :class:`ensemble.GradientBoostingClassifier` (bug fix affecting feature importances) @@ -151,6 +152,11 @@ Support for Python 3.3 has been officially dropped. ``n_iter_`` attribute in the docstring of :class:`cluster.KMeans`. :issue:`11353` by :user:`Jeremie du Boisberranger `. +- |Fix| Fixed a bug in :func:`cluster.mean_shift` where the assigned labels + were not deterministic if there were multiple clusters with the same + intensities. + :issue:`11901` by :user:`Adrin Jalali `. + - |API| Deprecate ``pooling_func`` unused parameter in :class:`cluster.AgglomerativeClustering`. :issue:`9875` by :user:`Kumar Ashutosh `. diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index 487545ac039d3..800c85c365988 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -215,8 +215,10 @@ def mean_shift(X, bandwidth=None, seeds=None, bin_seeding=False, # If the distance between two kernels is less than the bandwidth, # then we have to remove one because it is a duplicate. Remove the # one with fewer points. + sorted_by_intensity = sorted(center_intensity_dict.items(), - key=lambda tup: tup[1], reverse=True) + key=lambda tup: (tup[1], tup[0]), + reverse=True) sorted_centers = np.array([tup[0] for tup in sorted_by_intensity]) unique = np.ones(len(sorted_centers), dtype=np.bool) nbrs = NearestNeighbors(radius=bandwidth, @@ -359,9 +361,9 @@ class MeanShift(BaseEstimator, ClusterMixin): ... 
[4, 7], [3, 5], [3, 6]]) >>> clustering = MeanShift(bandwidth=2).fit(X) >>> clustering.labels_ - array([0, 0, 0, 1, 1, 1]) + array([1, 1, 1, 0, 0, 0]) >>> clustering.predict([[0, 0], [5, 5]]) - array([0, 1]) + array([1, 0]) >>> clustering # doctest: +NORMALIZE_WHITESPACE MeanShift(bandwidth=2, bin_seeding=False, cluster_all=True, min_bin_freq=1, n_jobs=None, seeds=None) diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index 1d6940a947dc2..441f822cdbded 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -101,6 +101,18 @@ def test_unfitted(): assert_false(hasattr(ms, "labels_")) +def test_cluster_intensity_tie(): + X = np.array([[1, 1], [2, 1], [1, 0], + [4, 7], [3, 5], [3, 6]]) + c1 = MeanShift(bandwidth=2).fit(X) + + X = np.array([[4, 7], [3, 5], [3, 6], + [1, 1], [2, 1], [1, 0]]) + c2 = MeanShift(bandwidth=2).fit(X) + assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0]) + assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1]) + + def test_bin_seeds(): # Test the bin seeding technique which can be used in the mean shift # algorithm From e726f7a3e6a89f898de2e22880aa653fe43949c4 Mon Sep 17 00:00:00 2001 From: Gabriele Calvo Date: Wed, 5 Sep 2018 21:50:20 +0100 Subject: [PATCH 021/163] DOC fix minor spacing issue in the iris dataset description (#12019) --- sklearn/datasets/descr/iris.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/descr/iris.rst b/sklearn/datasets/descr/iris.rst index a35edc728c7d9..e05206454d218 100644 --- a/sklearn/datasets/descr/iris.rst +++ b/sklearn/datasets/descr/iris.rst @@ -25,7 +25,7 @@ Iris plants dataset sepal length: 4.3 7.9 5.84 0.83 0.7826 sepal width: 2.0 4.4 3.05 0.43 -0.4194 petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) - petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) + petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) ============== ==== ==== ======= ===== ==================== :Missing Attribute Values: None From 1fafc5c56d496728ec276e99382efa8e84034b13 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 6 Sep 2018 17:29:37 +1000 Subject: [PATCH 022/163] TST use urlopen monkeypatch for test_decode_* (#12020) Avoid requiring internet for test suite. Examples will still run with internet (as long as cache is occasionally cleared). 
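The underlying pattern is roughly the following minimal sketch; the canned
payload and the patched attribute name are illustrative assumptions here,
while the real helper in the test module below also serves gzipped
responses and several endpoint types::

    import io

    def _mock_urlopen(request, *args, **kwargs):
        # Return a canned file-like payload instead of opening a network
        # connection to openml.org.
        return io.BytesIO(b'{"mock": "openml response"}')

    def test_decode_offline(monkeypatch):
        import sklearn.datasets.openml
        # Redirect the urlopen name looked up inside the openml module.
        monkeypatch.setattr(sklearn.datasets.openml, 'urlopen',
                            _mock_urlopen)
        # fetch_openml(...) can now run without internet access.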
--- sklearn/datasets/tests/test_openml.py | 71 ++++++++++----------------- 1 file changed, 27 insertions(+), 44 deletions(-) diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 3f5716cb96784..cf9cfcdc81ede 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -24,7 +24,6 @@ currdir = os.path.dirname(os.path.abspath(__file__)) # if True, urlopen will be monkey patched to only use local files test_offline = True -test_gzip = True def _test_features_list(data_id): @@ -138,18 +137,14 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version, def _monkey_patch_webbased_functions(context, data_id, - gziped_files, gzip_response): url_prefix_data_description = "https://openml.org/api/v1/json/data/" url_prefix_data_features = "https://openml.org/api/v1/json/data/features/" url_prefix_download_data = "https://openml.org/data/v1/" url_prefix_data_list = "https://openml.org/api/v1/json/data/list/" - path_suffix = '' - read_fn = open - if gziped_files: - path_suffix = '.gz' - read_fn = gzip.open + path_suffix = '.gz' + read_fn = gzip.open class MockHTTPResponse(object): def __init__(self, data, is_gzip): @@ -264,8 +259,7 @@ def test_fetch_openml_iris(monkeypatch, gzip_response): expected_features = 4 expected_missing = 0 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "Multiple active versions of the dataset matching the name" @@ -285,8 +279,9 @@ def test_fetch_openml_iris(monkeypatch, gzip_response): ) -def test_decode_iris(): +def test_decode_iris(monkeypatch): data_id = 61 + _monkey_patch_webbased_functions(monkeypatch, data_id, False) _test_features_list(data_id) @@ -301,8 +296,7 @@ def test_fetch_openml_iris_multitarget(monkeypatch, gzip_response): expected_features = 3 expected_missing = 0 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, expected_missing, @@ -321,8 +315,7 @@ def test_fetch_openml_anneal(monkeypatch, gzip_response): expected_observations = 11 expected_features = 38 expected_missing = 267 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, expected_missing, @@ -330,8 +323,9 @@ def test_fetch_openml_anneal(monkeypatch, gzip_response): compare_default_target=True) -def test_decode_anneal(): +def test_decode_anneal(monkeypatch): data_id = 2 + _monkey_patch_webbased_functions(monkeypatch, data_id, False) _test_features_list(data_id) @@ -346,8 +340,7 @@ def test_fetch_openml_anneal_multitarget(monkeypatch, gzip_response): expected_observations = 11 expected_features = 36 expected_missing = 267 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, expected_missing, @@ -365,8 +358,7 @@ def test_fetch_openml_cpu(monkeypatch, gzip_response): expected_observations = 209 expected_features = 7 
expected_missing = 0 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, expected_missing, @@ -374,8 +366,9 @@ def test_fetch_openml_cpu(monkeypatch, gzip_response): compare_default_target=True) -def test_decode_cpu(): +def test_decode_cpu(monkeypatch): data_id = 561 + _monkey_patch_webbased_functions(monkeypatch, data_id, False) _test_features_list(data_id) @@ -393,8 +386,7 @@ def test_fetch_openml_australian(monkeypatch, gzip_response): expected_observations = 85 expected_features = 14 expected_missing = 0 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_warns_message( UserWarning, "Version 1 of dataset Australian is inactive,", @@ -426,8 +418,7 @@ def test_fetch_openml_miceprotein(monkeypatch, gzip_response): expected_observations = 7 expected_features = 77 expected_missing = 7 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, expected_missing, @@ -446,8 +437,7 @@ def test_fetch_openml_emotions(monkeypatch, gzip_response): expected_observations = 13 expected_features = 72 expected_missing = 0 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) _fetch_dataset_from_openml(data_id, data_name, data_version, target_column, expected_observations, expected_features, @@ -456,8 +446,9 @@ def test_fetch_openml_emotions(monkeypatch, gzip_response): compare_default_target=True) -def test_decode_emotions(): +def test_decode_emotions(monkeypatch): data_id = 40589 + _monkey_patch_webbased_functions(monkeypatch, data_id, False) _test_features_list(data_id) @@ -466,7 +457,7 @@ def test_open_openml_url_cache(monkeypatch, gzip_response): data_id = 61 _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + monkeypatch, data_id, gzip_response) openml_path = sklearn.datasets.openml._DATA_FILE.format(data_id) test_directory = os.path.join(os.path.expanduser('~'), 'scikit_learn_data') # first fill the cache @@ -486,8 +477,7 @@ def test_fetch_openml_notarget(monkeypatch, gzip_response): expected_observations = 150 expected_features = 5 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) data = fetch_openml(data_id=data_id, target_column=target_column, cache=False) assert data.data.shape == (expected_observations, expected_features) @@ -498,8 +488,7 @@ def test_fetch_openml_notarget(monkeypatch, gzip_response): def test_fetch_openml_inactive(monkeypatch, gzip_response): # fetch inactive dataset by id data_id = 40675 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) glas2 = assert_warns_message( UserWarning, "Version 1 of dataset glass2 is inactive,", fetch_openml, data_id=data_id, cache=False) @@ -515,8 +504,7 @@ def test_fetch_openml_inactive(monkeypatch, gzip_response): def test_fetch_nonexiting(monkeypatch, 
gzip_response): # there is no active version of glass2 data_id = 40675 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # Note that we only want to search by name (not data id) assert_raise_message(ValueError, "No active dataset glass2 found", fetch_openml, name='glass2', cache=False) @@ -526,8 +514,7 @@ def test_fetch_nonexiting(monkeypatch, gzip_response): def test_raises_illegal_multitarget(monkeypatch, gzip_response): data_id = 61 targets = ['sepalwidth', 'class'] - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # Note that we only want to search by name (not data id) assert_raise_message(ValueError, "Can only handle homogeneous multi-target datasets,", @@ -540,8 +527,7 @@ def test_warn_ignore_attribute(monkeypatch, gzip_response): data_id = 40966 expected_row_id_msg = "target_column={} has flag is_row_identifier." expected_ignore_msg = "target_column={} has flag is_ignore." - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # single column test assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'), fetch_openml, data_id=data_id, @@ -565,8 +551,7 @@ def test_warn_ignore_attribute(monkeypatch, gzip_response): @pytest.mark.parametrize('gzip_response', [True, False]) def test_string_attribute(monkeypatch, gzip_response): data_id = 40945 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) # single column test assert_raise_message(ValueError, 'STRING attributes are not yet supported', @@ -576,8 +561,7 @@ def test_string_attribute(monkeypatch, gzip_response): @pytest.mark.parametrize('gzip_response', [True, False]) def test_illegal_column(monkeypatch, gzip_response): data_id = 61 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_raise_message(KeyError, "Could not find target_column=", fetch_openml, data_id=data_id, target_column='undefined', cache=False) @@ -591,8 +575,7 @@ def test_illegal_column(monkeypatch, gzip_response): @pytest.mark.parametrize('gzip_response', [True, False]) def test_fetch_openml_raises_missing_values_target(monkeypatch, gzip_response): data_id = 2 - _monkey_patch_webbased_functions( - monkeypatch, data_id, test_gzip, gzip_response) + _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response) assert_raise_message(ValueError, "Target column ", fetch_openml, data_id=data_id, target_column='family') From 3a80162b422fab6d569d3462cc1f0c047cd53e04 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 6 Sep 2018 14:08:48 +0530 Subject: [PATCH 023/163] DOC gradient boosting fit() supports sparse X (#12022) --- sklearn/ensemble/gradient_boosting.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index ec2800ac669d5..c6e0fbee3fe51 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1358,9 +1358,10 @@ def fit(self, X, y, sample_weight=None, monitor=None): Parameters ---------- - X : array-like, shape (n_samples, n_features) - Training vectors, where n_samples is the 
number of samples
-            and n_features is the number of features.
+        X : {array-like, sparse matrix}, shape (n_samples, n_features)
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
 
         y : array-like, shape (n_samples,)
             Target values (strings or integers in classification, real numbers

From b4bf033104436ca0789a2e1607d09e7d98ff0b3d Mon Sep 17 00:00:00 2001
From: William de Vazelhes <31916524+wdevazelhes@users.noreply.github.com>
Date: Thu, 6 Sep 2018 10:46:56 +0200
Subject: [PATCH 024/163] DOC: Add pytest version in documentation (#12002)

---
 README.rst                               | 2 +-
 conftest.py                              | 5 +++++
 doc/developers/advanced_installation.rst | 9 +++++----
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index eb1957686acaf..fa2ef793b9e26 100644
--- a/README.rst
+++ b/README.rst
@@ -120,7 +120,7 @@ Testing
 ~~~~~~~
 
 After installation, you can launch the test suite from outside the
-source directory (you will need to have the ``pytest`` package installed)::
+source directory (you will need to have ``pytest`` >= 3.3.0 installed)::
 
     pytest sklearn
 
diff --git a/conftest.py b/conftest.py
index 621097bfc47ab..bad99b5c99272 100644
--- a/conftest.py
+++ b/conftest.py
@@ -11,6 +11,11 @@
 import pytest
 from _pytest.doctest import DoctestItem
 
+PYTEST_MIN_VERSION = '3.3.0'
+
+if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION:
+    raise ImportError('Your version of pytest is too old, you should have '
+                      'at least pytest >= {} installed.'.format(PYTEST_MIN_VERSION))
 
 def pytest_collection_modifyitems(config, items):
 
diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst
index 720c11ed98f4c..e146363d0ac4e 100644
--- a/doc/developers/advanced_installation.rst
+++ b/doc/developers/advanced_installation.rst
@@ -50,7 +50,9 @@ Building Scikit-learn also requires
 
 Running tests requires
 
-- pytest
+.. |PytestMinVersion| replace:: 3.3.0
+
+- pytest >=\ |PytestMinVersion|
 
 Some tests also require `pandas `_.
 
@@ -276,9 +278,8 @@ Testing
 Testing scikit-learn once installed
 -----------------------------------
 
-Testing requires having the `pytest
-`_ library. Some tests also require having
-`pandas ` installed.
+Testing requires having `pytest `_ >=\ |PytestMinVersion|\ .
+Some tests also require having `pandas ` installed.
 
 After installation, the package can be tested by executing *from outside* the
 source directory::

From a056a573252adebeba98fe84ede93ef3b1e3f5a1 Mon Sep 17 00:00:00 2001
From: Umar Farouk Umar
Date: Thu, 6 Sep 2018 12:10:17 +0100
Subject: [PATCH 025/163] DOC fix for linnerud dataset (#12024)

The descriptions were the wrong way around
---
 sklearn/datasets/descr/linnerud.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/descr/linnerud.rst b/sklearn/datasets/descr/linnerud.rst
index 848ee193e1adc..5585b50a7e42b 100644
--- a/sklearn/datasets/descr/linnerud.rst
+++ b/sklearn/datasets/descr/linnerud.rst
@@ -11,12 +11,12 @@ Linnerrud dataset
 
 The Linnerrud dataset constains two small dataset:
 
-- *exercise*: A list containing the following components: exercise data with
-  20 observations on 3 exercise variables: Weight, Waist and Pulse.
+- *physiological* - CSV containing 20 observations on 3 physiological
+  variables: Weight, Waist and Pulse.
 
-- *physiological*: Data frame with 20 observations on 3 physiological variables:
+- *exercise* - CSV containing 20 observations on 3 exercise variables:
   Chins, Situps and Jumps.
 
 ..
topic:: References - * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. \ No newline at end of file + * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. From d3d09c383cf25f987e54c063c546b3bfeac971cb Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 6 Sep 2018 14:44:10 +0200 Subject: [PATCH 026/163] MAINT skip joblib vendor test on debian (#12027) --- sklearn/tests/test_site_joblib.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/tests/test_site_joblib.py b/sklearn/tests/test_site_joblib.py index 7ceb80a281661..bffd43cc1416f 100644 --- a/sklearn/tests/test_site_joblib.py +++ b/sklearn/tests/test_site_joblib.py @@ -1,4 +1,6 @@ import os +import pytest +from sklearn import externals from sklearn.externals import joblib as joblib_vendored from sklearn.utils import Parallel, delayed, Memory, parallel_backend @@ -9,6 +11,11 @@ def test_old_pickle(tmpdir): + vendored_joblib_home = os.path.dirname(joblib_vendored.__file__) + sklearn_externals_home = os.path.dirname(externals.__file__) + if not vendored_joblib_home.startswith(sklearn_externals_home): + pytest.skip("joblib is physically unvendored (e.g. as in debian)") + # Check that a pickle that references sklearn.external.joblib can load f = tmpdir.join('foo.pkl') f.write(b'\x80\x02csklearn.externals.joblib.numpy_pickle\nNumpyArrayWrappe' From 121dd5ab3bb03203480941ccef2df72cf9cf791d Mon Sep 17 00:00:00 2001 From: jeremiedbb <34657725+jeremiedbb@users.noreply.github.com> Date: Fri, 7 Sep 2018 15:58:21 +0200 Subject: [PATCH 027/163] MNT Fix utils.sparse import in neural_network.rbm (#12032) --- sklearn/neural_network/rbm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index ccf933ed19b56..c35e8840d23f7 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -19,7 +19,6 @@ from ..utils import check_array from ..utils import check_random_state from ..utils import gen_even_slices -from ..utils import issparse from ..utils.extmath import safe_sparse_dot from ..utils.extmath import log_logistic from ..utils.validation import check_is_fitted @@ -310,7 +309,7 @@ def score_samples(self, X): # Randomly corrupt one feature in each sample in v. 
ind = (np.arange(v.shape[0]), rng.randint(0, v.shape[1], v.shape[0])) - if issparse(v): + if sp.issparse(v): data = -2 * v[ind] + 1 v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape) else: From 79f5d147ea2c79efc78cbb8d380b64e15d7bd3ad Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 9 Sep 2018 00:43:21 +1000 Subject: [PATCH 028/163] MNT Revert the deprecation of min_samples_leaf and min_weight_fraction_leaf (#11998) --- doc/modules/ensemble.rst | 5 +- doc/modules/tree.rst | 27 ++- doc/whats_new/v0.20.rst | 13 -- .../ensemble/plot_adaboost_hastie_10_2.py | 4 +- .../ensemble/plot_gradient_boosting_oob.py | 2 +- .../plot_gradient_boosting_quantile.py | 3 +- sklearn/ensemble/forest.py | 167 ++++++++---------- sklearn/ensemble/gradient_boosting.py | 72 +++----- sklearn/ensemble/tests/test_forest.py | 20 +-- .../ensemble/tests/test_gradient_boosting.py | 34 ++-- sklearn/tree/tests/test_tree.py | 69 +++----- sklearn/tree/tree.py | 162 +++++++---------- 12 files changed, 229 insertions(+), 349 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index a41c8201a3fa1..5399f13dbc9f4 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -218,7 +218,7 @@ setting ``oob_score=True``. The size of the model with the default parameters is :math:`O( M * N * log (N) )`, where :math:`M` is the number of trees and :math:`N` is the number of samples. In order to reduce the size of the model, you can change these parameters: - ``min_samples_split``, ``max_leaf_nodes`` and ``max_depth``. + ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. Parallelization --------------- @@ -393,7 +393,8 @@ The number of weak learners is controlled by the parameter ``n_estimators``. The the final combination. By default, weak learners are decision stumps. Different weak learners can be specified through the ``base_estimator`` parameter. The main parameters to tune to obtain good results are ``n_estimators`` and -the complexity of the base estimators (e.g., its depth ``max_depth``). +the complexity of the base estimators (e.g., its depth ``max_depth`` or +minimum required number of samples to consider a split ``min_samples_split``). .. topic:: Examples: diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index 5d448f86a3f11..86f8b2f6fabdf 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -330,18 +330,31 @@ Tips on practical use for each additional level the tree grows to. Use ``max_depth`` to control the size of the tree to prevent overfitting. - * Use ``min_samples_split`` to control the number of samples at a leaf node. - A very small number will usually mean the tree will overfit, whereas a - large number will prevent the tree from learning the data. If the sample - size varies greatly, a float number can be used as percentage in this - parameter. Note that ``min_samples_split`` can create arbitrarily - small leaves. + * Use ``min_samples_split`` or ``min_samples_leaf`` to ensure that multiple + samples inform every decision in the tree, by controlling which splits will + be considered. A very small number will usually mean the tree will overfit, + whereas a large number will prevent the tree from learning the data. Try + ``min_samples_leaf=5`` as an initial value. If the sample size varies + greatly, a float number can be used as percentage in these two parameters. 
+ While ``min_samples_split`` can create arbitrarily small leaves, + ``min_samples_leaf`` guarantees that each leaf has a minimum size, avoiding + low-variance, over-fit leaf nodes in regression problems. For + classification with few classes, ``min_samples_leaf=1`` is often the best + choice. * Balance your dataset before training to prevent the tree from being biased toward the classes that are dominant. Class balancing can be done by sampling an equal number of samples from each class, or preferably by normalizing the sum of the sample weights (``sample_weight``) for each - class to the same value. + class to the same value. Also note that weight-based pre-pruning criteria, + such as ``min_weight_fraction_leaf``, will then be less biased toward + dominant classes than criteria that are not aware of the sample weights, + like ``min_samples_leaf``. + + * If the samples are weighted, it will be easier to optimize the tree + structure using weight-based pre-pruning criterion such as + ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least + a fraction of the overall sum of the sample weights. * All decision trees use ``np.float32`` arrays internally. If training data is not in this format, a copy of the dataset will be made. diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 46b262896145c..0fe95de46eb42 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -343,12 +343,6 @@ Support for Python 3.3 has been officially dropped. while mask does not allow this functionality. :issue:`9524` by :user:`Guillaume Lemaitre `. -- |API| The parameters ``min_samples_leaf`` and ``min_weight_fraction_leaf`` in - tree-based ensembles are deprecated and will be removed (fixed to 1 and 0 - respectively) in version 0.22. These parameters were not effective for - regularization and at worst would produce bad splits. :issue:`10773` by - :user:`Bob Chen ` and `Joel Nothman`_. - - |Fix| :class:`ensemble.BaseBagging` where one could not deterministically reproduce ``fit`` result using the object attributes when ``random_state`` is set. :issue:`9723` by :user:`Guillaume Lemaitre `. @@ -1035,13 +1029,6 @@ Support for Python 3.3 has been officially dropped. considered all samples to be of equal weight importance. :issue:`11464` by :user:`John Stott `. -- |API| The parameters ``min_samples_leaf`` and ``min_weight_fraction_leaf`` in - :class:`tree.DecisionTreeClassifier` and :class:`tree.DecisionTreeRegressor` - are deprecated and will be removed (fixed to 1 and 0 respectively) in version - 0.22. These parameters were not effective for regularization and at worst - would produce bad splits. :issue:`10773` by :user:`Bob Chen ` - and `Joel Nothman`_. - :mod:`sklearn.utils` .................... 
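As a minimal sketch of the guidance restored in doc/modules/tree.rst above
(the dataset and the parameter values here are arbitrary, chosen only for
illustration)::

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)

    # Each leaf must cover at least 5 training samples, pruning away
    # low-support, high-variance splits.
    clf = DecisionTreeClassifier(min_samples_leaf=5).fit(X, y)

    # The weight-aware counterpart: each leaf must hold at least 5% of
    # the total sample weight, which is less biased toward dominant
    # classes when sample_weight is used.
    clf_w = DecisionTreeClassifier(min_weight_fraction_leaf=0.05).fit(X, y)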
diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py index 7fc00a77e3eab..4d48d13dd24f2 100644 --- a/examples/ensemble/plot_adaboost_hastie_10_2.py +++ b/examples/ensemble/plot_adaboost_hastie_10_2.py @@ -43,11 +43,11 @@ X_test, y_test = X[2000:], y[2000:] X_train, y_train = X[:2000], y[:2000] -dt_stump = DecisionTreeClassifier(max_depth=1) +dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1) dt_stump.fit(X_train, y_train) dt_stump_err = 1.0 - dt_stump.score(X_test, y_test) -dt = DecisionTreeClassifier(max_depth=9) +dt = DecisionTreeClassifier(max_depth=9, min_samples_leaf=1) dt.fit(X_train, y_train) dt_err = 1.0 - dt.score(X_test, y_test) diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py index 99f30e750b7ed..ea38b326ce5c9 100644 --- a/examples/ensemble/plot_gradient_boosting_oob.py +++ b/examples/ensemble/plot_gradient_boosting_oob.py @@ -55,7 +55,7 @@ # Fit classifier with out-of-bag estimates params = {'n_estimators': 1200, 'max_depth': 3, 'subsample': 0.5, - 'learning_rate': 0.01, 'random_state': 3} + 'learning_rate': 0.01, 'min_samples_leaf': 1, 'random_state': 3} clf = ensemble.GradientBoostingClassifier(**params) clf.fit(X_train, y_train) diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py index 99e7289710e35..6fb2731a513ec 100644 --- a/examples/ensemble/plot_gradient_boosting_quantile.py +++ b/examples/ensemble/plot_gradient_boosting_quantile.py @@ -41,7 +41,8 @@ def f(x): clf = GradientBoostingRegressor(loss='quantile', alpha=alpha, n_estimators=250, max_depth=3, - learning_rate=.1, min_samples_split=9) + learning_rate=.1, min_samples_leaf=9, + min_samples_split=9) clf.fit(X, y) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 125f48d5b0da6..542f7ca8043f1 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -784,8 +784,8 @@ class RandomForestClassifier(ForestClassifier): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -793,30 +793,25 @@ class RandomForestClassifier(ForestClassifier): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. 
deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -963,10 +958,9 @@ class labels (multi-output problem). RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', max_depth=2, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf='deprecated', min_samples_split=2, - min_weight_fraction_leaf='deprecated', n_estimators=100, - n_jobs=None, oob_score=False, random_state=0, verbose=0, - warm_start=False) + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None, + oob_score=False, random_state=0, verbose=0, warm_start=False) >>> print(clf.feature_importances_) [0.14205973 0.76664038 0.0282433 0.06305659] >>> print(clf.predict([[0, 0, 0, 0]])) @@ -975,7 +969,7 @@ class labels (multi-output problem). Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -1001,8 +995,8 @@ def __init__(self, criterion="gini", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., @@ -1079,8 +1073,8 @@ class RandomForestRegressor(ForestRegressor): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1088,30 +1082,25 @@ class RandomForestRegressor(ForestRegressor): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. 
+ - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -1220,10 +1209,9 @@ class RandomForestRegressor(ForestRegressor): RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, - min_samples_leaf='deprecated', min_samples_split=2, - min_weight_fraction_leaf='deprecated', n_estimators=100, - n_jobs=None, oob_score=False, random_state=0, verbose=0, - warm_start=False) + min_samples_leaf=1, min_samples_split=2, + min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None, + oob_score=False, random_state=0, verbose=0, warm_start=False) >>> print(regr.feature_importances_) [0.18146984 0.81473937 0.00145312 0.00233767] >>> print(regr.predict([[0, 0, 0, 0]])) @@ -1232,7 +1220,7 @@ class RandomForestRegressor(ForestRegressor): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -1265,8 +1253,8 @@ def __init__(self, criterion="mse", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., @@ -1334,8 +1322,8 @@ class ExtraTreesClassifier(ForestClassifier): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1343,30 +1331,25 @@ class ExtraTreesClassifier(ForestClassifier): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. 
+ A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -1501,7 +1484,7 @@ class labels (multi-output problem). Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -1523,8 +1506,8 @@ def __init__(self, criterion="gini", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., @@ -1599,8 +1582,8 @@ class ExtraTreesRegressor(ForestRegressor): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1608,30 +1591,25 @@ class ExtraTreesRegressor(ForestRegressor): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. 
deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -1729,7 +1707,7 @@ class ExtraTreesRegressor(ForestRegressor): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -1750,8 +1728,8 @@ def __init__(self, criterion="mse", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0., @@ -1820,8 +1798,8 @@ class RandomTreesEmbedding(BaseForest): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split. @@ -1829,30 +1807,25 @@ class RandomTreesEmbedding(BaseForest): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. 
Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_leaf_nodes : int or None, optional (default=None) Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. @@ -1928,8 +1901,8 @@ def __init__(self, n_estimators='warn', max_depth=5, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_leaf_nodes=None, min_impurity_decrease=0., min_impurity_split=None, diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index c6e0fbee3fe51..6ae4f6fd1b277 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -25,7 +25,6 @@ from abc import ABCMeta from abc import abstractmethod -import warnings from .base import BaseEnsemble from ..base import ClassifierMixin @@ -1125,13 +1124,13 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble)): @abstractmethod def __init__(self, loss, learning_rate, n_estimators, criterion, - min_samples_split, min_weight_fraction_leaf, + min_samples_split, min_samples_leaf, min_weight_fraction_leaf, max_depth, min_impurity_decrease, min_impurity_split, init, subsample, max_features, random_state, alpha=0.9, verbose=0, max_leaf_nodes=None, - min_samples_leaf='deprecated', warm_start=False, - presort='auto', validation_fraction=0.1, - n_iter_no_change=None, tol=1e-4): + warm_start=False, presort='auto', + validation_fraction=0.1, n_iter_no_change=None, + tol=1e-4): self.n_estimators = n_estimators self.learning_rate = learning_rate @@ -1491,17 +1490,9 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state, n_inbag = max(1, int(self.subsample * n_samples)) loss_ = self.loss_ - if self.min_weight_fraction_leaf != 'deprecated': - warnings.warn("'min_weight_fraction_leaf' is deprecated in 0.20 " - "and will be fixed to a value of 0 in 0.22.", - DeprecationWarning) - min_weight_fraction_leaf = self.min_weight_fraction_leaf - else: - min_weight_fraction_leaf = 0. - # Set min_weight_leaf from min_weight_fraction_leaf - if min_weight_fraction_leaf != 0. and sample_weight is not None: - min_weight_leaf = (min_weight_fraction_leaf * + if self.min_weight_fraction_leaf != 0. and sample_weight is not None: + min_weight_leaf = (self.min_weight_fraction_leaf * np.sum(sample_weight)) else: min_weight_leaf = 0. @@ -1739,8 +1730,8 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1748,30 +1739,25 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. 
+ A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_depth : integer, optional (default=3) maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter @@ -1948,8 +1934,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, min_weight_fraction_leaf=0., max_depth=3, min_impurity_decrease=0., min_impurity_split=None, init=None, random_state=None, max_features=None, verbose=0, @@ -2204,8 +2189,8 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -2213,19 +2198,19 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. 
min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all @@ -2403,8 +2388,7 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, min_weight_fraction_leaf=0., max_depth=3, min_impurity_decrease=0., min_impurity_split=None, init=None, random_state=None, max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index a470913f5f327..d7586c2866571 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -762,16 +762,13 @@ def check_min_samples_leaf(name): ForestEstimator = FOREST_ESTIMATORS[name] # test boundary value - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - ForestEstimator(min_samples_leaf=-1).fit, X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - ForestEstimator(min_samples_leaf=0).fit, X, y) + assert_raises(ValueError, + ForestEstimator(min_samples_leaf=-1).fit, X, y) + assert_raises(ValueError, + ForestEstimator(min_samples_leaf=0).fit, X, y) est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - est.fit(X, y) + est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes @@ -781,8 +778,7 @@ def check_min_samples_leaf(name): est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - est.fit(X, y) + est.fit(X, y) out = est.estimators_[0].tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes @@ -815,9 +811,7 @@ def check_min_weight_fraction_leaf(name): if "RandomForest" in name: est.bootstrap = False - with pytest.warns(DeprecationWarning, - match='min_weight_fraction_leaf'): - est.fit(X, y, sample_weight=weights) + est.fit(X, y, sample_weight=weights) out = est.estimators_[0].tree_.apply(X) node_weights = np.bincount(out, weights=weights) # drop inner nodes diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 332ab89317e1c..6f7654c7d6061 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -106,29 +106,17 @@ def test_classifier_parameter_checks(): assert_raises(ValueError, GradientBoostingClassifier(min_samples_split=1.1).fit, X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises( - ValueError, - GradientBoostingClassifier(min_samples_leaf=0).fit, - X, y - ) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises( - ValueError, - GradientBoostingClassifier(min_samples_leaf=-1.0).fit, - X, y - ) - - with pytest.warns(DeprecationWarning, match='min_weight_fraction_leaf'): - assert_raises(ValueError, - GradientBoostingClassifier( - min_weight_fraction_leaf=-1.).fit, - X, y) - with pytest.warns(DeprecationWarning, match='min_weight_fraction_leaf'): - assert_raises(ValueError, - GradientBoostingClassifier( - min_weight_fraction_leaf=0.6).fit, - X, y) + assert_raises(ValueError, + GradientBoostingClassifier(min_samples_leaf=0).fit, X, 
y) + assert_raises(ValueError, + GradientBoostingClassifier(min_samples_leaf=-1.0).fit, X, y) + + assert_raises(ValueError, + GradientBoostingClassifier(min_weight_fraction_leaf=-1.).fit, + X, y) + assert_raises(ValueError, + GradientBoostingClassifier(min_weight_fraction_leaf=0.6).fit, + X, y) assert_raises(ValueError, GradientBoostingClassifier(subsample=0.0).fit, X, y) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 68b5040374290..37eb6582c7023 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -507,28 +507,16 @@ def test_error(): assert_raises(ValueError, est.predict_proba, X2) for name, TreeEstimator in ALL_TREES.items(): - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - TreeEstimator(min_samples_leaf=-1).fit, X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - TreeEstimator(min_samples_leaf=.6).fit, X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - TreeEstimator(min_samples_leaf=0.).fit, X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - assert_raises(ValueError, - TreeEstimator(min_samples_leaf=3.).fit, X, y) - with pytest.warns(DeprecationWarning, - match='min_weight_fraction_leaf'): - assert_raises(ValueError, - TreeEstimator(min_weight_fraction_leaf=-1).fit, - X, y) - with pytest.warns(DeprecationWarning, - match='min_weight_fraction_leaf'): - assert_raises(ValueError, - TreeEstimator(min_weight_fraction_leaf=0.51).fit, - X, y) + assert_raises(ValueError, TreeEstimator(min_samples_leaf=-1).fit, X, y) + assert_raises(ValueError, TreeEstimator(min_samples_leaf=.6).fit, X, y) + assert_raises(ValueError, TreeEstimator(min_samples_leaf=0.).fit, X, y) + assert_raises(ValueError, TreeEstimator(min_samples_leaf=3.).fit, X, y) + assert_raises(ValueError, + TreeEstimator(min_weight_fraction_leaf=-1).fit, + X, y) + assert_raises(ValueError, + TreeEstimator(min_weight_fraction_leaf=0.51).fit, + X, y) assert_raises(ValueError, TreeEstimator(min_samples_split=-1).fit, X, y) assert_raises(ValueError, TreeEstimator(min_samples_split=0.0).fit, @@ -631,8 +619,7 @@ def test_min_samples_leaf(): est = TreeEstimator(min_samples_leaf=5, max_leaf_nodes=max_leaf_nodes, random_state=0) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - est.fit(X, y) + est.fit(X, y) out = est.tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes @@ -644,8 +631,7 @@ def test_min_samples_leaf(): est = TreeEstimator(min_samples_leaf=0.1, max_leaf_nodes=max_leaf_nodes, random_state=0) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - est.fit(X, y) + est.fit(X, y) out = est.tree_.apply(X) node_counts = np.bincount(out) # drop inner nodes @@ -674,9 +660,7 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): est = TreeEstimator(min_weight_fraction_leaf=frac, max_leaf_nodes=max_leaf_nodes, random_state=0) - with pytest.warns(DeprecationWarning, - match='min_weight_fraction_leaf'): - est.fit(X, y, sample_weight=weights) + est.fit(X, y, sample_weight=weights) if sparse: out = est.tree_.apply(X.tocsr()) @@ -701,9 +685,7 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): est = TreeEstimator(min_weight_fraction_leaf=frac, max_leaf_nodes=max_leaf_nodes, random_state=0) - with pytest.warns(DeprecationWarning, - match='min_weight_fraction_leaf'): - est.fit(X, y) + est.fit(X, y) if sparse: out = 
est.tree_.apply(X.tocsr()) @@ -749,8 +731,7 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, max_leaf_nodes=max_leaf_nodes, min_samples_leaf=5, random_state=0) - with pytest.warns(DeprecationWarning): - est.fit(X, y) + est.fit(X, y) if sparse: out = est.tree_.apply(X.tocsr()) @@ -775,8 +756,7 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, max_leaf_nodes=max_leaf_nodes, min_samples_leaf=.1, random_state=0) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - est.fit(X, y) + est.fit(X, y) if sparse: out = est.tree_.apply(X.tocsr()) @@ -1432,16 +1412,10 @@ def check_sparse_parameters(tree, dataset): assert_array_almost_equal(s.predict(X), d.predict(X)) # Check min_samples_leaf - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - d = TreeEstimator( - random_state=0, - min_samples_leaf=X_sparse.shape[0] // 2 - ).fit(X, y) - with pytest.warns(DeprecationWarning, match='min_samples_leaf'): - s = TreeEstimator( - random_state=0, - min_samples_leaf=X_sparse.shape[0] // 2 - ).fit(X_sparse, y) + d = TreeEstimator(random_state=0, + min_samples_leaf=X_sparse.shape[0] // 2).fit(X, y) + s = TreeEstimator(random_state=0, + min_samples_leaf=X_sparse.shape[0] // 2).fit(X_sparse, y) assert_tree_equal(d.tree_, s.tree_, "{0} with dense and sparse format gave different " "trees".format(tree)) @@ -1586,8 +1560,7 @@ def _check_min_weight_leaf_split_level(TreeEstimator, X, y, sample_weight): assert_equal(est.tree_.max_depth, 1) est = TreeEstimator(random_state=0, min_weight_fraction_leaf=0.4) - with pytest.warns(DeprecationWarning, match='min_weight_fraction_leaf'): - est.fit(X, y, sample_weight=sample_weight) + est.fit(X, y, sample_weight=sample_weight) assert_equal(est.tree_.max_depth, 0) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 437dc197c7a04..9985cee2eef77 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -85,26 +85,26 @@ def __init__(self, splitter, max_depth, min_samples_split, + min_samples_leaf, min_weight_fraction_leaf, max_features, max_leaf_nodes, random_state, min_impurity_decrease, min_impurity_split, - min_samples_leaf='deprecated', class_weight=None, presort=False): self.criterion = criterion self.splitter = splitter self.max_depth = max_depth self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf self.min_weight_fraction_leaf = min_weight_fraction_leaf self.max_features = max_features self.random_state = random_state self.max_leaf_nodes = max_leaf_nodes self.min_impurity_decrease = min_impurity_decrease self.min_impurity_split = min_impurity_split - self.min_samples_leaf = min_samples_leaf self.class_weight = class_weight self.presort = presort @@ -173,24 +173,18 @@ def fit(self, X, y, sample_weight=None, check_input=True, max_leaf_nodes = (-1 if self.max_leaf_nodes is None else self.max_leaf_nodes) - if self.min_samples_leaf != 'deprecated': - warnings.warn("'min_samples_leaf' is deprecated in 0.20 and " - "will be fixed to a value of 1 in 0.22.", - DeprecationWarning) - min_samples_leaf = self.min_samples_leaf - else: - min_samples_leaf = 1 - if isinstance(min_samples_leaf, (numbers.Integral, np.integer)): - if not 1 <= min_samples_leaf: + if isinstance(self.min_samples_leaf, (numbers.Integral, np.integer)): + if not 1 <= self.min_samples_leaf: raise ValueError("min_samples_leaf must be at least 1 " "or in (0, 0.5], got %s" - % min_samples_leaf) + % self.min_samples_leaf) + min_samples_leaf = self.min_samples_leaf else: # float - if not 0. 
< min_samples_leaf <= 0.5: + if not 0. < self.min_samples_leaf <= 0.5: raise ValueError("min_samples_leaf must be at least 1 " "or in (0, 0.5], got %s" - % min_samples_leaf) - min_samples_leaf = int(ceil(min_samples_leaf * n_samples)) + % self.min_samples_leaf) + min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples)) if isinstance(self.min_samples_split, (numbers.Integral, np.integer)): if not 2 <= self.min_samples_split: @@ -240,15 +234,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, if len(y) != n_samples: raise ValueError("Number of labels=%d does not match " "number of samples=%d" % (len(y), n_samples)) - - if self.min_weight_fraction_leaf != 'deprecated': - warnings.warn("'min_weight_fraction_leaf' is deprecated in 0.20 " - "and will be fixed to a value of 0 in 0.22.", - DeprecationWarning) - min_weight_fraction_leaf = self.min_weight_fraction_leaf - else: - min_weight_fraction_leaf = 0 - if not 0 <= min_weight_fraction_leaf <= 0.5: + if not 0 <= self.min_weight_fraction_leaf <= 0.5: raise ValueError("min_weight_fraction_leaf must in [0, 0.5]") if max_depth <= 0: raise ValueError("max_depth must be greater than zero. ") @@ -283,10 +269,10 @@ def fit(self, X, y, sample_weight=None, check_input=True, # Set min_weight_leaf from min_weight_fraction_leaf if sample_weight is None: - min_weight_leaf = (min_weight_fraction_leaf * + min_weight_leaf = (self.min_weight_fraction_leaf * n_samples) else: - min_weight_leaf = (min_weight_fraction_leaf * + min_weight_leaf = (self.min_weight_fraction_leaf * np.sum(sample_weight)) if self.min_impurity_split is not None: @@ -553,8 +539,8 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -562,30 +548,25 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. 
deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default=None) The number of features to consider when looking for the best split: @@ -703,7 +684,7 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -751,8 +732,8 @@ def __init__(self, splitter="best", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features=None, random_state=None, max_leaf_nodes=None, @@ -930,8 +911,8 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -939,30 +920,25 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default=None) The number of features to consider when looking for the best split: @@ -1051,7 +1027,7 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. 
``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. @@ -1099,8 +1075,8 @@ def __init__(self, splitter="best", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features=None, random_state=None, max_leaf_nodes=None, @@ -1197,8 +1173,8 @@ class ExtraTreeClassifier(DecisionTreeClassifier): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1206,30 +1182,25 @@ class ExtraTreeClassifier(DecisionTreeClassifier): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -1313,7 +1284,7 @@ class ExtraTreeClassifier(DecisionTreeClassifier): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. 
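To make the int-versus-float semantics restored in these docstrings concrete, a small sketch (assuming the patched estimators; the sample count and the 0.05 fraction are arbitrary example values):

from math import ceil
import numpy as np
from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor

X, y = make_regression(n_samples=200, n_features=4, random_state=0)

# An int is an absolute count; a float is a fraction of n_samples that is
# converted internally via ceil(min_samples_leaf * n_samples).
assert ceil(0.05 * len(X)) == 10  # so 0.05 and 10 coincide for this dataset
reg = DecisionTreeRegressor(min_samples_leaf=0.05, random_state=0).fit(X, y)

# Every leaf ends up holding at least 10 training samples.
leaf_ids = reg.apply(X)
leaf_sizes = np.bincount(leaf_ids)
assert leaf_sizes[np.unique(leaf_ids)].min() >= 10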
@@ -1329,8 +1300,8 @@ def __init__(self, splitter="random", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", random_state=None, max_leaf_nodes=None, @@ -1390,8 +1361,8 @@ class ExtraTreeRegressor(DecisionTreeRegressor): min_samples_split : int, float, optional (default=2) The minimum number of samples required to split an internal node: - - If int, then consider ``min_samples_split`` as the minimum number. - - If float, then ``min_samples_split`` is a fraction and + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. @@ -1399,30 +1370,25 @@ class ExtraTreeRegressor(DecisionTreeRegressor): Added float values for fractions. min_samples_leaf : int, float, optional (default=1) - The minimum number of samples required to be at a leaf node: - - - If int, then consider ``min_samples_leaf`` as the minimum number. - - If float, then ``min_samples_leaf`` is a fraction and + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. - .. deprecated:: 0.20 - The parameter ``min_samples_leaf`` is deprecated in version 0.20 and - will be fixed to a value of 1 in version 0.22. It was not effective - for regularization and empirically, 1 is the best value. min_weight_fraction_leaf : float, optional (default=0.) The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. - .. deprecated:: 0.20 - The parameter ``min_weight_fraction_leaf`` is deprecated in version - 0.20. Its implementation, like ``min_samples_leaf``, is ineffective - for regularization. - max_features : int, float, string or None, optional (default="auto") The number of features to consider when looking for the best split: @@ -1486,7 +1452,7 @@ class ExtraTreeRegressor(DecisionTreeRegressor): Notes ----- The default values for the parameters controlling the size of the trees - (e.g. ``max_depth``, ``min_samples_split``, etc.) lead to fully grown and + (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. 
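Similarly, the weight threshold behind ``min_weight_fraction_leaf`` follows the conversion visible in ``BaseDecisionTree.fit`` above; a sketch of that arithmetic with made-up weights:

import numpy as np

# Mirrors the conversion in BaseDecisionTree.fit shown earlier: the fraction
# becomes an absolute per-leaf weight threshold.
min_weight_fraction_leaf = 0.1
sample_weight = np.array([1.0, 1.0, 2.0, 2.0, 4.0])  # hypothetical weights

min_weight_leaf = min_weight_fraction_leaf * np.sum(sample_weight)
print(min_weight_leaf)  # 1.0 -- each leaf must carry total weight >= 1.0

# With sample_weight=None, n_samples takes the place of the weight sum:
n_samples = len(sample_weight)
print(min_weight_fraction_leaf * n_samples)  # 0.5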
@@ -1502,8 +1468,8 @@ def __init__(self, splitter="random", max_depth=None, min_samples_split=2, - min_samples_leaf='deprecated', - min_weight_fraction_leaf='deprecated', + min_samples_leaf=1, + min_weight_fraction_leaf=0., max_features="auto", random_state=None, min_impurity_decrease=0., From e5333f5dfe61a69bede562c20a055359adad7e51 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Sat, 8 Sep 2018 16:46:48 +0200 Subject: [PATCH 029/163] OPTICS remove redundant recursion (#11985) --- sklearn/cluster/optics_.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 5c20ddb421845..1d7a677b51fb7 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -680,10 +680,6 @@ def _cluster_tree(node, parent_node, local_maxima_points, if reachability_plot[s] < significant_min: node.split_point = -1 # if split_point is not significant, ignore this split and continue - _cluster_tree(node, parent_node, local_maxima_points, - reachability_plot, reachability_ordering, - min_cluster_size, maxima_ratio, rejection_ratio, - similarity_threshold, significant_min) return # only check a certain ratio of points in the child From 2242f4c1d00d11de8fa01e67647f8a5188269fcb Mon Sep 17 00:00:00 2001 From: Max Copeland Date: Sat, 8 Sep 2018 07:52:01 -0700 Subject: [PATCH 030/163] EXA use openml fetcher in plot_gpr_co2.py example (#12004) --- examples/gaussian_process/plot_gpr_co2.py | 34 ++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py index 8170de01898dc..4c438ce821284 100644 --- a/examples/gaussian_process/plot_gpr_co2.py +++ b/examples/gaussian_process/plot_gpr_co2.py @@ -66,7 +66,7 @@ import numpy as np from matplotlib import pyplot as plt - +from sklearn.datasets import fetch_openml from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels \ import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared @@ -79,29 +79,25 @@ print(__doc__) -def load_mauna_loa_atmospheric_c02(): - url = ('http://cdiac.ess-dive.lbl.gov/' - 'ftp/trends/co2/sio-keel-flask/maunaloa_c.dat') +def load_mauna_loa_atmospheric_co2(): + ml_data = fetch_openml(data_id=41187) months = [] ppmv_sums = [] counts = [] - for line in urlopen(url): - line = line.decode('utf8') - if not line.startswith('MLO'): - # ignore headers - continue - station, date, weight, flag, ppmv = line.split() - y = date[:2] - m = date[2:4] - month_float = (int(('20' if y < '20' else '19') + y) + - (int(m) - 1) / 12) - if not months or month_float != months[-1]: - months.append(month_float) - ppmv_sums.append(float(ppmv)) + + y = ml_data.data[:, 0] + m = ml_data.data[:, 1] + month_float = y + (m - 1) / 12 + ppmvs = ml_data.target + + for month, ppmv in zip(month_float, ppmvs): + if not months or month != months[-1]: + months.append(month) + ppmv_sums.append(ppmv) counts.append(1) else: # aggregate monthly sum to produce average - ppmv_sums[-1] += float(ppmv) + ppmv_sums[-1] += ppmv counts[-1] += 1 months = np.asarray(months).reshape(-1, 1) @@ -109,7 +105,7 @@ def load_mauna_loa_atmospheric_c02(): return months, avg_ppmvs -X, y = load_mauna_loa_atmospheric_c02() +X, y = load_mauna_loa_atmospheric_co2() # Kernel with parameters given in GPML book k1 = 66.0**2 * RBF(length_scale=67.0) # long term smooth rising trend From 177900bca7f53cb44b02dfed9d21efb6fda7c434 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sun, 9 Sep 2018 04:19:38 +0200 
Subject: [PATCH 031/163] CI Workaround to test numpy 1.8.2 and scipy 0.13.3 (#12042) --- build_tools/travis/install.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index d41e746a1ab2e..b15e76ea397ce 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -84,7 +84,11 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then # and scipy virtualenv --system-site-packages testvenv source testvenv/bin/activate - pip install pytest pytest-cov cython==$CYTHON_VERSION + # FIXME: Importing scipy.sparse with numpy 1.8.2 and scipy 0.13.3 produces + # a deprecation warning and the test suite fails on such warnings. + # To test these numpy/scipy versions, we use pytest<3.8 as it has + # a known limitation/bug of not capturing warnings during test collection. + pip install pytest==3.7.4 pytest-cov cython==$CYTHON_VERSION elif [[ "$DISTRIB" == "scipy-dev" ]]; then make_conda python=3.7 From 251e58b9e2c098aa805b58dd128864ec66ec782e Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 9 Sep 2018 12:30:16 +1000 Subject: [PATCH 032/163] FIX ordering_ type and cosmetic changes to structure for OPTICS main loop (#11986) --- sklearn/cluster/optics_.py | 79 ++++++++++++++-------------- sklearn/cluster/tests/test_optics.py | 18 +++++++ 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 1d7a677b51fb7..165102d0a52bc 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -331,14 +331,12 @@ def fit(self, X, y=None): n_samples = len(X) # Start all points as 'unprocessed' ## - self._processed = np.zeros((n_samples, 1), dtype=bool) self.reachability_ = np.empty(n_samples) self.reachability_.fill(np.inf) self.core_distances_ = np.empty(n_samples) self.core_distances_.fill(np.nan) # Start all points as noise ## self.labels_ = np.full(n_samples, -1, dtype=int) - self.ordering_ = [] # Check for valid n_samples relative to min_samples if self.min_samples > n_samples: @@ -357,11 +355,7 @@ def fit(self, X, y=None): self.core_distances_[:] = nbrs.kneighbors(X, self.min_samples)[0][:, -1] - # Main OPTICS loop. Not parallelizable. The order that entries are - # written to the 'ordering_' list is important! - for point in range(n_samples): - if not self._processed[point]: - self._expand_cluster_order(point, X, nbrs) + self.ordering_ = self._calculate_optics_order(X, nbrs) indices_, self.labels_ = _extract_optics(self.ordering_, self.reachability_, @@ -374,46 +368,53 @@ def fit(self, X, y=None): self.core_sample_indices_ = indices_ return self - # OPTICS helper functions; these should not be public # - - def _expand_cluster_order(self, point, X, nbrs): - # As above, not parallelizable. Parallelizing would allow items in - # the 'unprocessed' list to switch to 'processed' - if self.core_distances_[point] <= self.max_eps: - while not self._processed[point]: - self._processed[point] = True - self.ordering_.append(point) - point = self._set_reach_dist(point, X, nbrs) - else: # For very noisy points - self.ordering_.append(point) - self._processed[point] = True - - def _set_reach_dist(self, point_index, X, nbrs): - P = np.array(X[point_index]).reshape(1, -1) + # OPTICS helper functions + + def _calculate_optics_order(self, X, nbrs): + # Main OPTICS loop. Not parallelizable. The order that entries are + # written to the 'ordering_' list is important! 
+ processed = np.zeros(X.shape[0], dtype=bool) + ordering = np.zeros(X.shape[0], dtype=int) + ordering_idx = 0 + for point in range(X.shape[0]): + if processed[point]: + continue + if self.core_distances_[point] <= self.max_eps: + while not processed[point]: + processed[point] = True + ordering[ordering_idx] = point + ordering_idx += 1 + point = self._set_reach_dist(point, processed, X, nbrs) + else: # For very noisy points + ordering[ordering_idx] = point + ordering_idx += 1 + processed[point] = True + return ordering + + def _set_reach_dist(self, point_index, processed, X, nbrs): + P = X[point_index:point_index + 1] indices = nbrs.radius_neighbors(P, radius=self.max_eps, return_distance=False)[0] # Getting indices of neighbors that have not been processed - unproc = np.compress((~np.take(self._processed, indices)).ravel(), + unproc = np.compress((~np.take(processed, indices)).ravel(), indices, axis=0) # Keep n_jobs = 1 in the following lines...please - if len(unproc) > 0: - dists = pairwise_distances(P, np.take(X, unproc, axis=0), - self.metric, n_jobs=None).ravel() - - rdists = np.maximum(dists, self.core_distances_[point_index]) - new_reach = np.minimum(np.take(self.reachability_, unproc), rdists) - self.reachability_[unproc] = new_reach - - # Checks to see if everything is already processed; - # if so, return control to main loop - if unproc.size > 0: - # Define return order based on reachability distance - return(unproc[quick_scan(np.take(self.reachability_, unproc), - dists)]) - else: + if not unproc.size: + # Everything is already processed. Return to main loop return point_index + dists = pairwise_distances(P, np.take(X, unproc, axis=0), + self.metric, n_jobs=1).ravel() + + rdists = np.maximum(dists, self.core_distances_[point_index]) + new_reach = np.minimum(np.take(self.reachability_, unproc), rdists) + self.reachability_[unproc] = new_reach + + # Define return order based on reachability distance + return (unproc[quick_scan(np.take(self.reachability_, unproc), + dists)]) + def extract_dbscan(self, eps): """Performs DBSCAN extraction for an arbitrary epsilon. 
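Putting the refactored ordering loop to use, a minimal sketch, assuming the development OPTICS API shown in this patch (the attribute names and the ``extract_dbscan`` return value follow the code above and may differ in released versions):

import numpy as np
from sklearn.cluster import OPTICS

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + [5, 5]])

clust = OPTICS(min_samples=10).fit(X)

# ordering_ is a permutation of all sample indices (see the new test below);
# indexing reachability_ by it yields the reachability plot that cluster
# extraction works from.
assert set(clust.ordering_) == set(range(len(X)))
reachability_plot = clust.reachability_[clust.ordering_]

# DBSCAN-style extraction at a fixed eps reuses the precomputed ordering.
core_indices, labels = clust.extract_dbscan(eps=1.0)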
diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 5a89cb7a0c439..545ffbf0ba797 100755 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -33,6 +33,24 @@ def test_correct_number_of_clusters(): n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_) assert_equal(n_clusters_1, n_clusters) + # check attribute types and sizes + assert clust.core_sample_indices_.ndim == 1 + assert clust.core_sample_indices_.size > 0 + assert clust.core_sample_indices_.dtype.kind == 'i' + + assert clust.labels_.shape == (len(X),) + assert clust.labels_.dtype.kind == 'i' + + assert clust.reachability_.shape == (len(X),) + assert clust.reachability_.dtype.kind == 'f' + + assert clust.core_distances_.shape == (len(X),) + assert clust.core_distances_.dtype.kind == 'f' + + assert clust.ordering_.shape == (len(X),) + assert clust.ordering_.dtype.kind == 'i' + assert set(clust.ordering_) == set(range(len(X))) + def test_minimum_number_of_sample_check(): # test that we check a minimum number of samples From a86709fdc379f7d7db76a75f39572890e4ddcad1 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sun, 9 Sep 2018 12:36:00 +1000 Subject: [PATCH 033/163] [MRG] MNT rename min_cluster_size_ratio to min_cluster_size (#11913) --- sklearn/cluster/optics_.py | 66 +++++++++++++++----------- sklearn/cluster/tests/test_optics.py | 69 +++++++++++++++------------- 2 files changed, 77 insertions(+), 58 deletions(-) diff --git a/sklearn/cluster/optics_.py b/sklearn/cluster/optics_.py index 165102d0a52bc..899da518ae796 100755 --- a/sklearn/cluster/optics_.py +++ b/sklearn/cluster/optics_.py @@ -24,7 +24,7 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', p=2, metric_params=None, maxima_ratio=.75, rejection_ratio=.7, similarity_threshold=0.4, - significant_min=.003, min_cluster_size_ratio=.005, + significant_min=.003, min_cluster_size=.005, min_maxima_ratio=0.001, algorithm='ball_tree', leaf_size=30, n_jobs=None): """Perform OPTICS clustering from vector array @@ -93,8 +93,10 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', significant_min : float, optional (default=.003) Sets a lower threshold on how small a significant maxima can be. - min_cluster_size_ratio : float, optional (default=.005) - Minimum percentage of dataset expected for cluster membership. + min_cluster_size : int > 1 or float between 0 and 1 (default=0.005) + Minimum number of samples in an OPTICS cluster, expressed as an + absolute number or a fraction of the number of samples (rounded + to be at least 2). min_maxima_ratio : float, optional (default=.001) Used to determine neighborhood size for minimum cluster membership. @@ -151,7 +153,7 @@ def optics(X, min_samples=5, max_eps=np.inf, metric='euclidean', clust = OPTICS(min_samples, max_eps, metric, p, metric_params, maxima_ratio, rejection_ratio, similarity_threshold, significant_min, - min_cluster_size_ratio, min_maxima_ratio, + min_cluster_size, min_maxima_ratio, algorithm, leaf_size, n_jobs) clust.fit(X) return clust.core_sample_indices_, clust.labels_ @@ -221,8 +223,10 @@ class OPTICS(BaseEstimator, ClusterMixin): significant_min : float, optional (default=.003) Sets a lower threshold on how small a significant maxima can be. - min_cluster_size_ratio : float, optional (default=.005) - Minimum percentage of dataset expected for cluster membership. 
+ min_cluster_size : int > 1 or float between 0 and 1 (default=0.005) + Minimum number of samples in an OPTICS cluster, expressed as an + absolute number or a fraction of the number of samples (rounded + to be at least 2). min_maxima_ratio : float, optional (default=.001) Used to determine neighborhood size for minimum cluster membership. @@ -289,7 +293,7 @@ class OPTICS(BaseEstimator, ClusterMixin): def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean', p=2, metric_params=None, maxima_ratio=.75, rejection_ratio=.7, similarity_threshold=0.4, - significant_min=.003, min_cluster_size_ratio=.005, + significant_min=.003, min_cluster_size=.005, min_maxima_ratio=0.001, algorithm='ball_tree', leaf_size=30, n_jobs=None): @@ -299,7 +303,7 @@ def __init__(self, min_samples=5, max_eps=np.inf, metric='euclidean', self.rejection_ratio = rejection_ratio self.similarity_threshold = similarity_threshold self.significant_min = significant_min - self.min_cluster_size_ratio = min_cluster_size_ratio + self.min_cluster_size = min_cluster_size self.min_maxima_ratio = min_maxima_ratio self.algorithm = algorithm self.metric = metric @@ -330,6 +334,24 @@ def fit(self, X, y=None): X = check_array(X, dtype=np.float) n_samples = len(X) + + if self.min_samples > n_samples: + raise ValueError("Number of training samples (n_samples=%d) must " + "be greater than min_samples (min_samples=%d) " + "used for clustering." % + (n_samples, self.min_samples)) + + if self.min_cluster_size <= 0 or (self.min_cluster_size != + int(self.min_cluster_size) + and self.min_cluster_size > 1): + raise ValueError('min_cluster_size must be a positive integer or ' + 'a float between 0 and 1. Got %r' % + self.min_cluster_size) + elif self.min_cluster_size > n_samples: + raise ValueError('min_cluster_size must be no greater than the ' + 'number of samples (%d). Got %d' % + (n_samples, self.min_cluster_size)) + # Start all points as 'unprocessed' ## self.reachability_ = np.empty(n_samples) self.reachability_.fill(np.inf) @@ -338,13 +360,6 @@ def fit(self, X, y=None): # Start all points as noise ## self.labels_ = np.full(n_samples, -1, dtype=int) - # Check for valid n_samples relative to min_samples - if self.min_samples > n_samples: - raise ValueError("Number of training samples (n_samples=%d) must " - "be greater than min_samples (min_samples=%d) " - "used for clustering." % - (n_samples, self.min_samples)) - nbrs = NearestNeighbors(n_neighbors=self.min_samples, algorithm=self.algorithm, leaf_size=self.leaf_size, metric=self.metric, @@ -363,7 +378,7 @@ def fit(self, X, y=None): self.rejection_ratio, self.similarity_threshold, self.significant_min, - self.min_cluster_size_ratio, + self.min_cluster_size, self.min_maxima_ratio) self.core_sample_indices_ = indices_ return self @@ -492,7 +507,7 @@ def _extract_dbscan(ordering, core_distances, reachability, eps): def _extract_optics(ordering, reachability, maxima_ratio=.75, rejection_ratio=.7, similarity_threshold=0.4, - significant_min=.003, min_cluster_size_ratio=.005, + significant_min=.003, min_cluster_size=.005, min_maxima_ratio=0.001): """Performs automatic cluster extraction for variable density data. @@ -530,8 +545,10 @@ def _extract_optics(ordering, reachability, maxima_ratio=.75, significant_min : float, optional Sets a lower threshold on how small a significant maxima can be. - min_cluster_size_ratio : float, optional - Minimum percentage of dataset expected for cluster membership. 
+ min_cluster_size : int > 1 or float between 0 and 1 + Minimum number of samples in an OPTICS cluster, expressed as an + absolute number or a fraction of the number of samples (rounded + to be at least 2). min_maxima_ratio : float, optional Used to determine neighborhood size for minimum cluster membership. @@ -551,7 +568,7 @@ def _extract_optics(ordering, reachability, maxima_ratio=.75, root_node = _automatic_cluster(reachability_plot, ordering, maxima_ratio, rejection_ratio, similarity_threshold, significant_min, - min_cluster_size_ratio, min_maxima_ratio) + min_cluster_size, min_maxima_ratio) leaves = _get_leaves(root_node, []) # Start cluster id's at 0 clustid = 0 @@ -570,7 +587,7 @@ def _extract_optics(ordering, reachability, maxima_ratio=.75, def _automatic_cluster(reachability_plot, ordering, maxima_ratio, rejection_ratio, similarity_threshold, significant_min, - min_cluster_size_ratio, min_maxima_ratio): + min_cluster_size, min_maxima_ratio): """Converts reachability plot to cluster tree and returns root node. Parameters @@ -582,13 +599,10 @@ def _automatic_cluster(reachability_plot, ordering, """ min_neighborhood_size = 2 - min_cluster_size = int(min_cluster_size_ratio * len(ordering)) + if min_cluster_size <= 1: + min_cluster_size = max(2, min_cluster_size * len(ordering)) neighborhood_size = int(min_maxima_ratio * len(ordering)) - # Should this check for < min_samples? Should this be public? - if min_cluster_size < 5: - min_cluster_size = 5 - # Again, should this check < min_samples, should the parameter be public? if neighborhood_size < min_neighborhood_size: neighborhood_size = min_neighborhood_size diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 545ffbf0ba797..bddf57ec7b5d1 100755 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -2,6 +2,7 @@ # Amy X. 
Zhang # License: BSD 3 clause +from __future__ import print_function, division import numpy as np import pytest @@ -20,6 +21,17 @@ from sklearn.cluster.tests.common import generate_clustered_data +rng = np.random.RandomState(0) +n_points_per_cluster = 250 +C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2) +C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2) +C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2) +C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2) +C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2) +C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2) +X = np.vstack((C1, C2, C3, C4, C5, C6)) + + def test_correct_number_of_clusters(): # in 'auto' mode @@ -135,27 +147,36 @@ def test_dbscan_optics_parity(eps, min_samples): def test_auto_extract_hier(): # Tests auto extraction gets correct # of clusters with varying density + clust = OPTICS(min_samples=9).fit(X) + assert_equal(len(set(clust.labels_)), 6) - # Generate sample data - rng = np.random.RandomState(0) - n_points_per_cluster = 250 - C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2) - C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2) - C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2) - C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2) - C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2) - C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2) - X = np.vstack((C1, C2, C3, C4, C5, C6)) +# try arbitrary minimum sizes +@pytest.mark.parametrize('min_cluster_size', range(2, X.shape[0] // 10, 23)) +def test_min_cluster_size(min_cluster_size): + redX = X[::10] # reduce for speed + clust = OPTICS(min_samples=9, min_cluster_size=min_cluster_size).fit(redX) + cluster_sizes = np.bincount(clust.labels_[clust.labels_ != -1]) + if cluster_sizes.size: + assert min(cluster_sizes) >= min_cluster_size + # check behaviour is the same when min_cluster_size is a fraction + clust_frac = OPTICS(min_samples=9, + min_cluster_size=min_cluster_size / redX.shape[0]) + clust_frac.fit(redX) + assert_array_equal(clust.labels_, clust_frac.labels_) - # Compute OPTICS - clust = OPTICS(min_samples=9) +@pytest.mark.parametrize('min_cluster_size', [0, -1, 1.1, 2.2]) +def test_min_cluster_size_invalid(min_cluster_size): + clust = OPTICS(min_cluster_size=min_cluster_size) + with pytest.raises(ValueError, match="must be a positive integer or a "): + clust.fit(X) - # Run the fit - clust.fit(X) - assert_equal(len(set(clust.labels_)), 6) +def test_min_cluster_size_invalid2(): + clust = OPTICS(min_cluster_size=len(X) + 1) + with pytest.raises(ValueError, match="must be no greater than the "): + clust.fit(X) @pytest.mark.parametrize("reach, n_child, members", [ @@ -187,23 +208,7 @@ def test_cluster_sigmin_pruning(reach, n_child, members): def test_reach_dists(): # Tests against known extraction array - rng = np.random.RandomState(0) - n_points_per_cluster = 250 - - C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2) - C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2) - C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2) - C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2) - C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2) - C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2) - X = np.vstack((C1, C2, C3, C4, C5, C6)) - - # Compute OPTICS - - clust = OPTICS(min_samples=10, metric='minkowski') - - # Run the fit - clust.fit(X) + clust = OPTICS(min_samples=10, metric='minkowski').fit(X) # Expected values, matches 'RD' results from: # http://chemometria.us.edu.pl/download/optics.py From 
5ec0001e93d3d0e071689e9b898850b7c35b0851 Mon Sep 17 00:00:00 2001 From: vqean3 Date: Tue, 11 Sep 2018 16:27:55 -0700 Subject: [PATCH 034/163] DOC `sample_weight` removed from the docs in `SVR` class. (#12046) --- sklearn/svm/classes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index a2d96c322b332..1028843a9bf19 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -841,9 +841,6 @@ class SVR(BaseLibSVM, RegressorMixin): intercept_ : array, shape = [1] Constants in decision function. - sample_weight : array-like, shape = [n_samples] - Individual weights for each sample - Examples -------- >>> from sklearn.svm import SVR From f71de6fd264ba350e69737973e4eadebbe900469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=BCdiger=20Busche?= Date: Wed, 12 Sep 2018 06:48:11 +0200 Subject: [PATCH 035/163] MNT Unify and refactor strategy error (#12050) --- sklearn/dummy.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearn/dummy.py b/sklearn/dummy.py index f2c866413183b..ade45a1735879 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -105,9 +105,11 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - if self.strategy not in ("most_frequent", "stratified", "uniform", - "constant", "prior"): - raise ValueError("Unknown strategy type.") + allowed_strategies = ("most_frequent", "stratified", "uniform", + "constant", "prior") + if self.strategy not in allowed_strategies: + raise ValueError("Unknown strategy type: %s, expected one of %s." + % (self.strategy, allowed_strategies)) if self.strategy == "uniform" and sp.issparse(y): y = y.toarray() @@ -386,10 +388,10 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - if self.strategy not in ("mean", "median", "quantile", "constant"): - raise ValueError("Unknown strategy type: %s, expected " - "'mean', 'median', 'quantile' or 'constant'" - % self.strategy) + allowed_strategies = ("mean", "median", "quantile", "constant") + if self.strategy not in allowed_strategies: + raise ValueError("Unknown strategy type: %s, expected one of %s." + % (self.strategy, allowed_strategies)) y = check_array(y, ensure_2d=False) if len(y) == 0: From 2ed18e00f77c8cd5b99f52ab1623ecaa8794b399 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Wed, 12 Sep 2018 09:47:42 +0200 Subject: [PATCH 036/163] [MRG] DOC Examples added to the rest of linear models (#11975) --- sklearn/linear_model/base.py | 17 ++++++++++++++ sklearn/linear_model/coordinate_descent.py | 24 ++++++++++++++++++++ sklearn/linear_model/huber.py | 23 +++++++++++++++++++ sklearn/linear_model/least_angle.py | 26 ++++++++++++++++++++++ sklearn/linear_model/omp.py | 25 +++++++++++++++++++++ sklearn/linear_model/ransac.py | 12 ++++++++++ sklearn/linear_model/theil_sen.py | 12 ++++++++++ 7 files changed, 139 insertions(+) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 30a28cd507f67..29734a2135d8f 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -399,6 +399,23 @@ class LinearRegression(LinearModel, RegressorMixin): intercept_ : array Independent term in the linear model. 
+ Examples + -------- + >>> import numpy as np + >>> from sklearn.linear_model import LinearRegression + >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) + >>> # y = 1 * x_0 + 2 * x_1 + 3 + >>> y = np.dot(X, np.array([1, 2])) + 3 + >>> reg = LinearRegression().fit(X, y) + >>> reg.score(X, y) + 1.0 + >>> reg.coef_ + array([1., 2.]) + >>> reg.intercept_ # doctest: +ELLIPSIS + 3.0000... + >>> reg.predict(np.array([[3, 5]])) + array([16.]) + Notes ----- From the implementation point of view, this is just plain Ordinary diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 6fa71f2dddcf4..2d0723944be4e 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1368,6 +1368,17 @@ class LassoCV(LinearModelCV, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance for the optimal alpha. + Examples + -------- + >>> from sklearn.linear_model import LassoCV + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(noise=4, random_state=0) + >>> reg = LassoCV(cv=5, random_state=0).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9993... + >>> reg.predict(X[:1,]) + array([-78.4951...]) + Notes ----- For an example, see @@ -2235,6 +2246,19 @@ class MultiTaskLassoCV(LinearModelCV, RegressorMixin): number of iterations run by the coordinate descent solver to reach the specified tolerance for the optimal alpha. + Examples + -------- + >>> from sklearn.linear_model import MultiTaskLassoCV + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_targets=2, noise=4, random_state=0) + >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9994... + >>> reg.alpha_ + 0.5713... + >>> reg.predict(X[:1,]) + array([[153.7971..., 94.9015...]]) + See also -------- MultiTaskElasticNet diff --git a/sklearn/linear_model/huber.py b/sklearn/linear_model/huber.py index b6f4658ea573d..3270b5d221a51 100644 --- a/sklearn/linear_model/huber.py +++ b/sklearn/linear_model/huber.py @@ -192,6 +192,29 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): A boolean mask which is set to True where the samples are identified as outliers. + Examples + -------- + >>> import numpy as np + >>> from sklearn.linear_model import HuberRegressor, LinearRegression + >>> from sklearn.datasets import make_regression + >>> np.random.seed(0) + >>> X, y, coef = make_regression( + ... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0) + >>> X[:4] = np.random.uniform(10, 20, (4, 2)) + >>> y[:4] = np.random.uniform(10, 20, 4) + >>> huber = HuberRegressor().fit(X, y) + >>> huber.score(X, y) # doctest: +ELLIPSIS + -7.284608623514573 + >>> huber.predict(X[:1,]) + array([806.7200...]) + >>> linear = LinearRegression().fit(X, y) + >>> print("True coefficients:", coef) + True coefficients: [20.4923... 34.1698...] + >>> print("Huber coefficients:", huber.coef_) + Huber coefficients: [17.7906... 31.0106...] + >>> print("Linear Regression coefficients:", linear.coef_) + Linear Regression coefficients: [-1.9221... 7.0226...] + References ---------- .. [1] Peter J. Huber, Elvezio M. 
Ronchetti, Robust Statistics diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index d139560260a87..ce13b99b6aae5 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -1070,6 +1070,19 @@ class LarsCV(Lars): n_iter_ : array-like or int the number of iterations run by Lars with the optimal alpha. + Examples + -------- + >>> from sklearn.linear_model import LarsCV + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0) + >>> reg = LarsCV(cv=5).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9996... + >>> reg.alpha_ + 0.0254... + >>> reg.predict(X[:1,]) + array([154.0842...]) + See also -------- lars_path, LassoLars, LassoLarsCV @@ -1290,6 +1303,19 @@ class LassoLarsCV(LarsCV): n_iter_ : array-like or int the number of iterations run by Lars with the optimal alpha. + Examples + -------- + >>> from sklearn.linear_model import LassoLarsCV + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(noise=4.0, random_state=0) + >>> reg = LassoLarsCV(cv=5).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9992... + >>> reg.alpha_ + 0.0484... + >>> reg.predict(X[:1,]) + array([-77.8723...]) + Notes ----- diff --git a/sklearn/linear_model/omp.py b/sklearn/linear_model/omp.py index a0f6d49490948..c304c0f341821 100644 --- a/sklearn/linear_model/omp.py +++ b/sklearn/linear_model/omp.py @@ -583,6 +583,17 @@ class OrthogonalMatchingPursuit(LinearModel, RegressorMixin): n_iter_ : int or array-like Number of active features across every target. + Examples + -------- + >>> from sklearn.linear_model import OrthogonalMatchingPursuit + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(noise=4, random_state=0) + >>> reg = OrthogonalMatchingPursuit().fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9991... + >>> reg.predict(X[:1,]) + array([-78.3854...]) + Notes ----- Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, @@ -814,6 +825,20 @@ class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin): Number of active features across every target for the model refit with the best hyperparameters got by cross-validating across all folds. + Examples + -------- + >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression(n_features=100, n_informative=10, + ... noise=4, random_state=0) + >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9991... + >>> reg.n_nonzero_coefs_ + 10 + >>> reg.predict(X[:1,]) + array([-78.3854...]) + See also -------- orthogonal_mp diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index 9dcd044d1f3ea..f929533e871a8 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -186,6 +186,18 @@ class RANSACRegressor(BaseEstimator, MetaEstimatorMixin, RegressorMixin): .. versionadded:: 0.19 + Examples + -------- + >>> from sklearn.linear_model import RANSACRegressor + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression( + ... n_samples=200, n_features=2, noise=4.0, random_state=0) + >>> reg = RANSACRegressor(random_state=0).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9885... + >>> reg.predict(X[:1,]) + array([-31.9417...]) + References ---------- .. 
[1] https://en.wikipedia.org/wiki/RANSAC diff --git a/sklearn/linear_model/theil_sen.py b/sklearn/linear_model/theil_sen.py index 0f3b19164b146..00ad26d41b031 100644 --- a/sklearn/linear_model/theil_sen.py +++ b/sklearn/linear_model/theil_sen.py @@ -276,6 +276,18 @@ class TheilSenRegressor(LinearModel, RegressorMixin): Number of combinations taken into account from 'n choose k', where n is the number of samples and k is the number of subsamples. + Examples + -------- + >>> from sklearn.linear_model import TheilSenRegressor + >>> from sklearn.datasets import make_regression + >>> X, y = make_regression( + ... n_samples=200, n_features=2, noise=4.0, random_state=0) + >>> reg = TheilSenRegressor(random_state=0).fit(X, y) + >>> reg.score(X, y) # doctest: +ELLIPSIS + 0.9884... + >>> reg.predict(X[:1,]) + array([-31.5871...]) + References ---------- - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009 From 1906c959a9350ba388bcd349969ad1555fa4e2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Dupr=C3=A9=20la=20Tour?= Date: Wed, 12 Sep 2018 17:47:52 +0200 Subject: [PATCH 037/163] DOC Generated author list from github (#11708) --- AUTHORS.rst | 75 --------- build_tools/Makefile | 4 + build_tools/generate_authors_table.py | 117 ++++++++++++++ doc/about.rst | 26 ++- doc/authors.rst | 220 ++++++++++++++++++++++++++ doc/developers/maintainer.rst | 11 +- 6 files changed, 376 insertions(+), 77 deletions(-) delete mode 100644 AUTHORS.rst create mode 100644 build_tools/Makefile create mode 100644 build_tools/generate_authors_table.py create mode 100644 doc/authors.rst diff --git a/AUTHORS.rst b/AUTHORS.rst deleted file mode 100644 index 48427fc0a2b3a..0000000000000 --- a/AUTHORS.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. -*- mode: rst -*- - - -This is a community effort, and as such many people have contributed -to it over the years. - -History -------- - -This project was started in 2007 as a Google Summer of Code project by -David Cournapeau. Later that year, Matthieu Brucher started work on -this project as part of his thesis. - -In 2010 Fabian Pedregosa, Gael Varoquaux, Alexandre Gramfort and Vincent -Michel of INRIA took leadership of the project and made the first public -release, February the 1st 2010. Since then, several releases have appeared -following a ~3 month cycle, and a thriving international community has -been leading the development. - -People ------- - -The following people have been core contributors to scikit-learn's development and maintenance: - -.. 
hlist:: - - * `Mathieu Blondel `_ - * `Matthieu Brucher `_ - * Lars Buitinck - * David Cournapeau - * `Noel Dawe `_ - * Vincent Dubourg - * Edouard Duchesnay - * `Tom Dupré la Tour `_ - * Alexander Fabisch - * `Virgile Fritsch `_ - * `Satra Ghosh `_ - * `Angel Soler Gollonet `_ - * Chris Filo Gorgolewski - * `Alexandre Gramfort `_ - * `Olivier Grisel `_ - * `Jaques Grobler `_ - * `Yaroslav Halchenko `_ - * `Brian Holt `_ - * `Arnaud Joly `_ - * Thouis (Ray) Jones - * `Kyle Kastner `_ - * `Manoj Kumar `_ - * Robert Layton - * `Guillaume Lemaitre `_ - * `Wei Li `_ - * Paolo Losi - * `Gilles Louppe `_ - * `Jan Hendrik Metzen `_ - * Vincent Michel - * Jarrod Millman - * `Andreas Müller `_ (release manager) - * `Vlad Niculae `_ - * `Joel Nothman `_ - * `Alexandre Passos `_ - * `Fabian Pedregosa `_ - * `Peter Prettenhofer `_ - * `Hanmin Qin `_ - * Bertrand Thirion - * `Joris Van den Bossche `_ - * `Jake VanderPlas `_ - * Nelle Varoquaux - * `Gael Varoquaux `_ - * Ron Weiss - * `Roman Yurchak `_ - -Please do not email the authors directly to ask for assistance or report issues. -Instead, please see `What's the best way to ask questions about scikit-learn -`_ -in the FAQ. diff --git a/build_tools/Makefile b/build_tools/Makefile new file mode 100644 index 0000000000000..68162733b4b11 --- /dev/null +++ b/build_tools/Makefile @@ -0,0 +1,4 @@ +# Makefile for maintenance tools + +authors: + python generate_authors_table.py > ../doc/authors.rst diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py new file mode 100644 index 0000000000000..ea3796473396d --- /dev/null +++ b/build_tools/generate_authors_table.py @@ -0,0 +1,117 @@ +""" +This script generates an html table of contributors, with names and avatars. +The list is generated from scikit-learn's teams on GitHub, plus a small number +of hard-coded contributors. + +The table should be updated for each new inclusion in the teams. +Generating the table requires admin rights. +""" +from __future__ import print_function + +import sys +import requests +import getpass + +try: + # With authentication: up to 5000 requests per hour. + print("user:", file=sys.stderr) + user = input() + passwd = getpass.getpass() + auth = (user, passwd) +except IndexError: + # Without authentication: up to 60 requests per hour. + auth = None + +ROW_SIZE = 7 +LOGO_URL = 'https://avatars2.githubusercontent.com/u/365630?v=4' + + +def group_iterable(iterable, size): + """Group iterable into lines""" + group = [] + for element in iterable: + group.append(element) + if len(group) == size: + yield group + group = [] + if len(group) != 0: + yield group + + +def get_contributors(): + """Get the list of contributor profiles. 
Require admin rights."""
+    # get members of scikit-learn teams on GitHub
+    members = []
+    for team in [11523, 33471]:
+        for page in [1, 2]:  # 30 per page
+            members.extend(requests.get(
+                "https://api.github.com/teams/%d/members?page=%d"
+                % (team, page), auth=auth).json())
+
+    # keep only the logins
+    logins = [c['login'] for c in members]
+    # add missing contributors with GitHub accounts
+    logins.extend(['dubourg', 'jarrodmillman', 'mbrucher', 'thouis'])
+    # add missing contributors without GitHub accounts
+    logins.extend(['Angel Soler Gollonet'])
+    # remove duplicate
+    logins = set(logins)
+    # remove CI
+    logins.remove('sklearn-ci')
+
+    # get profiles from GitHub
+    profiles = [get_profile(login) for login in logins]
+    # sort by last name
+    profiles = sorted(profiles, key=key)
+
+    return profiles
+
+
+def get_profile(login):
+    """Get the GitHub profile from login"""
+    profile = requests.get("https://api.github.com/users/%s" % login,
+                           auth=auth).json()
+    if 'name' not in profile:
+        # default profile if the login does not exist
+        return dict(name=login, avatar_url=LOGO_URL, html_url="")
+    else:
+        if profile["name"] is None:
+            profile["name"] = profile["login"]
+
+        # fix missing names
+        missing_names = {'bthirion': 'Bertrand Thirion',
+                         'dubourg': 'Vincent Dubourg',
+                         'Duchesnay': 'Edouard Duchesnay',
+                         'Lars': 'Lars Buitinck',
+                         'MechCoder': 'Manoj Kumar'}
+        if profile["name"] in missing_names:
+            profile["name"] = missing_names[profile["name"]]
+    return profile
+
+
+def key(profile):
+    """Get the last name in lower case"""
+    return profile["name"].split(' ')[-1].lower()
+
+
+contributors = get_contributors()
+
+print(".. raw :: html\n")
+[the remaining print statements emit the contributor-table markup — a <table> with <col> widths computed from ROW_SIZE, then, per row from group_iterable(contributors, size=ROW_SIZE), one <td> cell per contributor holding a linked avatar image (formatted from contributor["html_url"] and contributor["avatar_url"]) and the contributor's name; the HTML string literals themselves were stripped in extraction]
diff --git a/doc/about.rst b/doc/about.rst
index 90295b96fb6ff..218b0ad897fe4 100644
--- a/doc/about.rst
+++ b/doc/about.rst
@@ -1,7 +1,31 @@
 About us
 ========
 
-.. include:: ../AUTHORS.rst
+History
+-------
+
+This project was started in 2007 as a Google Summer of Code project by
+David Cournapeau. Later that year, Matthieu Brucher started work on
+this project as part of his thesis.
+
+In 2010 Fabian Pedregosa, Gael Varoquaux, Alexandre Gramfort and Vincent
+Michel of INRIA took leadership of the project and made the first public
+release, February the 1st 2010. Since then, several releases have appeared
+following a ~3 month cycle, and a thriving international community has
+been leading the development.
+
+Authors
+-------
+
+The following people have been core contributors to scikit-learn's development
+and maintenance:
+
+.. include:: authors.rst
+
+Please do not email the authors directly to ask for assistance or report issues.
+Instead, please see `What's the best way to ask questions about scikit-learn
+`_
+in the FAQ.
 
 .. seealso::
diff --git a/doc/authors.rst b/doc/authors.rst
new file mode 100644
index 0000000000000..0210dff4bef6e
--- /dev/null
+++ b/doc/authors.rst
@@ -0,0 +1,220 @@
+.. raw :: html
+
+[generated HTML contributor table, markup stripped in extraction — one cell per contributor with a linked avatar image and name, in this order: Mathieu Blondel, Joris Van den Bossche, Matthieu Brucher, Lars Buitinck, David Cournapeau, Noel Dawe, Shiqiao Du, Vincent Dubourg, Edouard Duchesnay, Loïc Estève, Alexander Fabisch, Virgile Fritsch, Satrajit Ghosh, Angel Soler Gollonet, Chris Filo Gorgolewski, Alexandre Gramfort, Olivier Grisel, Jaques Grobler, Yaroslav Halchenko, Brian Holt, Arnaud Joly, Thouis (Ray) Jones, Kyle Kastner, Manoj Kumar, Robert Layton, Guillaume Lemaitre, Wei Li, Paolo Losi, Gilles Louppe, Jan Hendrik Metzen, Vincent Michel, Jarrod Millman, Andreas Mueller, Vlad Niculae, Joel Nothman, Alexandre Passos, Fabian Pedregosa, Peter Prettenhofer, Hanmin Qin, (Venkat) Raghav Rajagopalan, Jacob Schreiber, Bertrand Thirion, Tom Dupré la Tour, Jake Vanderplas, Nelle Varoquaux, Gael Varoquaux, David Warde-Farley, Ron Weiss, Roman Yurchak]
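For reference, the group_iterable helper in the script above fills each table row with ROW_SIZE contributors and yields a shorter final row for the remainder — an illustrative session:

    >>> list(group_iterable(range(10), size=4))
    [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]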
diff --git a/doc/developers/maintainer.rst b/doc/developers/maintainer.rst index d0d0db8a041bb..a3309abcfbf10 100644 --- a/doc/developers/maintainer.rst +++ b/doc/developers/maintainer.rst @@ -1,8 +1,17 @@ Maintainer / core-developer information ======================================== +Before a release +---------------- + +1. Update authors table:: + + $ cd build_tools; make authors; cd .. + + and commit. + Making a release ------------------- +---------------- For more information see https://github.com/scikit-learn/scikit-learn/wiki/How-to-make-a-release From 3ee1cfc873270fdf075fad2a03a2695388fd5ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=BCdiger=20Busche?= Date: Thu, 13 Sep 2018 10:03:59 +0200 Subject: [PATCH 038/163] ENH Allow scoring of dummies without testsamples (#11957) --- doc/whats_new/v0.20.rst | 4 ++ sklearn/dummy.py | 69 +++++++++++++++++++++++++++++++++ sklearn/tests/test_dummy.py | 77 +++++++++++++++++++++++++++++++++++++ 3 files changed, 150 insertions(+) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 0fe95de46eb42..8cbb8074ed735 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -297,6 +297,10 @@ Support for Python 3.3 has been officially dropped. only require X to be an object with finite length or shape. :issue:`9832` by :user:`Vrishank Bhardwaj `. +- |Feature| :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` + can now be scored without supplying test samples. + :issue:`11951` by :user:`Rüdiger Busche `. + :mod:`sklearn.ensemble` ....................... diff --git a/sklearn/dummy.py b/sklearn/dummy.py index ade45a1735879..2fac84fd7bea4 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -320,6 +320,37 @@ def predict_log_proba(self, X): else: return [np.log(p) for p in proba] + def score(self, X, y, sample_weight=None): + """Returns the mean accuracy on the given test data and labels. + + In multi-label classification, this is the subset accuracy + which is a harsh metric since you require for each sample that + each label set be correctly predicted. + + Parameters + ---------- + X : {array-like, None} + Test samples with shape = (n_samples, n_features) or + None. Passing None as test samples gives the same result + as passing real test samples, since DummyClassifier + operates independently of the sampled observations. + + y : array-like, shape = (n_samples) or (n_samples, n_outputs) + True labels for X. + + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + + Returns + ------- + score : float + Mean accuracy of self.predict(X) wrt. y. + + """ + if X is None: + X = np.zeros(shape=(len(y), 1)) + return super(DummyClassifier, self).score(X, y, sample_weight) + class DummyRegressor(BaseEstimator, RegressorMixin): """ @@ -480,3 +511,41 @@ def predict(self, X, return_std=False): y_std = np.ravel(y_std) return (y, y_std) if return_std else y + + def score(self, X, y, sample_weight=None): + """Returns the coefficient of determination R^2 of the prediction. + + The coefficient R^2 is defined as (1 - u/v), where u is the residual + sum of squares ((y_true - y_pred) ** 2).sum() and v is the total + sum of squares ((y_true - y_true.mean()) ** 2).sum(). + The best possible score is 1.0 and it can be negative (because the + model can be arbitrarily worse). A constant model that always + predicts the expected value of y, disregarding the input features, + would get a R^2 score of 0.0. 
+ + Parameters + ---------- + X : {array-like, None} + Test samples with shape = (n_samples, n_features) or None. + For some estimators this may be a + precomputed kernel matrix instead, shape = (n_samples, + n_samples_fitted], where n_samples_fitted is the number of + samples used in the fitting for the estimator. + Passing None as test samples gives the same result + as passing real test samples, since DummyRegressor + operates independently of the sampled observations. + + y : array-like, shape = (n_samples) or (n_samples, n_outputs) + True values for X. + + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + + Returns + ------- + score : float + R^2 of self.predict(X) wrt. y. + """ + if X is None: + X = np.zeros(shape=(len(y), 1)) + return super(DummyRegressor, self).score(X, y, sample_weight) diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 5d955f51017a1..805c90a7e018e 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -1,5 +1,7 @@ from __future__ import division +import pytest + import numpy as np import scipy.sparse as sp @@ -200,6 +202,45 @@ def test_string_labels(): assert_array_equal(clf.predict(X), ["paris"] * 5) +@pytest.mark.parametrize("y,y_test", [ + ([2, 1, 1, 1], [2, 2, 1, 1]), + (np.array([[2, 2], + [1, 1], + [1, 1], + [1, 1]]), + np.array([[2, 2], + [2, 2], + [1, 1], + [1, 1]])) +]) +def test_classifier_score_with_None(y, y_test): + clf = DummyClassifier(strategy="most_frequent") + clf.fit(None, y) + assert_equal(clf.score(None, y_test), 0.5) + + +@pytest.mark.parametrize("strategy", [ + "stratified", + "most_frequent", + "prior", + "uniform", + "constant" +]) +def test_classifier_prediction_independent_of_X(strategy): + y = [0, 2, 1, 1] + X1 = [[0]] * 4 + clf1 = DummyClassifier(strategy=strategy, random_state=0, constant=0) + clf1.fit(X1, y) + predictions1 = clf1.predict(X1) + + X2 = [[1]] * 4 + clf2 = DummyClassifier(strategy=strategy, random_state=0, constant=0) + clf2.fit(X2, y) + predictions2 = clf2.predict(X2) + + assert_array_equal(predictions1, predictions2) + + def test_classifier_exceptions(): clf = DummyClassifier(strategy="unknown") assert_raises(ValueError, clf.fit, [], []) @@ -633,3 +674,39 @@ def test_dummy_regressor_return_std(): assert_equal(len(y_pred_list), 2) # the second element should be all zeros assert_array_equal(y_pred_list[1], y_std_expected) + + +@pytest.mark.parametrize("y,y_test", [ + ([1, 1, 1, 2], [1.25] * 4), + (np.array([[2, 2], + [1, 1], + [1, 1], + [1, 1]]), + [[1.25, 1.25]] * 4) + +]) +def test_regressor_score_with_None(y, y_test): + reg = DummyRegressor() + reg.fit(None, y) + assert_equal(reg.score(None, y_test), 1.0) + + +@pytest.mark.parametrize("strategy", [ + "mean", + "median", + "quantile", + "constant" +]) +def test_regressor_prediction_independent_of_X(strategy): + y = [0, 2, 1, 1] + X1 = [[0]] * 4 + reg1 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7) + reg1.fit(X1, y) + predictions1 = reg1.predict(X1) + + X2 = [[1]] * 4 + reg2 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7) + reg2.fit(X2, y) + predictions2 = reg2.predict(X2) + + assert_array_equal(predictions1, predictions2) From dad5c36c5eda5e677dabaac4be330fb7517ca4d6 Mon Sep 17 00:00:00 2001 From: "Zijie (ZJ) Poh" <8103276+zjpoh@users.noreply.github.com> Date: Thu, 13 Sep 2018 02:01:43 -0700 Subject: [PATCH 039/163] DOC Fix docstring inconsistency in nmf.py (#12063) --- sklearn/decomposition/nmf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index 990d31bf2ccc0..0617a1797fcdc 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -880,7 +880,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, init : None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar' | 'custom' Method used to initialize the procedure. - Default: 'nndsvd' if n_components < n_features, otherwise random. + Default: 'random'. Valid options: - 'random': non-negative random matrices, scaled with: From 36536c6f46ac060d4b9c9e48d79d42fafa3fb344 Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Thu, 13 Sep 2018 11:08:23 +0200 Subject: [PATCH 040/163] MAINT Fix invalid escape sequence (#12064) --- sklearn/cluster/tests/test_k_means.py | 4 ++-- sklearn/datasets/mlcomp.py | 2 +- sklearn/externals/_arff.py | 2 +- sklearn/model_selection/tests/test_search.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 7935e7134d242..5994c770db9c9 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -885,7 +885,7 @@ def test_sparse_validate_centers(): # Test that a ValueError is raised for validate_center_shape classifier = KMeans(n_clusters=3, init=centers, n_init=1) - msg = "The shape of the initial centers \(\(4L?, 4L?\)\) " \ + msg = r"The shape of the initial centers \(\(4L?, 4L?\)\) " \ "does not match the number of clusters 3" assert_raises_regex(ValueError, msg, classifier.fit, X) @@ -969,7 +969,7 @@ def test_sample_weight_length(): # check that an error is raised when passing sample weights # with an incompatible shape km = KMeans(n_clusters=n_clusters, random_state=42) - assert_raises_regex(ValueError, 'len\(sample_weight\)', km.fit, X, + assert_raises_regex(ValueError, r'len\(sample_weight\)', km.fit, X, sample_weight=np.ones(2)) diff --git a/sklearn/datasets/mlcomp.py b/sklearn/datasets/mlcomp.py index 169df6e55151a..9adb7bbc1c06e 100644 --- a/sklearn/datasets/mlcomp.py +++ b/sklearn/datasets/mlcomp.py @@ -24,7 +24,7 @@ def _load_document_classification(dataset_path, metadata, set_=None, **kwargs): "in March 2017, the load_mlcomp function was deprecated " "in version 0.19 and will be removed in 0.21.") def load_mlcomp(name_or_id, set_="raw", mlcomp_root=None, **kwargs): - """Load a datasets as downloaded from http://mlcomp.org + r"""Load a datasets as downloaded from http://mlcomp.org Read more in the :ref:`User Guide `. diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 7fb445ef9d5a5..eaec6083d0ae4 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -641,7 +641,7 @@ def _decode_comment(self, s): :param s: a normalized string. :return: a string with the decoded comment. 
''' - res = re.sub('^\%( )?', '', s) + res = re.sub(r'^\%( )?', '', s) return res def _decode_relation(self, s): diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 969b6288a71e8..916804b384c7b 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -133,10 +133,10 @@ def assert_grid_iter_equals_getitem(grid): @pytest.mark.parametrize( "input, error_type, error_message", - [(0, TypeError, 'Parameter grid is not a dict or a list \(0\)'), - ([{'foo': [0]}, 0], TypeError, 'Parameter grid is not a dict \(0\)'), + [(0, TypeError, r'Parameter grid is not a dict or a list \(0\)'), + ([{'foo': [0]}, 0], TypeError, r'Parameter grid is not a dict \(0\)'), ({'foo': 0}, TypeError, "Parameter grid value is not iterable " - "\(key='foo', value=0\)")] + r"\(key='foo', value=0\)")] ) def test_validate_parameter_grid_input(input, error_type, error_message): with pytest.raises(error_type, match=error_message): From 17c6c908738bedda834d49a94a8804c179e9cbfb Mon Sep 17 00:00:00 2001 From: Zach Griffith Date: Thu, 13 Sep 2018 04:18:59 -0500 Subject: [PATCH 041/163] DOC fix typos in documentation. (#12059) --- doc/modules/lda_qda.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst index 3d45dd78f3179..e1dfb0c03ea4b 100644 --- a/doc/modules/lda_qda.rst +++ b/doc/modules/lda_qda.rst @@ -15,7 +15,7 @@ surface, respectively. These classifiers are attractive because they have closed-form solutions that can be easily computed, are inherently multiclass, have proven to work well in -practice and have no hyperparameters to tune. +practice, and have no hyperparameters to tune. .. |ldaqda| image:: ../auto_examples/classification/images/sphx_glr_plot_lda_qda_001.png :target: ../auto_examples/classification/plot_lda_qda.html @@ -43,7 +43,7 @@ linear subspace consisting of the directions which maximize the separation between classes (in a precise sense discussed in the mathematics section below). The dimension of the output is necessarily less than the number of classes, so this is, in general, a rather strong dimensionality reduction, and -only makes senses in a multiclass setting. +only makes sense in a multiclass setting. This is implemented in :func:`discriminant_analysis.LinearDiscriminantAnalysis.transform`. The desired @@ -70,10 +70,10 @@ the class conditional distribution of the data :math:`P(X|y=k)` for each class and we select the class :math:`k` which maximizes this conditional probability. More specifically, for linear and quadratic discriminant analysis, -:math:`P(X|y)` is modelled as a multivariate Gaussian distribution with +:math:`P(X|y)` is modeled as a multivariate Gaussian distribution with density: -.. math:: P(X | y=k) = \frac{1}{(2\pi)^{d/2} |\Sigma_k|^{1/2}}\exp\left(-\frac{1}{2} (X-\mu_k)^t \Sigma_k^{-1} (X-\mu_k)\right) +.. math:: P(X | y=k) = \frac{1}{(2\pi)^{d/2} |\Sigma_k|^{1/2}}\exp\left(-\frac{1}{2} (X-\mu_k)^t \Sigma_k^{-1} (X-\mu_k)\right) where :math:`d` is the number of features. @@ -85,7 +85,7 @@ matrices, or by a regularized estimator: see the section on shrinkage below). In the case of LDA, the Gaussians for each class are assumed to share the same covariance matrix: :math:`\Sigma_k = \Sigma` for all :math:`k`. 
This leads to -linear decision surfaces between, as can be seen by comparing the +linear decision surfaces, which can be seen by comparing the log-probability ratios :math:`\log[P(y=k | X) / P(y=l | X)]`: .. math:: @@ -127,7 +127,7 @@ classifier, there is a dimensionality reduction by linear projection onto a :math:`K-1` dimensional space. We can reduce the dimension even more, to a chosen :math:`L`, by projecting -onto the linear subspace :math:`H_L` which maximize the variance of the +onto the linear subspace :math:`H_L` which maximizes the variance of the :math:`\mu^*_k` after projection (in effect, we are doing a form of PCA for the transformed class means :math:`\mu^*_k`). This :math:`L` corresponds to the ``n_components`` parameter used in the From 06b4307fbca82b7ff73b1319cd67a4fab34d7c11 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Thu, 13 Sep 2018 18:17:31 +0800 Subject: [PATCH 042/163] DOC Include fetch_openml doc in user guide (#12065) --- doc/datasets/index.rst | 148 +++++++++++++++++++++++++++++++++++++ doc/datasets/openml.rst | 148 ------------------------------------- sklearn/datasets/openml.py | 2 + 3 files changed, 150 insertions(+), 148 deletions(-) delete mode 100644 doc/datasets/openml.rst diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 947e55f0c4c37..e0640916fbb64 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -351,6 +351,154 @@ features:: _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader +.. + For doctests: + + >>> import numpy as np + >>> import os + +.. _openml: + +Downloading datasets from the openml.org repository +--------------------------------------------------- + +`openml.org `_ is a public repository for machine learning +data and experiments, that allows everybody to upload open datasets. + +The ``sklearn.datasets`` package is able to download datasets +from the repository using the function +:func:`sklearn.datasets.fetch_openml`. + +For example, to download a dataset of gene expressions in mice brains:: + + >>> from sklearn.datasets import fetch_openml + >>> mice = fetch_openml(name='miceprotein', version=4) + +To fully specify a dataset, you need to provide a name and a version, though +the version is optional, see :ref:`openml_versions` below. +The dataset contains a total of 1080 examples belonging to 8 different +classes:: + + >>> mice.data.shape + (1080, 77) + >>> mice.target.shape + (1080,) + >>> np.unique(mice.target) # doctest: +NORMALIZE_WHITESPACE + array(['c-CS-m', 'c-CS-s', 'c-SC-m', 'c-SC-s', 't-CS-m', 't-CS-s', 't-SC-m', 't-SC-s'], dtype=object) + +You can get more information on the dataset by looking at the ``DESCR`` +and ``details`` attributes:: + + >>> print(mice.DESCR) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP + **Author**: Clara Higuera, Katheleen J. Gardiner, Krzysztof J. Cios + **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Mice+Protein+Expression) - 2015 + **Please cite**: Higuera C, Gardiner KJ, Cios KJ (2015) Self-Organizing + Feature Maps Identify Proteins Critical to Learning in a Mouse Model of Down + Syndrome. PLoS ONE 10(6): e0129126... 
+ + >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP + {'id': '40966', 'name': 'MiceProtein', 'version': '4', 'format': 'ARFF', + 'upload_date': '2017-11-08T16:00:15', 'licence': 'Public', + 'url': 'https://www.openml.org/data/v1/download/17928620/MiceProtein.arff', + 'file_id': '17928620', 'default_target_attribute': 'class', + 'row_id_attribute': 'MouseID', + 'ignore_attribute': ['Genotype', 'Treatment', 'Behavior'], + 'tag': ['OpenML-CC18', 'study_135', 'study_98', 'study_99'], + 'visibility': 'public', 'status': 'active', + 'md5_checksum': '3c479a6885bfa0438971388283a1ce32'} + + +The ``DESCR`` contains a free-text description of the data, while ``details`` +contains a dictionary of meta-data stored by openml, like the dataset id. +For more details, see the `OpenML documentation +`_ The ``data_id`` of the mice protein dataset +is 40966, and you can use this (or the name) to get more information on the +dataset on the openml website:: + + >>> mice.url + 'https://www.openml.org/d/40966' + +The ``data_id`` also uniquely identifies a dataset from OpenML:: + + >>> mice = fetch_openml(data_id=40966) + >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP + {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF', + 'creator': ..., + 'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url': + 'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id': + '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C, + Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins + Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6): + e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14', + 'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum': + '3c479a6885bfa0438971388283a1ce32'} + +.. _openml_versions: + +Dataset Versions +~~~~~~~~~~~~~~~~ + +A dataset is uniquely specified by its ``data_id``, but not necessarily by its +name. Several different "versions" of a dataset with the same name can exist +which can contain entirely different datasets. +If a particular version of a dataset has been found to contain significant +issues, it might be deactivated. Using a name to specify a dataset will yield +the earliest version of a dataset that is still active. That means that +``fetch_openml(name="miceprotein")`` can yield different results at different +times if earlier versions become inactive. +You can see that the dataset with ``data_id`` 40966 that we fetched above is +the version 1 of the "miceprotein" dataset:: + + >>> mice.details['version'] #doctest: +SKIP + '1' + +In fact, this dataset only has one version. The iris dataset on the other hand +has multiple versions:: + + >>> iris = fetch_openml(name="iris") + >>> iris.details['version'] #doctest: +SKIP + '1' + >>> iris.details['id'] #doctest: +SKIP + '61' + + >>> iris_61 = fetch_openml(data_id=61) + >>> iris_61.details['version'] + '1' + >>> iris_61.details['id'] + '61' + + >>> iris_969 = fetch_openml(data_id=969) + >>> iris_969.details['version'] + '3' + >>> iris_969.details['id'] + '969' + +Specifying the dataset by the name "iris" yields the lowest version, version 1, +with the ``data_id`` 61. To make sure you always get this exact dataset, it is +safest to specify it by the dataset ``data_id``. 
The other dataset, with +``data_id`` 969, is version 3 (version 2 has become inactive), and contains a +binarized version of the data:: + + >>> np.unique(iris_969.target) + array(['N', 'P'], dtype=object) + +You can also specify both the name and the version, which also uniquely +identifies the dataset:: + + >>> iris_version_3 = fetch_openml(name="iris", version=3) + >>> iris_version_3.details['version'] + '3' + >>> iris_version_3.details['id'] + '969' + + +.. topic:: References: + + * Vanschoren, van Rijn, Bischl and Torgo + `"OpenML: networked science in machine learning" + `_, + ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014. + .. _external_datasets: Loading from external datasets diff --git a/doc/datasets/openml.rst b/doc/datasets/openml.rst deleted file mode 100644 index 52dd453919522..0000000000000 --- a/doc/datasets/openml.rst +++ /dev/null @@ -1,148 +0,0 @@ -.. - For doctests: - - >>> import numpy as np - >>> import os - - -.. _openml: - -Downloading datasets from the openml.org repository -=================================================== - -`openml.org `_ is a public repository for machine learning -data and experiments, that allows everybody to upload open datasets. - -The ``sklearn.datasets`` package is able to download datasets -from the repository using the function -:func:`sklearn.datasets.fetch_openml`. - -For example, to download a dataset of gene expressions in mice brains:: - - >>> from sklearn.datasets import fetch_openml - >>> mice = fetch_openml(name='miceprotein', version=4) - -To fully specify a dataset, you need to provide a name and a version, though -the version is optional, see :ref:`openml_versions` below. -The dataset contains a total of 1080 examples belonging to 8 different -classes:: - - >>> mice.data.shape - (1080, 77) - >>> mice.target.shape - (1080,) - >>> np.unique(mice.target) # doctest: +NORMALIZE_WHITESPACE - array(['c-CS-m', 'c-CS-s', 'c-SC-m', 'c-SC-s', 't-CS-m', 't-CS-s', 't-SC-m', 't-SC-s'], dtype=object) - -You can get more information on the dataset by looking at the ``DESCR`` -and ``details`` attributes:: - - >>> print(mice.DESCR) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP - **Author**: Clara Higuera, Katheleen J. Gardiner, Krzysztof J. Cios - **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Mice+Protein+Expression) - 2015 - **Please cite**: Higuera C, Gardiner KJ, Cios KJ (2015) Self-Organizing - Feature Maps Identify Proteins Critical to Learning in a Mouse Model of Down - Syndrome. PLoS ONE 10(6): e0129126... - - >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP - {'id': '40966', 'name': 'MiceProtein', 'version': '4', 'format': 'ARFF', - 'upload_date': '2017-11-08T16:00:15', 'licence': 'Public', - 'url': 'https://www.openml.org/data/v1/download/17928620/MiceProtein.arff', - 'file_id': '17928620', 'default_target_attribute': 'class', - 'row_id_attribute': 'MouseID', - 'ignore_attribute': ['Genotype', 'Treatment', 'Behavior'], - 'tag': ['OpenML-CC18', 'study_135', 'study_98', 'study_99'], - 'visibility': 'public', 'status': 'active', - 'md5_checksum': '3c479a6885bfa0438971388283a1ce32'} - - -The ``DESCR`` contains a free-text description of the data, while ``details`` -contains a dictionary of meta-data stored by openml, like the dataset id. 
-For more details, see the `OpenML documentation -`_ The ``data_id`` of the mice protein dataset -is 40966, and you can use this (or the name) to get more information on the -dataset on the openml website:: - - >>> mice.url - 'https://www.openml.org/d/40966' - -The ``data_id`` also uniquely identifies a dataset from OpenML:: - - >>> mice = fetch_openml(data_id=40966) - >>> mice.details # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS +SKIP - {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF', - 'creator': ..., - 'upload_date': '2016-02-17T14:32:49', 'licence': 'Public', 'url': - 'https://www.openml.org/data/v1/download/1804243/MiceProtein.ARFF', 'file_id': - '1804243', 'default_target_attribute': 'class', 'citation': 'Higuera C, - Gardiner KJ, Cios KJ (2015) Self-Organizing Feature Maps Identify Proteins - Critical to Learning in a Mouse Model of Down Syndrome. PLoS ONE 10(6): - e0129126. [Web Link] journal.pone.0129126', 'tag': ['OpenML100', 'study_14', - 'study_34'], 'visibility': 'public', 'status': 'active', 'md5_checksum': - '3c479a6885bfa0438971388283a1ce32'} - -.. _openml_versions: - -Dataset Versions ----------------- - -A dataset is uniquely specified by its ``data_id``, but not necessarily by its -name. Several different "versions" of a dataset with the same name can exist -which can contain entirely different datasets. -If a particular version of a dataset has been found to contain significant -issues, it might be deactivated. Using a name to specify a dataset will yield -the earliest version of a dataset that is still active. That means that -``fetch_openml(name="miceprotein")`` can yield different results at different -times if earlier versions become inactive. -You can see that the dataset with ``data_id`` 40966 that we fetched above is -the version 1 of the "miceprotein" dataset:: - - >>> mice.details['version'] #doctest: +SKIP - '1' - -In fact, this dataset only has one version. The iris dataset on the other hand -has multiple versions:: - - >>> iris = fetch_openml(name="iris") - >>> iris.details['version'] #doctest: +SKIP - '1' - >>> iris.details['id'] #doctest: +SKIP - '61' - - >>> iris_61 = fetch_openml(data_id=61) - >>> iris_61.details['version'] - '1' - >>> iris_61.details['id'] - '61' - - >>> iris_969 = fetch_openml(data_id=969) - >>> iris_969.details['version'] - '3' - >>> iris_969.details['id'] - '969' - -Specifying the dataset by the name "iris" yields the lowest version, version 1, -with the ``data_id`` 61. To make sure you always get this exact dataset, it is -safest to specify it by the dataset ``data_id``. The other dataset, with -``data_id`` 969, is version 3 (version 2 has become inactive), and contains a -binarized version of the data:: - - >>> np.unique(iris_969.target) - array(['N', 'P'], dtype=object) - -You can also specify both the name and the version, which also uniquely -identifies the dataset:: - - >>> iris_version_3 = fetch_openml(name="iris", version=3) - >>> iris_version_3.details['version'] - '3' - >>> iris_version_3.details['id'] - '969' - - -.. topic:: References: - - * Vanschoren, van Rijn, Bischl and Torgo - `"OpenML: networked science in machine learning" - `_, - ACM SIGKDD Explorations Newsletter, 15(2), 49-60, 2014. diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index a58aa7482cda3..d667cb3699b28 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -367,6 +367,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None, (not both). 
In case a name is given, a version can also be provided. + Read more in the :ref:`User Guide `. + .. note:: EXPERIMENTAL The API is experimental in version 0.20 (particularly the return value From e36254c98ed10dca70be3997765387655478b44f Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 13 Sep 2018 13:56:19 +0200 Subject: [PATCH 043/163] MNT: Anonimize IP for Google Analytics (#12038) --- doc/themes/scikit-learn/layout.html | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/doc/themes/scikit-learn/layout.html b/doc/themes/scikit-learn/layout.html index 79ddd08093012..21136856aa6d2 100644 --- a/doc/themes/scikit-learn/layout.html +++ b/doc/themes/scikit-learn/layout.html @@ -340,17 +340,13 @@
Machine Learning in Python
{% if theme_google_analytics|tobool %}
[hunk, script markup stripped in extraction: the old ten-line Google Analytics tracking script is removed and a six-line replacement is added that anonymizes visitor IP addresses, per the commit message and the diffstat above]
{% endif %}