From 2be4cd089546c173457d1dc9fe95152cedeeb36a Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 00:40:50 +0800
Subject: [PATCH 01/11] update examples using fetch_openml

---
 examples/gaussian_process/plot_gpr_co2.py                     | 4 ++--
 examples/linear_model/plot_sgd_early_stopping.py              | 4 ++--
 .../linear_model/plot_sparse_logistic_regression_mnist.py     | 4 ++--
 examples/multioutput/plot_classifier_chain_yeast.py           | 4 ++--
 examples/neural_networks/plot_mnist_filters.py                | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index b0b271a3644a2..86a940f28c4c6 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -70,9 +70,9 @@
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels \
     import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 
-data = fetch_mldata('mauna-loa-atmospheric-co2').data
+data = fetch_openml('mauna-loa-atmospheric-co2').data
 X = data[:, [1]]
 y = data[:, 0]
 
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py
index 31ce61f39d22c..60696aea1aab3 100644
--- a/examples/linear_model/plot_sgd_early_stopping.py
+++ b/examples/linear_model/plot_sgd_early_stopping.py
@@ -47,7 +47,7 @@
 import matplotlib.pyplot as plt
 
 from sklearn import linear_model
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 from sklearn.model_selection import train_test_split
 from sklearn.utils.testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
@@ -58,7 +58,7 @@
 
 def load_mnist(n_samples=None, class_0=0, class_1=8):
     """Load MNIST, select two classes, shuffle and return only n_samples."""
-    mnist = fetch_mldata('MNIST original')
+    mnist = fetch_openml('MNIST original')
 
     # take only two classes for binary classification
     mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)
diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
index 5610f471b5d05..175c2ab9403cf 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
@@ -20,7 +20,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
@@ -35,7 +35,7 @@
 t0 = time.time()
 train_samples = 5000
 
-mnist = fetch_mldata('MNIST original')
+mnist = fetch_openml('MNIST original')
 X = mnist.data.astype('float64')
 y = mnist.target
 random_state = check_random_state(0)
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index 6a90e14dfc379..2c4245f8ef3ad 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -44,10 +44,10 @@
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.metrics import jaccard_similarity_score
 from sklearn.linear_model import LogisticRegression
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 
 # Load a multi-label dataset
-yeast = fetch_mldata('yeast')
+yeast = fetch_openml('yeast')
 X = yeast['data']
 Y = yeast['target'].transpose().toarray()
 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 6c3b8b2284ea2..144bc3a514b60 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -23,10 +23,10 @@
 print(__doc__)
 
 import matplotlib.pyplot as plt
-from sklearn.datasets import fetch_mldata
+from sklearn.datasets import fetch_openml
 from sklearn.neural_network import MLPClassifier
 
-mnist = fetch_mldata("MNIST original")
+mnist = fetch_openml("MNIST original")
 # rescale the data, use the traditional train/test split
 X, y = mnist.data / 255., mnist.target
 X_train, X_test = X[:60000], X[60000:]

From 65bb1a738a64df6f23e81402c7fb97da60025a9e Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 01:19:27 +0800
Subject: [PATCH 02/11] add return_X_y for fetch_openml

---
 sklearn/datasets/openml.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 6864fa865abc0..ce6832abfd1a3 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -347,7 +347,7 @@ def _verify_target_data_type(features_dict, target_columns):
 
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
-                 target_column='default-target', cache=True):
+                 target_column='default-target', cache=True, return_X_y=False):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -394,6 +394,10 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     cache : boolean, default=True
         Whether to cache downloaded datasets using joblib.
+        
+    return_X_y : boolean, default=False.
+        If True, returns ``(data, target)`` instead of a Bunch object.
+            See below for more information about the `data` and `target` object
 
     Returns
     -------
@@ -415,6 +419,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
             that the value encoded as i is ith in the list.
         details : dict
             More metadata from OpenML
+            
+    (data, target) : tuple if ``return_X_y`` is True
 
         .. note:: EXPERIMENTAL
 
@@ -562,5 +568,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))
+    
+    if return_X_y:
+        return X, y
 
     return bunch

From 4bf5d7b33d49581a2bed78410f679d4a05e9a481 Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 02:25:01 +0800
Subject: [PATCH 03/11] modify mnist name

---
 examples/gaussian_process/plot_gpr_co2.py                     | 4 ++--
 examples/linear_model/plot_sgd_early_stopping.py              | 2 +-
 .../linear_model/plot_sparse_logistic_regression_mnist.py     | 2 +-
 examples/multioutput/plot_classifier_chain_yeast.py           | 4 ++--
 examples/neural_networks/plot_mnist_filters.py                | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index 86a940f28c4c6..b0b271a3644a2 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -70,9 +70,9 @@
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels \
     import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
-from sklearn.datasets import fetch_openml
+from sklearn.datasets import fetch_mldata
 
-data = fetch_openml('mauna-loa-atmospheric-co2').data
+data = fetch_mldata('mauna-loa-atmospheric-co2').data
 X = data[:, [1]]
 y = data[:, 0]
 
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py
index 60696aea1aab3..7b8b5f50bc93f 100644
--- a/examples/linear_model/plot_sgd_early_stopping.py
+++ b/examples/linear_model/plot_sgd_early_stopping.py
@@ -58,7 +58,7 @@
 
 def load_mnist(n_samples=None, class_0=0, class_1=8):
     """Load MNIST, select two classes, shuffle and return only n_samples."""
-    mnist = fetch_openml('MNIST original')
+    mnist = fetch_openml('mnist_784')
 
     # take only two classes for binary classification
     mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)
diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
index 175c2ab9403cf..eced6dffd787f 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
@@ -35,7 +35,7 @@
 t0 = time.time()
 train_samples = 5000
 
-mnist = fetch_openml('MNIST original')
+mnist = fetch_openml('mnist_784')
 X = mnist.data.astype('float64')
 y = mnist.target
 random_state = check_random_state(0)
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index 2c4245f8ef3ad..6a90e14dfc379 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -44,10 +44,10 @@
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.metrics import jaccard_similarity_score
 from sklearn.linear_model import LogisticRegression
-from sklearn.datasets import fetch_openml
+from sklearn.datasets import fetch_mldata
 
 # Load a multi-label dataset
-yeast = fetch_openml('yeast')
+yeast = fetch_mldata('yeast')
 X = yeast['data']
 Y = yeast['target'].transpose().toarray()
 X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 144bc3a514b60..8c7beedda48fb 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -26,7 +26,7 @@
 from sklearn.datasets import fetch_openml
 from sklearn.neural_network import MLPClassifier
 
-mnist = fetch_openml("MNIST original")
+mnist = fetch_openml("mnist_784")
 # rescale the data, use the traditional train/test split
 X, y = mnist.data / 255., mnist.target
 X_train, X_test = X[:60000], X[60000:]

From 2a5b66c9699c10807f717fb18b80c3b21b455fcf Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 02:53:44 +0800
Subject: [PATCH 04/11] roll back examples

---
 examples/linear_model/plot_sgd_early_stopping.py              | 4 ++--
 .../linear_model/plot_sparse_logistic_regression_mnist.py     | 4 ++--
 examples/neural_networks/plot_mnist_filters.py                | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py
index 7b8b5f50bc93f..31ce61f39d22c 100644
--- a/examples/linear_model/plot_sgd_early_stopping.py
+++ b/examples/linear_model/plot_sgd_early_stopping.py
@@ -47,7 +47,7 @@
 import matplotlib.pyplot as plt
 
 from sklearn import linear_model
-from sklearn.datasets import fetch_openml
+from sklearn.datasets import fetch_mldata
 from sklearn.model_selection import train_test_split
 from sklearn.utils.testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
@@ -58,7 +58,7 @@
 
 def load_mnist(n_samples=None, class_0=0, class_1=8):
     """Load MNIST, select two classes, shuffle and return only n_samples."""
-    mnist = fetch_openml('mnist_784')
+    mnist = fetch_mldata('MNIST original')
 
     # take only two classes for binary classification
     mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)
diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
index eced6dffd787f..5610f471b5d05 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
@@ -20,7 +20,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.datasets import fetch_openml
+from sklearn.datasets import fetch_mldata
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
@@ -35,7 +35,7 @@
 t0 = time.time()
 train_samples = 5000
 
-mnist = fetch_openml('mnist_784')
+mnist = fetch_mldata('MNIST original')
 X = mnist.data.astype('float64')
 y = mnist.target
 random_state = check_random_state(0)
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 8c7beedda48fb..6c3b8b2284ea2 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -23,10 +23,10 @@
 print(__doc__)
 
 import matplotlib.pyplot as plt
-from sklearn.datasets import fetch_openml
+from sklearn.datasets import fetch_mldata
 from sklearn.neural_network import MLPClassifier
 
-mnist = fetch_openml("mnist_784")
+mnist = fetch_mldata("MNIST original")
 # rescale the data, use the traditional train/test split
 X, y = mnist.data / 255., mnist.target
 X_train, X_test = X[:60000], X[60000:]

From 68981bf78cb22e32649eaec7d6b99e9ddb41cf83 Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 03:22:38 +0800
Subject: [PATCH 05/11] delete whitespace

---
 sklearn/datasets/openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index ce6832abfd1a3..32d6c2adf597f 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -568,7 +568,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))
-    
+
     if return_X_y:
         return X, y
 

From 7ac4dfb27cb4937400590b316ce966f0bcdb67bc Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 14:29:51 +0800
Subject: [PATCH 06/11] fix pep8 error

---
 sklearn/datasets/openml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 32d6c2adf597f..f87a0b301d354 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -394,7 +394,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     cache : boolean, default=True
         Whether to cache downloaded datasets using joblib.
-        
+
     return_X_y : boolean, default=False.
         If True, returns ``(data, target)`` instead of a Bunch object.
             See below for more information about the `data` and `target` object
@@ -419,7 +419,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
             that the value encoded as i is ith in the list.
         details : dict
             More metadata from OpenML
-            
+
     (data, target) : tuple if ``return_X_y`` is True
 
         .. note:: EXPERIMENTAL

From e6a702dc80b54696f631f2a4700ba93bca789399 Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 15:20:55 +0800
Subject: [PATCH 07/11] add check_return_X_y for test_openml

---
 sklearn/datasets/tests/test_openml.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index da2b913eda7ed..346aa841d7016 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -15,7 +15,8 @@
                                    assert_raise_message)
 from sklearn.externals.six import string_types
 from sklearn.externals.six.moves.urllib.error import HTTPError
-
+from sklearn.datasets.tests.test_common import check_return_X_y
+from functools import partial
 
 currdir = os.path.dirname(os.path.abspath(__file__))
 # if True, urlopen will be monkey patched to only use local files
@@ -124,6 +125,11 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version,
         # np.isnan doesn't work on CSR matrix
         assert (np.count_nonzero(np.isnan(data_by_id.data)) ==
                 expected_missing)
+
+    # test return_X_y option
+    fetch_func = partial(fetch_openml, data_id=data_id, cache=False,
+                         target_column=target_column)
+    check_return_X_y(data_by_id, fetch_func)
     return data_by_id
 
 

From 18cbe0fad64d58e2fb1f705aed29d626af11e0bc Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Fri, 17 Aug 2018 15:24:22 +0800
Subject: [PATCH 08/11] add missing blank line after imports in test_openml

---
 sklearn/datasets/tests/test_openml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 346aa841d7016..3a24213062fc2 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -18,6 +18,7 @@
 from sklearn.datasets.tests.test_common import check_return_X_y
 from functools import partial
 
+
 currdir = os.path.dirname(os.path.abspath(__file__))
 # if True, urlopen will be monkey patched to only use local files
 test_offline = True

From e50c91b20cbbb578dde53ba2f5fbbfe854f0e168 Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Sat, 18 Aug 2018 04:07:29 +0800
Subject: [PATCH 09/11] fix format

---
 sklearn/datasets/openml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index f87a0b301d354..150e6b5b1a5b0 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -396,8 +396,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         Whether to cache downloaded datasets using joblib.
 
     return_X_y : boolean, default=False.
-        If True, returns ``(data, target)`` instead of a Bunch object.
-            See below for more information about the `data` and `target` object
+        If True, returns ``(data, target)`` instead of a Bunch object. 
+        See below for more information about the `data` and `target` objects.
 
     Returns
     -------

From a360473f7319347991f4bfdaecd68fc47aba51eb Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Sat, 18 Aug 2018 04:23:44 +0800
Subject: [PATCH 10/11] fix format

---
 sklearn/datasets/openml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 150e6b5b1a5b0..f7b8ea785ee0c 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -396,8 +396,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         Whether to cache downloaded datasets using joblib.
 
     return_X_y : boolean, default=False.
-        If True, returns ``(data, target)`` instead of a Bunch object. 
-        See below for more information about the `data` and `target` objects.
+        If True, returns ``(data, target)`` instead of a Bunch object. See
+        below for more information about the `data` and `target` objects.
 
     Returns
     -------

From 85f88b3c156849c7aab96a1df3a2fbacf9009a4a Mon Sep 17 00:00:00 2001
From: vufg <nthupzf@gmail.com>
Date: Sat, 18 Aug 2018 23:32:14 +0800
Subject: [PATCH 11/11] move return_X_y

---
 sklearn/datasets/openml.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index f7b8ea785ee0c..a35fa51307993 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -563,13 +563,13 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     elif y.shape[1] == 0:
         y = None
 
+    if return_X_y:
+        return X, y
+
     bunch = Bunch(
         data=X, target=y, feature_names=data_columns,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))
 
-    if return_X_y:
-        return X, y
-
     return bunch