Use fetch_openml · scikit-learn/scikit-learn@22e1ab8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 22e1ab8

Browse files
committed
Use fetch_openml
1 parent 767e13f commit 22e1ab8

File tree

4 files changed

+14
-71
lines changed

4 files changed

+14
-71
lines changed

examples/linear_model/plot_sgd_early_stopping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
import matplotlib.pyplot as plt
4848

4949
from sklearn import linear_model
50-
from sklearn.datasets import fetch_mldata
50+
from sklearn.datasets import fetch_openml
5151
from sklearn.model_selection import train_test_split
5252
from sklearn.utils.testing import ignore_warnings
5353
from sklearn.exceptions import ConvergenceWarning
@@ -58,7 +58,7 @@
5858

5959
def load_mnist(n_samples=None, class_0=0, class_1=8):
6060
"""Load MNIST, select two classes, shuffle and return only n_samples."""
61-
mnist = fetch_mldata('MNIST original')
61+
mnist = fetch_openml('mnist_784', version=1)
6262

6363
# take only two classes for binary classification
6464
mask = np.logical_or(mnist.target == class_0, mnist.target == class_1)

examples/linear_model/plot_sparse_logistic_regression_mnist.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,14 @@
1717
1818
"""
1919
import time
20-
import io
2120
import matplotlib.pyplot as plt
2221
import numpy as np
23-
from scipy.io.arff import loadarff
2422

25-
from sklearn.datasets import get_data_home
26-
from sklearn.externals.joblib import Memory
23+
from sklearn.datasets import fetch_openml
2724
from sklearn.linear_model import LogisticRegression
2825
from sklearn.model_selection import train_test_split
2926
from sklearn.preprocessing import StandardScaler
3027
from sklearn.utils import check_random_state
31-
try:
32-
from urllib.request import urlopen
33-
except ImportError:
34-
# Python 2
35-
from urllib2 import urlopen
3628

3729
print(__doc__)
3830

@@ -43,19 +35,9 @@
4335
t0 = time.time()
4436
train_samples = 5000
4537

46-
memory = Memory(get_data_home())
47-
48-
49-
@memory.cache()
50-
def fetch_mnist():
51-
content = urlopen(
52-
'https://www.openml.org/data/download/52667/mnist_784.arff').read()
53-
data, meta = loadarff(io.StringIO(content.decode('utf8')))
54-
data = data.view([('pixels', '<f8', 784), ('class', '|S1')])
55-
return data['pixels'], data['class']
56-
57-
58-
X, y = fetch_mnist()
38+
mnist = fetch_openml('mnist_784', version=1)
39+
X = mnist.data
40+
y = mnist.target
5941

6042
random_state = check_random_state(0)
6143
permutation = random_state.permutation(X.shape[0])

examples/multioutput/plot_classifier_chain_yeast.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -35,43 +35,21 @@
3535
# Author: Adam Kleczewski
3636
# License: BSD 3 clause
3737

38-
import io
3938
import numpy as np
4039
import matplotlib.pyplot as plt
41-
from scipy.io.arff import loadarff
42-
from sklearn.datasets import get_data_home
43-
from sklearn.externals.joblib import Memory
40+
from sklearn.datasets import fetch_openml
4441
from sklearn.multioutput import ClassifierChain
4542
from sklearn.model_selection import train_test_split
4643
from sklearn.multiclass import OneVsRestClassifier
4744
from sklearn.metrics import jaccard_similarity_score
4845
from sklearn.linear_model import LogisticRegression
49-
try:
50-
from urllib.request import urlopen
51-
except ImportError:
52-
# Python 2
53-
from urllib2 import urlopen
5446

5547
print(__doc__)
5648

5749
# Load a multi-label dataset
58-
59-
memory = Memory(get_data_home())
60-
61-
62-
@memory.cache()
63-
def fetch_yeast():
64-
url = 'https://www.openml.org/data/download/4644190/file2754771351f4.arff'
65-
content = urlopen(url).read()
66-
# loadarff doesn't like nominals to be quoted
67-
content = content.decode('utf8').replace('"', '')
68-
data, meta = loadarff(io.StringIO(content))
69-
data = data.view([('features', '<f8', 103), ('classes', '|S5', 14)])
70-
return data['features'], data['classes'] == b'TRUE'
71-
72-
73-
X, Y = fetch_yeast()
74-
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2,
50+
yeast = fetch_openml('yeast', version=2)
51+
X_train, X_test, Y_train, Y_test = train_test_split(yeast.data, yeast.target,
52+
test_size=.2,
7553
random_state=0)
7654

7755
# Fit an independent logistic regression model for each class using the

examples/neural_networks/plot_mnist_filters.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,33 +20,16 @@
2020
for a very short time. Training longer would result in weights with a much
2121
smoother spatial appearance.
2222
"""
23-
import io
24-
from scipy.io.arff import loadarff
2523
import matplotlib.pyplot as plt
26-
from sklearn.datasets import get_data_home
27-
from sklearn.externals.joblib import Memory
24+
from sklearn.datasets import fetch_openml
2825
from sklearn.neural_network import MLPClassifier
29-
try:
30-
from urllib.request import urlopen
31-
except ImportError:
32-
# Python 2
33-
from urllib2 import urlopen
3426

3527
print(__doc__)
3628

37-
memory = Memory(get_data_home())
29+
mnist = fetch_openml('mnist_784', version=1)
30+
X = mnist.data
31+
y = mnist.target
3832

39-
40-
@memory.cache()
41-
def fetch_mnist():
42-
content = urlopen(
43-
'https://www.openml.org/data/download/52667/mnist_784.arff').read()
44-
data, meta = loadarff(io.StringIO(content.decode('utf8')))
45-
data = data.view([('pixels', '<f8', 784), ('class', '|S1')])
46-
return data['pixels'], data['class']
47-
48-
49-
X, y = fetch_mnist()
5033
# rescale the data, use the traditional train/test split
5134
X_train, X_test = X[:60000], X[60000:]
5235
y_train, y_test = y[:60000], y[60000:]

0 commit comments

Comments
 (0)
0