BUG: for several datasets, ``download_if_missing`` keyword was ignored. · maskani-moh/scikit-learn@72085ff · GitHub
[go: up one dir, main page]

Skip to content

Commit 72085ff

Browse files
rgommers authored and maskani-moh committed
BUG: for several datasets, download_if_missing keyword was ignored. (scikit-learn#7944)
1 parent a1b8f90 commit 72085ff

File tree

7 files changed

+21
-8
lines changed

7 files changed

+21
-8
lines changed

sklearn/datasets/california_housing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,12 @@ def fetch_california_housing(data_home=None, download_if_missing=True):
8787
data_home = get_data_home(data_home=data_home)
8888
if not exists(data_home):
8989
makedirs(data_home)
90+
9091
filepath = _pkl_filepath(data_home, TARGET_FILENAME)
9192
if not exists(filepath):
93+
if not download_if_missing:
94+
raise IOError("Data not found and `download_if_missing` is False")
95+
9296
print('downloading Cal. housing from %s to %s' % (DATA_URL, data_home))
9397
archive_fileobj = BytesIO(urlopen(DATA_URL).read())
9498
fileobj = tarfile.open(

sklearn/datasets/covtype.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ def fetch_covtype(data_home=None, download_if_missing=True,
9999

100100
joblib.dump(X, samples_path, compress=9)
101101
joblib.dump(y, targets_path, compress=9)
102+
elif not available:
103+
if not download_if_missing:
104+
raise IOError("Data not found and `download_if_missing` is False")
102105

103106
try:
104107
X, y

sklearn/datasets/kddcup99.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,9 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
345345

346346
joblib.dump(X, samples_path, compress=0)
347347
joblib.dump(y, targets_path, compress=0)
348+
elif not available:
349+
if not download_if_missing:
350+
raise IOError("Data not found and `download_if_missing` is False")
348351

349352
try:
350353
X, y

sklearn/datasets/olivetti_faces.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
111111
makedirs(data_home)
112112
filepath = _pkl_filepath(data_home, TARGET_FILENAME)
113113
if not exists(filepath):
114+
if not download_if_missing:
115+
raise IOError("Data not found and `download_if_missing` is False")
116+
114117
print('downloading Olivetti faces from %s to %s'
115118
% (DATA_URL, data_home))
116119
fhandle = urlopen(DATA_URL)
@@ -121,6 +124,7 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
121124
del mfile
122125
else:
123126
faces = joblib.load(filepath)
127+
124128
# We want floating point data, but float32 is enough (there is only
125129
# one byte of precision in the original uint8s anyway)
126130
faces = np.float32(faces)

sklearn/datasets/species_distributions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,9 @@ def fetch_species_distributions(data_home=None,
222222
archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)
223223

224224
if not exists(archive_path):
225+
if not download_if_missing:
226+
raise IOError("Data not found and `download_if_missing` is False")
227+
225228
print('Downloading species data from %s to %s' % (SAMPLES_URL,
226229
data_home))
227230
X = np.load(BytesIO(urlopen(SAMPLES_URL).read()))

sklearn/datasets/tests/test_covtype.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
Skipped if covtype is not already downloaded to data_home.
44
"""
55

6-
import errno
76
from sklearn.datasets import fetch_covtype
87
from sklearn.utils.testing import assert_equal, SkipTest
98

@@ -15,9 +14,8 @@ def fetch(*args, **kwargs):
1514
def test_fetch():
1615
try:
1716
data1 = fetch(shuffle=True, random_state=42)
18-
except IOError as e:
19-
if e.errno == errno.ENOENT:
20-
raise SkipTest("Covertype dataset can not be loaded.")
17+
except IOError:
18+
raise SkipTest("Covertype dataset can not be loaded.")
2119

2220
data2 = fetch(shuffle=True, random_state=37)
2321

sklearn/datasets/tests/test_kddcup99.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,15 @@
55
scikit-learn data folder.
66
"""
77

8-
import errno
98
from sklearn.datasets import fetch_kddcup99
109
from sklearn.utils.testing import assert_equal, SkipTest
1110

1211

1312
def test_percent10():
1413
try:
1514
data = fetch_kddcup99(download_if_missing=False)
16-
except IOError as e:
17-
if e.errno == errno.ENOENT:
18-
raise SkipTest("kddcup99 dataset can not be loaded.")
15+
except IOError:
16+
raise SkipTest("kddcup99 dataset can not be loaded.")
1917

2018
assert_equal(data.data.shape, (494021, 41))
2119
assert_equal(data.target.shape, (494021,))

0 commit comments

Comments
 (0)
0