[MRG] Additional Warnings in case OpenML auto-detected a problem with… · xhluca/scikit-learn@c657b03 · GitHub
[go: up one dir, main page]

Skip to content

Commit c657b03

Browse files
janvanrijn authored and Xing committed
[MRG] Additional Warnings in case OpenML auto-detected a problem with dataset (scikit-learn#12541)
* added additional warning output
* added features gzip
* added gzipped datasets
* fix file naming
* changed expected warning msg
1 parent 664a23c commit c657b03

8 files changed

+30
-0
lines changed

sklearn/datasets/openml.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,12 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
511511
data_description['version'],
512512
data_description['name'],
513513
data_description['url']))
514+
if 'error' in data_description:
515+
warn("OpenML registered a problem with the dataset. It might be "
516+
"unusable. Error: {}".format(data_description['error']))
517+
if 'warning' in data_description:
518+
warn("OpenML raised a warning on the dataset. It might be "
519+
"unusable. Warning: {}".format(data_description['warning']))
514520

515521
# download data features, meta-info about column types
516522
features_list = _get_data_features(data_id, data_home)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

sklearn/datasets/tests/test_openml.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,30 @@ def test_string_attribute(monkeypatch, gzip_response):
607607
fetch_openml, data_id=data_id, cache=False)
608608

609609

610+
@pytest.mark.parametrize('gzip_response', [True, False])
611+
def test_dataset_with_openml_error(monkeypatch, gzip_response):
612+
data_id = 1
613+
_monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
614+
assert_warns_message(
615+
UserWarning,
616+
"OpenML registered a problem with the dataset. It might be unusable. "
617+
"Error:",
618+
fetch_openml, data_id=data_id, cache=False
619+
)
620+
621+
622+
@pytest.mark.parametrize('gzip_response', [True, False])
623+
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
624+
data_id = 3
625+
_monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
626+
assert_warns_message(
627+
UserWarning,
628+
"OpenML raised a warning on the dataset. It might be unusable. "
629+
"Warning:",
630+
fetch_openml, data_id=data_id, cache=False
631+
)
632+
633+
610634
@pytest.mark.parametrize('gzip_response', [True, False])
611635
def test_illegal_column(monkeypatch, gzip_response):
612636
data_id = 61

0 commit comments

Comments
 (0)
0