API make load_* args in datasets kwarg only (#16719) · viclafargue/scikit-learn@73ee000 · GitHub
[go: up one dir, main page]

Skip to content

Commit 73ee000

Browse files
adrinjalali, NicolasHug, thomasjpfan
authored and committed
API make load_* args in datasets kwarg only (scikit-learn#16719)
* API make load_* args in datasets kwarg only
* more loaders
* pep8
* fix test_omp usage
* fix some usages
* Update sklearn/datasets/_samples_generator.py

Co-Authored-By: Thomas J Fan <thomasjpfan@gmail.com>
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Thomas J Fan <thomasjpfan@gmail.com>
1 parent 58bd011 commit 73ee000

14 files changed

+117
-47
lines changed

sklearn/datasets/_base.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from ..utils import Bunch
1818
from ..utils import check_random_state
1919
from ..utils import check_pandas_support
20+
from ..utils.validation import _deprecate_positional_args
2021

2122
import numpy as np
2223

@@ -80,7 +81,8 @@ def _convert_data_dataframe(caller_name, data, target,
8081
return combined_df, X, y
8182

8283

83-
def load_files(container_path, description=None, categories=None,
84+
@_deprecate_positional_args
85+
def load_files(container_path, *, description=None, categories=None,
8486
load_content=True, shuffle=True, encoding=None,
8587
decode_error='strict', random_state=0):
8688
"""Load text files with categories as subfolder names.
@@ -267,7 +269,8 @@ def load_data(module_path, data_file_name):
267269
return data, target, target_names
268270

269271

270-
def load_wine(return_X_y=False, as_frame=False):
272+
@_deprecate_positional_args
273+
def load_wine(*, return_X_y=False, as_frame=False):
271274
"""Load and return the wine dataset (classification).
272275
273276
.. versionadded:: 0.18
@@ -381,7 +384,8 @@ def load_wine(return_X_y=False, as_frame=False):
381384
feature_names=feature_names)
382385

383386

384-
def load_iris(return_X_y=False, as_frame=False):
387+
@_deprecate_positional_args
388+
def load_iris(*, return_X_y=False, as_frame=False):
385389
"""Load and return the iris dataset (classification).
386390
387391
The iris dataset is a classic and very easy multi-class classification
@@ -495,7 +499,8 @@ def load_iris(return_X_y=False, as_frame=False):
495499
filename=iris_csv_filename)
496500

497501

498-
def load_breast_cancer(return_X_y=False, as_frame=False):
502+
@_deprecate_positional_args
503+
def load_breast_cancer(*, return_X_y=False, as_frame=False):
499504
"""Load and return the breast cancer wisconsin dataset (classification).
500505
501506
The breast cancer dataset is a classic and very easy binary classification
@@ -619,7 +624,8 @@ def load_breast_cancer(return_X_y=False, as_frame=False):
619624
filename=csv_filename)
620625

621626

622-
def load_digits(n_class=10, return_X_y=False, as_frame=False):
627+
@_deprecate_positional_args
628+
def load_digits(*, n_class=10, return_X_y=False, as_frame=False):
623629
"""Load and return the digits dataset (classification).
624630
625631
Each datapoint is a 8x8 image of a digit.
@@ -742,7 +748,8 @@ def load_digits(n_class=10, return_X_y=False, as_frame=False):
742748
DESCR=descr)
743749

744750

745-
def load_diabetes(return_X_y=False, as_frame=False):
751+
@_deprecate_positional_args
752+
def load_diabetes(*, return_X_y=False, as_frame=False):
746753
"""Load and return the diabetes dataset (regression).
747754
748755
============== ==================
@@ -834,7 +841,8 @@ def load_diabetes(return_X_y=False, as_frame=False):
834841
target_filename=target_filename)
835842

836843

837-
def load_linnerud(return_X_y=False, as_frame=False):
844+
@_deprecate_positional_args
845+
def load_linnerud(*, return_X_y=False, as_frame=False):
838846
"""Load and return the physical excercise linnerud dataset.
839847
840848
This dataset is suitable for multi-ouput regression tasks.
@@ -937,7 +945,8 @@ def load_linnerud(return_X_y=False, as_frame=False):
937945
target_filename=target_filename)
938946

939947

940-
def load_boston(return_X_y=False):
948+
@_deprecate_positional_args
949+
def load_boston(*, return_X_y=False):
941950
"""Load and return the boston house-prices dataset (regression).
942951
943952
============== ==============

sklearn/datasets/_california_housing.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@
3636
from ._base import _pkl_filepath
3737
from ._base import RemoteFileMetadata
3838
from ..utils import Bunch
39+
from ..utils.validation import _deprecate_positional_args
40+
3941

4042
# The original data can be found at:
4143
# https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
@@ -48,7 +50,8 @@
4850
logger = logging.getLogger(__name__)
4951

5052

51-
def fetch_california_housing(data_home=None, download_if_missing=True,
53+
@_deprecate_positional_args
54+
def fetch_california_housing(*, data_home=None, download_if_missing=True,
5255
return_X_y=False, as_frame=False):
5356
"""Load the California housing dataset (regression).
5457

sklearn/datasets/_covtype.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@
2828
from ..utils import Bunch
2929
from ._base import _pkl_filepath
3030
from ..utils import check_random_state
31+
from ..utils.validation import _deprecate_positional_args
32+
3133

3234
# The original data can be found in:
3335
# https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz
@@ -40,7 +42,8 @@
4042
logger = logging.getLogger(__name__)
4143

4244

43-
def fetch_covtype(data_home=None, download_if_missing=True,
45+
@_deprecate_positional_args
46+
def fetch_covtype(*, data_home=None, download_if_missing=True,
4447
random_state=None, shuffle=False, return_X_y=False):
4548
"""Load the covertype dataset (classification).
4649

sklearn/datasets/_kddcup99.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
from ..utils import Bunch
2424
from ..utils import check_random_state
2525
from ..utils import shuffle as shuffle_method
26+
from ..utils.validation import _deprecate_positional_args
27+
2628

2729
# The original data can be found at:
2830
# https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz
@@ -43,7 +45,8 @@
4345
logger = logging.getLogger(__name__)
4446

4547

46-
def fetch_kddcup99(subset=None, data_home=None, shuffle=False,
48+
@_deprecate_positional_args
49+
def fetch_kddcup99(*, subset=None, data_home=None, shuffle=False,
4750
random_state=None,
4851
percent10=True, download_if_missing=True, return_X_y=False):
4952
"""Load the kddcup99 dataset (classification).

sklearn/datasets/_lfw.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
from ._base import get_data_home, _fetch_remote, RemoteFileMetadata
2222
from ..utils import Bunch
23+
from ..utils.validation import _deprecate_positional_args
2324

2425
logger = logging.getLogger(__name__)
2526

@@ -215,7 +216,8 @@ def _fetch_lfw_people(data_folder_path, slice_=None, color=False, resize=None,
215216
return faces, target, target_names
216217

217218

218-
def fetch_lfw_people(data_home=None, funneled=True, resize=0.5,
219+
@_deprecate_positional_args
220+
def fetch_lfw_people(*, data_home=None, funneled=True, resize=0.5,
219221
min_faces_per_person=0, color=False,
220222
slice_=(slice(70, 195), slice(78, 172)),
221223
download_if_missing=True, return_X_y=False):
@@ -385,7 +387,9 @@ def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None,
385387
return pairs, target, np.array(['Different persons', 'Same person'])
386388

387389

388-
def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,
390+
@_deprecate_positional_args
391+
def fetch_lfw_pairs(*, subset='train', data_home=None, funneled=True,
392+
resize=0.5,
389393
color=False, slice_=(slice(70, 195), slice(78, 172)),
390394
download_if_missing=True):
391395
"""Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).

sklearn/datasets/_olivetti_faces.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from ._base import RemoteFileMetadata
2626
from ._base import _pkl_filepath
2727
from ..utils import check_random_state, Bunch
28+
from ..utils.validation import _deprecate_positional_args
2829

2930
# The original data can be found at:
3031
# https://cs.nyu.edu/~roweis/data/olivettifaces.mat
@@ -35,7 +36,8 @@
3536
'd5fca46a4b8906c18e454d41af987794'))
3637

3738

38-
def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
39+
@_deprecate_positional_args
40+
def fetch_olivetti_faces(*, data_home=None, shuffle=False, random_state=0,
3941
download_if_missing=True, return_X_y=False):
4042
"""Load the Olivetti faces data-set from AT&T (classification).
4143

sklearn/datasets/_openml.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
from ..utils import get_chunk_n_rows
2424
from ..utils import _chunk_generator
2525
from ..utils import check_pandas_support # noqa
26+
from ..utils.validation import _deprecate_positional_args
2627

2728
__all__ = ['fetch_openml']
2829

@@ -608,7 +609,8 @@ def _valid_data_column_names(features_list, target_columns):
608609
return valid_data_column_names
609610

610611

611-
def fetch_openml(name=None, version='active', data_id=None, data_home=None,
612+
@_deprecate_positional_args
613+
def fetch_openml(name=None, *, version='active', data_id=None, data_home=None,
612614
target_column='default-target', cache=True, return_X_y=False,
613615
as_frame=False):
614616
"""Fetch dataset from openml by name or dataset id.

sklearn/datasets/_rcv1.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from ._svmlight_format_io import load_svmlight_files
2626
from ..utils import shuffle as shuffle_
2727
from ..utils import Bunch
28+
from ..utils.validation import _deprecate_positional_args
2829

2930

3031
# The original vectorized data can be found at:
@@ -75,7 +76,8 @@
7576
logger = logging.getLogger(__name__)
7677

7778

78-
def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
79+
@_deprecate_positional_args
80+
def fetch_rcv1(*, data_home=None, subset='all', download_if_missing=True,
7981
random_state=None, shuffle=False, return_X_y=False):
8082
"""Load the RCV1 multilabel dataset (classification).
8183

0 commit comments

Comments
 (0)
0