8000 DOC add examples for fetchers and load_linnerud and load_svmlight_files by Higgs32584 · Pull Request #28300 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

DOC add examples for fetchers and load_linnerud and load_svmlight_files #28300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions sklearn/datasets/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,15 @@ def load_linnerud(*, return_X_y=False, as_frame=False):
features in `X` and a target in `y` of a given sample.

.. versionadded:: 0.18

Examples
--------
>>> from sklearn.datasets import load_linnerud
>>> linnerud = load_linnerud()
>>> linnerud.data.shape
(20, 3)
>>> linnerud.target.shape
(20, 3)
"""
data_filename = "linnerud_exercise.csv"
target_filename = "linnerud_physiological.csv"
Expand Down
29 changes: 29 additions & 0 deletions sklearn/datasets/_lfw.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,22 @@ def fetch_lfw_people(
ndarray of shape (n_samples,) containing the target samples.

.. versionadded:: 0.20

Examples
--------
>>> from sklearn.datasets import fetch_lfw_people
>>> lfw_people = fetch_lfw_people()
>>> lfw_people.data.shape
(13233, 2914)
>>> lfw_people.target.shape
(13233,)
>>> for name in lfw_people.target_names[:5]:
... print(name)
AJ Cook
AJ Lamas
Aaron Eckhart
Aaron Guiel
Aaron Patterson
"""
lfw_home, data_folder_path = _check_fetch_lfw(
data_home=data_home,
Expand Down Expand Up @@ -570,6 +586,19 @@ def fetch_lfw_pairs(
0 corresponds to "Different person", 1 corresponds to "same person".
DESCR : str
Description of the Labeled Faces in the Wild (LFW) dataset.

Examples
--------
>>> from sklearn.datasets import fetch_lfw_pairs
>>> lfw_pairs_train = fetch_lfw_pairs(subset='train')
>>> list(lfw_pairs_train.target_names)
['Different persons', 'Same person']
>>> lfw_pairs_train.pairs.shape
(2200, 2, 62, 47)
>>> lfw_pairs_train.data.shape
(2200, 5828)
>>> lfw_pairs_train.target.shape
(2200,)
"""
lfw_home, data_folder_path = _check_fetch_lfw(
data_home=data_home,
Expand Down
11 changes: 11 additions & 0 deletions sklearn/datasets/_olivetti_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ def fetch_olivetti_faces(
Tuple with the `data` and `target` objects described above.

.. versionadded:: 0.22

Examples
--------
>>> from sklearn.datasets import fetch_olivetti_faces
>>> olivetti_faces = fetch_olivetti_faces()
>>> olivetti_faces.data.shape
(400, 4096)
>>> olivetti_faces.target.shape
(400,)
>>> olivetti_faces.images.shape
(400, 64, 64)
"""
data_home = get_data_home(data_home=data_home)
if not exists(data_home):
Expand Down
19 changes: 18 additions & 1 deletion sklearn/datasets/_svmlight_format_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def load_svmlight_file(
To use joblib.Memory to cache the svmlight file::

from joblib import Memory
from .datasets import load_svmlight_file
from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")

@mem.cache
Expand Down Expand Up @@ -359,6 +359,23 @@ def load_svmlight_files(
matrix X_test, it is essential that X_train and X_test have the same
number of features (X_train.shape[1] == X_test.shape[1]). This may not
be the case if you load the files individually with load_svmlight_file.

Examples
--------
To use joblib.Memory to cache the svmlight files::

from joblib import Memory
from sklearn.datasets import load_svmlight_files
mem = Memory("./mycache")

@mem.cache
def get_data():
data_train, target_train, data_test, target_test = load_svmlight_files(
["svmlight_file_train", "svmlight_file_test"]
)
return data_train, target_train, data_test, target_test

X_train, y_train, X_test, y_test = get_data()
"""
if (offset != 0 or length > 0) and zero_based == "auto":
# disable heuristic search to avoid getting inconsistent results on
Expand Down
9 changes: 9 additions & 0 deletions sklearn/datasets/_twenty_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,15 @@ def fetch_20newsgroups_vectorized(
description above.

.. versionadded:: 0.20

Examples
--------
>>> from sklearn.datasets import fetch_20newsgroups_vectorized
>>> newsgroups_vectorized = fetch_20newsgroups_vectorized(subset='test')
>>> newsgroups_vectorized.data.shape
(7532, 130107)
>>> newsgroups_vectorized.target.shape
(7532,)
"""
data_home = get_data_home(data_home=data_home)
filebase = "20newsgroup_vectorized"
Expand Down
0