8000 DOC add examples for fetchers and load_linnerud and load_svmlight_files by Higgs32584 · Pull Request #28300 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

DOC add examples for fetchers and load_linnerud and load_svmlight_files #28300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions sklearn/datasets/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,15 @@ def load_linnerud(*, return_X_y=False, as_frame=False):
features in `X` and a target in `y` of a given sample.

.. versionadded:: 0.18

Examples
--------
>>> from sklearn.datasets import load_linnerud
>>> linnerud = load_linnerud()
>>> linnerud.data.shape
(20, 3)
>>> linnerud.target.shape
(20, 3)
"""
data_filename = "linnerud_exercise.csv"
target_filename = "linnerud_physiological.csv"
Expand Down
29 changes: 29 additions & 0 deletions sklearn/datasets/_lfw.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,22 @@ def fetch_lfw_people(
ndarray of shape (n_samples,) containing the target samples.

.. versionadded:: 0.20

Examples
--------
>>> from sklearn.datasets import fetch_lfw_people
>>> lfw_people = fetch_lfw_people()
>>> lfw_people.data.shape
(13233, 2914)
>>> lfw_people.target.shape
(13233,)
>>> for name in lfw_people.target_names[:5]:
... print(name)
AJ Cook
AJ Lamas
Aaron Eckhart
Aaron Guiel
Aaron Patterson
"""
lfw_home, data_folder_path = _check_fetch_lfw(
data_home=data_home,
Expand Down Expand Up @@ -570,6 +586,19 @@ def fetch_lfw_pairs(
0 corresponds to "Different person", 1 corresponds to "same person".
DESCR : str
Description of the Labeled Faces in the Wild (LFW) dataset.

Examples
--------
>>> from sklearn.datasets import fetch_lfw_pairs
>>> lfw_pairs_train = fetch_lfw_pairs(subset='train')
>>> list(lfw_pairs_train.target_names)
['Different persons', 'Same person']
>>> lfw_pairs_train.pairs.shape
(2200, 2, 62, 47)
>>> lfw_pairs_train.data.shape
(2200, 5828)
>>> lfw_pairs_train.target.shape
(2200,)
"""
lfw_home, data_folder_path = _check_fetch_lfw(
data_home=data_home,
Expand Down
11 changes: 11 additions & 0 deletions sklearn/datasets/_olivetti_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ def fetch_olivetti_faces(
Tuple with the `data` and `target` objects described above.

.. versionadded:: 0.22

Examples
--------
>>> from sklearn.datasets import fetch_olivetti_faces
>>> olivetti_faces = fetch_olivetti_faces()
>>> olivetti_faces.data.shape
(400, 4096)
>>> olivetti_faces.target.shape
(400,)
>>> olivetti_faces.images.shape
(400, 64, 64)
"""
data_home = get_data_home(data_home=data_home)
if not exists(data_home):
Expand Down
19 changes: 18 additions & 1 deletion sklearn/datasets/_svmlight_format_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def load_svmlight_file(
To use joblib.Memory to cache the svmlight file::

from joblib import Memory
from .datasets import load_svmlight_file
from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")

@mem.cache
Expand Down Expand Up @@ -359,6 +359,23 @@ def load_svmlight_files(
matrix X_test, it is essential that X_train and X_test have the same
number of features (X_train.shape[1] == X_test.shape[1]). This may not
be the case if you load the files individually with load_svmlight_file.

Examples
--------
To use joblib.Memory to cache the svmlight files::

from joblib import Memory
from sklearn.datasets import load_svmlight_files
mem = Memory("./mycache")

@mem.cache
def get_data():
data_train, target_train, data_test, target_test = load_svmlight_files(
["svmlight_file_train", "svmlight_file_test"]
)
return data_train, target_train, data_test, target_test

X_train, y_train, X_test, y_test = get_data()
"""
if (offset != 0 or length > 0) and zero_based == "auto":
# disable heuristic search to avoid getting inconsistent results on
Expand Down
9 changes: 9 additions & 0 deletions sklearn/datasets/_twenty_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,15 @@ def fetch_20newsgroups_vectorized(
description above.

.. versionadded:: 0.20

Examples
--------
>>> from sklearn.datasets import fetch_20newsgroups_vectorized
>>> newsgroups_vectorized = fetch_20newsgroups_vectorized(subset='test')
>>> newsgroups_vectorized.data.shape
(7532, 130107)
>>> newsgroups_vectorized.target.shape
(7532,)
"""
data_home = get_data_home(data_home=data_home)
filebase = "20newsgroup_vectorized"
Expand Down
0