DOC Improve default values in SGD documentation (#15967) · scikit-learn/scikit-learn@3339d80

Commit 3339d80

ankishbrth authored and committed

DOC Improve default values in SGD documentation (#15967)

1 parent 725ca8f commit 3339d80

File tree: 1 file changed, +56 −59 lines changed


sklearn/linear_model/_stochastic_gradient.py

+56 −59
@@ -244,12 +244,12 @@ def _make_validation_split(self, y):
 
         Parameters
         ----------
-        y : array, shape (n_samples, )
+        y : ndarray of shape (n_samples, )
             Target values.
 
         Returns
         -------
-        validation_mask : array, shape (n_samples, )
+        validation_mask : ndarray of shape (n_samples, )
             Equal to 1 on the validation set, 0 on the training set.
         """
         n_samples = y.shape[0]
@@ -362,11 +362,11 @@ def fit_binary(est, i, X, y, alpha, C, learning_rate, max_iter,
     sample_weight : numpy array of shape [n_samples, ]
         The weight of each sample
 
-    validation_mask : numpy array of shape [n_samples, ] or None
+    validation_mask : numpy array of shape [n_samples, ], default=None
        Precomputed validation mask in case _fit_binary is called in the
        context of a one-vs-rest reduction.
 
-    random_state : int, RandomState instance or None, optional (default=None)
+    random_state : int, RandomState instance, default=None
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
@@ -641,18 +641,18 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Subset of the training data.
 
-        y : numpy array, shape (n_samples,)
+        y : ndarray of shape (n_samples,)
             Subset of the target values.
 
-        classes : array, shape (n_classes,)
+        classes : ndarray of shape (n_classes,), default=None
             Classes across all calls to partial_fit.
             Can be obtained by via `np.unique(y_all)`, where y_all is the
             target vector of the entire dataset.
             This argument is required for the first call to partial_fit
             and can be omitted in the subsequent calls.
             Note that y doesn't need to contain all labels in `classes`.
 
-        sample_weight : array-like, shape (n_samples,), optional
+        sample_weight : array-like, shape (n_samples,), default=None
             Weights applied to individual samples.
             If not provided, uniform weights are assumed.
 
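
Reviewer note: the partial_fit contract documented here (classes required on the first call, optional afterwards) can be exercised as below. A minimal sketch with made-up toy data, not part of this commit:

import numpy as np
from sklearn.linear_model import SGDClassifier

# Two hypothetical mini-batches, just for illustration.
X1, y1 = np.array([[0., 0.], [1., 1.]]), np.array([0, 1])
X2, y2 = np.array([[2., 2.], [3., 3.]]), np.array([1, 0])

clf = SGDClassifier(random_state=0)
# The first call must list every class, even if this batch lacks some of them.
clf.partial_fit(X1, y1, classes=np.array([0, 1]))
# Later calls may omit `classes`.
clf.partial_fit(X2, y2)
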
@@ -685,16 +685,16 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Training data.
 
-        y : numpy array, shape (n_samples,)
+        y : ndarray of shape (n_samples,)
             Target values.
 
-        coef_init : array, shape (n_classes, n_features)
+        coef_init : ndarray of shape (n_classes, n_features), default=None
             The initial coefficients to warm-start the optimization.
 
-        intercept_init : array, shape (n_classes,)
+        intercept_init : ndarray of shape (n_classes,), default=None
             The initial intercept to warm-start the optimization.
 
-        sample_weight : array-like, shape (n_samples,), optional
+        sample_weight : array-like, shape (n_samples,), default=None
             Weights applied to individual samples.
             If not provided, uniform weights are assumed. These weights will
             be multiplied with class_weight (passed through the
@@ -738,7 +738,7 @@ class SGDClassifier(BaseSGDClassifier):
 
     Parameters
     ----------
-    loss : str, default: 'hinge'
+    loss : str, default='hinge'
         The loss function to be used. Defaults to 'hinge', which gives a
         linear SVM.
 
@@ -754,42 +754,41 @@ class SGDClassifier(BaseSGDClassifier):
         The other losses are designed for regression but can be useful in
         classification as well; see SGDRegressor for a description.
 
-    penalty : str, 'none', 'l2', 'l1', or 'elasticnet'
+    penalty : {'l2', 'l1', 'elasticnet'}, default='l2'
         The penalty (aka regularization term) to be used. Defaults to 'l2'
         which is the standard regularizer for linear SVM models. 'l1' and
         'elasticnet' might bring sparsity to the model (feature selection)
         not achievable with 'l2'.
 
-    alpha : float
+    alpha : float, default=0.0001
         Constant that multiplies the regularization term. Defaults to 0.0001.
         Also used to compute learning_rate when set to 'optimal'.
 
-    l1_ratio : float
+    l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Defaults to 0.15.
 
-    fit_intercept : bool
+    fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
         data is assumed to be already centered. Defaults to True.
 
-    max_iter : int, optional (default=1000)
+    max_iter : int, default=1000
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
 
         .. versionadded:: 0.19
 
-    tol : float or None, optional (default=1e-3)
+    tol : float, default=1e-3
         The stopping criterion. If it is not None, the iterations will stop
         when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive
         epochs.
 
         .. versionadded:: 0.19
 
-    shuffle : bool, optional
+    shuffle : bool, default=True
         Whether or not the training data should be shuffled after each epoch.
-        Defaults to True.
 
     verbose : int, default=0
         The verbosity level.
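
Reviewer note: with the defaults now spelled out in the docstring, the following two constructions should be equivalent. A sketch for illustration only:

from sklearn.linear_model import SGDClassifier

# Relying on the documented defaults ...
clf_default = SGDClassifier()
# ... is the same as writing them out explicitly.
clf_explicit = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001,
                             l1_ratio=0.15, fit_intercept=True,
                             max_iter=1000, tol=1e-3, shuffle=True)
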
@@ -802,21 +801,21 @@ class SGDClassifier(BaseSGDClassifier):
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
         multi-class problems) computation.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
-    random_state : int, RandomState instance or None, optional (default=None)
+    random_state : int, RandomState instance, default=None
         The seed of the pseudo random number generator to use when shuffling
         the data. If int, random_state is the seed used by the random number
         generator; If RandomState instance, random_state is the random number
         generator; If None, the random number generator is the RandomState
         instance used by `np.random`.
 
-    learning_rate : str, optional
+    learning_rate : str, default='optimal'
         The learning rate schedule:
 
         'constant':
@@ -832,12 +831,12 @@ class SGDClassifier(BaseSGDClassifier):
         training loss by tol or fail to increase validation score by tol if
         early_stopping is True, the current learning rate is divided by 5.
 
-    eta0 : double
+    eta0 : double, default=0.0
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules. The default value is 0.0 as eta0 is not used by
         the default schedule 'optimal'.
 
-    power_t : double
+    power_t : double, default=0.5
         The exponent for inverse scaling learning rate [default 0.5].
 
     early_stopping : bool, default=False
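
Reviewer note: eta0 and power_t only come into play once the schedule is changed away from the classifier's default 'optimal'. A hedged sketch:

from sklearn.linear_model import SGDClassifier

# The default 'optimal' schedule ignores eta0, hence the 0.0 default.
clf_optimal = SGDClassifier(learning_rate='optimal')
# 'constant', 'invscaling' and 'adaptive' need a positive eta0;
# power_t is used only by 'invscaling'.
clf_invscaling = SGDClassifier(learning_rate='invscaling', eta0=0.01, power_t=0.5)
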
@@ -861,7 +860,7 @@ class SGDClassifier(BaseSGDClassifier):
 
         .. versionadded:: 0.20
 
-    class_weight : dict, {class_label: weight} or "balanced" or None, optional
+    class_weight : dict, {class_label: weight} or "balanced", default=None
         Preset for the class_weight fit parameter.
 
         Weights associated with classes. If not given, all classes
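
Reviewer note: a short usage sketch for the class_weight presets described above; the weights are illustrative values only:

from sklearn.linear_model import SGDClassifier

# Explicit per-class weights ...
clf_weighted = SGDClassifier(class_weight={0: 1.0, 1: 5.0})
# ... or weights inversely proportional to class frequencies.
clf_balanced = SGDClassifier(class_weight='balanced')
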
@@ -893,11 +892,11 @@ class SGDClassifier(BaseSGDClassifier):
 
     Attributes
     ----------
-    coef_ : array, shape (1, n_features) if n_classes == 2 else (n_classes,\
-            n_features)
+    coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \
+            (n_classes, n_features)
         Weights assigned to the features.
 
-    intercept_ : array, shape (1,) if n_classes == 2 else (n_classes,)
+    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)
         Constants in decision function.
 
     n_iter_ : int
@@ -979,7 +978,7 @@ def predict_proba(self):
 
         Returns
         -------
-        array, shape (n_samples, n_classes)
+        ndarray of shape (n_samples, n_classes)
             Returns the probability of the sample for each class in the model,
             where classes are ordered as they are in `self.classes_`.
 
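
Reviewer note: the documented return shape can be checked directly; predict_proba is only available for probabilistic losses such as 'log' or 'modified_huber'. A sketch with toy data:

import numpy as np
from sklearn.linear_model import SGDClassifier

X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
y = np.array([0, 0, 1, 1])

clf = SGDClassifier(loss='log', random_state=0).fit(X, y)
print(clf.predict_proba(X).shape)  # (n_samples, n_classes) -> (4, 2)
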
@@ -1140,7 +1139,7 @@ def partial_fit(self, X, y, sample_weight=None):
         y : numpy array of shape (n_samples,)
             Subset of target values
 
-        sample_weight : array-like, shape (n_samples,), optional
+        sample_weight : array-like, shape (n_samples,), default=None
             Weights applied to individual samples.
             If not provided, uniform weights are assumed.
 
@@ -1198,16 +1197,16 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Training data
 
-        y : numpy array, shape (n_samples,)
+        y : ndarray of shape (n_samples,)
             Target values
 
-        coef_init : array, shape (n_features,)
+        coef_init : ndarray of shape (n_features,), default=None
             The initial coefficients to warm-start the optimization.
 
-        intercept_init : array, shape (1,)
+        intercept_init : ndarray of shape (1,), default=None
             The initial intercept to warm-start the optimization.
 
-        sample_weight : array-like, shape (n_samples,), optional
+        sample_weight : array-like, shape (n_samples,), default=None
             Weights applied to individual samples (1. for unweighted).
 
         Returns
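
Reviewer note: coef_init and intercept_init warm-start the optimizer from an earlier solution. A hedged sketch with synthetic data:

import numpy as np
from sklearn.linear_model import SGDRegressor

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = X @ np.array([1.0, 2.0, 3.0]) + 0.5

reg = SGDRegressor(max_iter=1000, tol=1e-3).fit(X, y)
# Refit, starting from the previously learned coefficients.
reg2 = SGDRegressor(max_iter=1000, tol=1e-3)
reg2.fit(X, y, coef_init=reg.coef_, intercept_init=reg.intercept_)
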
@@ -1229,7 +1228,7 @@ def _decision_function(self, X):
 
         Returns
         -------
-        array, shape (n_samples,)
+        ndarray of shape (n_samples,)
             Predicted target values per element in X.
         """
         check_is_fitted(self)
@@ -1249,7 +1248,7 @@ def predict(self, X):
 
         Returns
         -------
-        array, shape (n_samples,)
+        ndarray of shape (n_samples,)
             Predicted target values per element in X.
         """
         return self._decision_function(X)
@@ -1359,7 +1358,7 @@ class SGDRegressor(BaseSGDRegressor):
 
     Parameters
     ----------
-    loss : str, default: 'squared_loss'
+    loss : str, default='squared_loss'
         The loss function to be used. The possible values are 'squared_loss',
         'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'
 
@@ -1371,44 +1370,42 @@ class SGDRegressor(BaseSGDRegressor):
         'squared_epsilon_insensitive' is the same but becomes squared loss past
         a tolerance of epsilon.
 
-    penalty : str, 'none', 'l2', 'l1', or 'elasticnet'
+    penalty : {'l2', 'l1', 'elasticnet'}, default='l2'
         The penalty (aka regularization term) to be used. Defaults to 'l2'
         which is the standard regularizer for linear SVM models. 'l1' and
         'elasticnet' might bring sparsity to the model (feature selection)
         not achievable with 'l2'.
 
-    alpha : float
-        Constant that multiplies the regularization term. Defaults to 0.0001
+    alpha : float, default=0.0001
+        Constant that multiplies the regularization term.
         Also used to compute learning_rate when set to 'optimal'.
 
-    l1_ratio : float
+    l1_ratio : float, default=0.15
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
-        Defaults to 0.15.
 
-    fit_intercept : bool
+    fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
-        data is assumed to be already centered. Defaults to True.
+        data is assumed to be already centered.
 
-    max_iter : int, optional (default=1000)
+    max_iter : int, default=1000
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
 
         .. versionadded:: 0.19
 
-    tol : float or None, optional (default=1e-3)
+    tol : float, default=1e-3
         The stopping criterion. If it is not None, the iterations will stop
         when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive
         epochs.
 
         .. versionadded:: 0.19
 
-    shuffle : bool, optional
+    shuffle : bool, default=True
         Whether or not the training data should be shuffled after each epoch.
-        Defaults to True.
 
-    verbose : integer, default=0
+    verbose : int, default=0
         The verbosity level.
 
     epsilon : float, default=0.1
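
Reviewer note: mirroring the classifier example above, the regressor defaults named in this hunk can be written out explicitly. A sketch for illustration only:

from sklearn.linear_model import SGDRegressor

reg_default = SGDRegressor()
reg_explicit = SGDRegressor(loss='squared_loss', penalty='l2', alpha=0.0001,
                            l1_ratio=0.15, fit_intercept=True,
                            max_iter=1000, tol=1e-3, shuffle=True, verbose=0)
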
@@ -1419,14 +1416,14 @@ class SGDRegressor(BaseSGDRegressor):
         For epsilon-insensitive, any differences between the current prediction
         and the correct label are ignored if they are less than this threshold.
 
-    random_state : int, RandomState instance or None, optional (default=None)
+    random_state : int, RandomState instance, default=None
         The seed of the pseudo random number generator to use when shuffling
         the data. If int, random_state is the seed used by the random number
         generator; If RandomState instance, random_state is the random number
         generator; If None, the random number generator is the RandomState
         instance used by `np.random`.
 
-    learning_rate : string, optional
+    learning_rate : string, default='invscaling'
         The learning rate schedule:
 
         'constant':
@@ -1442,12 +1439,12 @@ class SGDRegressor(BaseSGDRegressor):
         training loss by tol or fail to increase validation score by tol if
         early_stopping is True, the current learning rate is divided by 5.
 
-    eta0 : double
+    eta0 : double, default=0.01
         The initial learning rate for the 'constant', 'invscaling' or
         'adaptive' schedules. The default value is 0.01.
 
-    power_t : double
-        The exponent for inverse scaling learning rate [default 0.25].
+    power_t : double, default=0.25
+        The exponent for inverse scaling learning rate.
 
     early_stopping : bool, default=False
         Whether to use early stopping to terminate training when validation
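
Reviewer note: unlike the classifier, SGDRegressor defaults to the 'invscaling' schedule, where the learning rate at step t is eta0 / pow(t, power_t) per the scikit-learn docs. A tiny illustrative computation with the documented defaults:

# Learning rate decay under 'invscaling' with eta0=0.01, power_t=0.25.
eta0, power_t = 0.01, 0.25
for t in (1, 10, 100, 1000):
    print(t, eta0 / pow(t, power_t))
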
@@ -1492,16 +1489,16 @@ class SGDRegressor(BaseSGDRegressor):
 
     Attributes
     ----------
-    coef_ : array, shape (n_features,)
+    coef_ : ndarray of shape (n_features,)
         Weights assigned to the features.
 
-    intercept_ : array, shape (1,)
+    intercept_ : ndarray of shape (1,)
         The intercept term.
 
-    average_coef_ : array, shape (n_features,)
+    average_coef_ : ndarray of shape (n_features,)
         Averaged weights assigned to the features.
 
-    average_intercept_ : array, shape (1,)
+    average_intercept_ : ndarray of shape (1,)
         The averaged intercept term.
 
     n_iter_ : int
