@@ -244,12 +244,12 @@ def _make_validation_split(self, y):
244
244
245
245
Parameters
246
246
----------
247
- y : array, shape (n_samples, )
247
+ y : ndarray of shape (n_samples,)
248
248
Target values.
249
249
250
250
Returns
251
251
-------
252
- validation_mask : array, shape (n_samples, )
252
+ validation_mask : ndarray of shape (n_samples,)
253
253
Equal to 1 on the validation set, 0 on the training set.
254
254
"""
255
255
n_samples = y.shape[0]
@@ -362,11 +362,11 @@ def fit_binary(est, i, X, y, alpha, C, learning_rate, max_iter,
362
362
sample_weight : numpy array of shape [n_samples, ]
363
363
The weight of each sample
364
364
365
- validation_mask : numpy array of shape [n_samples, ] or None
365
+ validation_mask : numpy array of shape [n_samples, ], default=None
366
366
Precomputed validation mask in case _fit_binary is called in the
367
367
context of a one-vs-rest reduction.
368
368
369
- random_state : int, RandomState instance or None, optional (default=None)
369
+ random_state : int, RandomState instance, default=None
370
370
If int, random_state is the seed used by the random number generator;
371
371
If RandomState instance, random_state is the random number generator;
372
372
If None, the random number generator is the RandomState instance used
@@ -641,18 +641,18 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
641
641
X : {array-like, sparse matrix}, shape (n_samples, n_features)
642
642
Subset of the training data.
643
643
644
- y : numpy array, shape (n_samples,)
644
+ y : ndarray of shape (n_samples,)
645
645
Subset of the target values.
646
646
647
- classes : array, shape (n_classes,)
647
+ classes : ndarray of shape (n_classes,), default=None
648
648
Classes across all calls to partial_fit.
649
649
Can be obtained by via `np.unique(y_all)`, where y_all is the
650
650
target vector of the entire dataset.
651
651
This argument is required for the first call to partial_fit
652
652
and can be omitted in the subsequent calls.
653
653
Note that y doesn't need to contain all labels in `classes`.
654
654
655
- sample_weight : array-like, shape (n_samples,), optional
655
+ sample_weight : array-like, shape (n_samples,), default=None
656
656
Weights applied to individual samples.
657
657
If not provided, uniform weights are assumed.
658
658
@@ -685,16 +685,16 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
685
685
X : {array-like, sparse matrix}, shape (n_samples, n_features)
686
686
Training data.
687
687
688
- y : numpy array, shape (n_samples,)
688
+ y : ndarray of shape (n_samples,)
689
689
Target values.
690
690
691
- coef_init : array, shape (n_classes, n_features)
691
+ coef_init : ndarray of shape (n_classes, n_features), default=None
692
692
The initial coefficients to warm-start the optimization.
693
693
694
- intercept_init : array, shape (n_classes,)
694
+ intercept_init : ndarray of shape (n_classes,), default=None
695
695
The initial intercept to warm-start the optimization.
696
696
697
- sample_weight : array-like, shape (n_samples,), optional
697
+ sample_weight : array-like, shape (n_samples,), default=None
698
698
Weights applied to individual samples.
699
699
If not provided, uniform weights are assumed. These weights will
700
700
be multiplied with class_weight (passed through the
@@ -738,7 +738,7 @@ class SGDClassifier(BaseSGDClassifier):
738
738
739
739
Parameters
740
740
----------
741
- loss : str, default: 'hinge'
741
+ loss : str, default='hinge'
742
742
The loss function to be used. Defaults to 'hinge', which gives a
743
743
linear SVM.
744
744
@@ -754,42 +754,41 @@ class SGDClassifier(BaseSGDClassifier):
754
754
The other losses are designed for regression but can be useful in
755
755
classification as well; see SGDRegressor for a description.
756
756
757
- penalty : str, 'none', 'l2', 'l1', or 'elasticnet'
757
+ penalty : {'l2', 'l1', 'elasticnet'}, default='l2'
758
758
The penalty (aka regularization term) to be used. Defaults to 'l2'
759
759
which is the standard regularizer for linear SVM models. 'l1' and
760
760
'elasticnet' might bring sparsity to the model (feature selection)
761
761
not achievable with 'l2'.
762
762
763
- alpha : float
763
+ alpha : float, default=0.0001
764
764
Constant that multiplies the regularization term. Defaults to 0.0001.
765
765
Also used to compute learning_rate when set to 'optimal'.
766
766
767
- l1_ratio : float
767
+ l1_ratio : float, default=0.15
768
768
The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
769
769
l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
770
770
Defaults to 0.15.
771
771
772
- fit_intercept : bool
772
+ fit_intercept : bool, default=True
773
773
Whether the intercept should be estimated or not. If False, the
774
774
data is assumed to be already centered. Defaults to True.
775
775
776
- max_iter : int, optional (default=1000)
776
+ max_iter : int, default=1000
777
777
The maximum number of passes over the training data (aka epochs).
778
778
It only impacts the behavior in the ``fit`` method, and not the
779
779
:meth:`partial_fit` method.
780
780
781
781
.. versionadded:: 0.19
782
782
783
- tol : float or None, optional (default=1e-3)
783
+ tol : float, default=1e-3
784
784
The stopping criterion. If it is not None, the iterations will stop
785
785
when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive
786
786
epochs.
787
787
788
788
.. versionadded:: 0.19
789
789
790
- shuffle : bool, optional
790
+ shuffle : bool, default=True
791
791
Whether or not the training data should be shuffled after each epoch.
792
- Defaults to True.
793
792
794
793
verbose : int, default=0
795
794
The verbosity level.
@@ -802,21 +801,21 @@ class SGDClassifier(BaseSGDClassifier):
802
801
For epsilon-insensitive, any differences between the current prediction
803
802
and the correct label are ignored if they are less than this threshold.
804
803
805
- n_jobs : int or None, optional (default=None)
804
+ n_jobs : int, default=None
806
805
The number of CPUs to use to do the OVA (One Versus All, for
807
806
multi-class problems) computation.
808
807
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
809
808
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
810
809
for more details.
811
810
812
- random_state : int, RandomState instance or None, optional (default=None)
811
+ random_state : int, RandomState instance, default=None
813
812
The seed of the pseudo random number generator to use when shuffling
814
813
the data. If int, random_state is the seed used by the random number
815
814
generator; If RandomState instance, random_state is the random number
816
815
generator; If None, the random number generator is the RandomState
817
816
instance used by `np.random`.
818
817
819
- learning_rate : str, optional
818
+ learning_rate : str, default='optimal'
820
819
The learning rate schedule:
821
820
822
821
'constant':
@@ -832,12 +831,12 @@ class SGDClassifier(BaseSGDClassifier):
832
831
training loss by tol or fail to increase validation score by tol if
833
832
early_stopping is True, the current learning rate is divided by 5.
834
833
835
- eta0 : double
834
+ eta0 : double, default=0.0
836
835
The initial learning rate for the 'constant', 'invscaling' or
837
836
'adaptive' schedules. The default value is 0.0 as eta0 is not used by
838
837
the default schedule 'optimal'.
839
838
840
- power_t : double
839
+ power_t : double, default=0.5
841
840
The exponent for inverse scaling learning rate [default 0.5].
842
841
843
842
early_stopping : bool, default=False
@@ -861,7 +860,7 @@ class SGDClassifier(BaseSGDClassifier):
861
860
862
861
.. versionadded:: 0.20
863
862
864
- class_weight : dict, {class_label: weight} or "balanced" or None, optional
863
+ class_weight : dict, {class_label: weight} or "balanced", default=None
865
864
Preset for the class_weight fit parameter.
866
865
867
866
Weights associated with classes. If not given, all classes
@@ -893,11 +892,11 @@ class SGDClassifier(BaseSGDClassifier):
893
892
894
893
Attributes
895
894
----------
896
- coef_ : array, shape (1, n_features) if n_classes == 2 else (n_classes, \
897
- n_features)
895
+ coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \
896
+ (n_classes, n_features)
898
897
Weights assigned to the features.
899
898
900
- intercept_ : array, shape (1,) if n_classes == 2 else (n_classes,)
899
+ intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)
901
900
Constants in decision function.
902
901
903
902
n_iter_ : int
@@ -979,7 +978,7 @@ def predict_proba(self):
979
978
980
979
Returns
981
980
-------
982
- array, shape (n_samples, n_classes)
981
+ ndarray of shape (n_samples, n_classes)
983
982
Returns the probability of the sample for each class in the model,
984
983
where classes are ordered as they are in `self.classes_`.
985
984
@@ -1140,7 +1139,7 @@ def partial_fit(self, X, y, sample_weight=None):
1140
1139
y : numpy array of shape (n_samples,)
1141
1140
Subset of target values
1142
1141
1143
- sample_weight : array-like, shape (n_samples,), optional
1142
+ sample_weight : array-like, shape (n_samples,), default=None
1144
1143
Weights applied to individual samples.
1145
1144
If not provided, uniform weights are assumed.
1146
1145
@@ -1198,16 +1197,16 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
1198
1197
X : {array-like, sparse matrix}, shape (n_samples, n_features)
1199
1198
Training data
1200
1199
1201
- y : numpy array, shape (n_samples,)
1200
+ y : ndarray of shape (n_samples,)
1202
1201
Target values
1203
1202
1204
- coef_init : array, shape (n_features,)
1203
+ coef_init : ndarray of shape (n_features,), default=None
1205
1204
The initial coefficients to warm-start the optimization.
1206
1205
1207
- intercept_init : array, shape (1,)
1206
+ intercept_init : ndarray of shape (1,), default=None
1208
1207
The initial intercept to warm-start the optimization.
1209
1208
1210
- sample_weight : array-like, shape (n_samples,), optional
1209
+ sample_weight : array-like, shape (n_samples,), default=None
1211
1210
Weights applied to individual samples (1. for unweighted).
1212
1211
1213
1212
Returns
@@ -1229,7 +1228,7 @@ def _decision_function(self, X):
1229
1228
1230
1229
Returns
1231
1230
-------
1232
- array, shape (n_samples,)
1231
+ ndarray of shape (n_samples,)
1233
1232
Predicted target values per element in X.
1234
1233
"""
1235
1234
check_is_fitted(self)
@@ -1249,7 +1248,7 @@ def predict(self, X):
1249
1248
1250
1249
Returns
1251
1250
-------
1252
- array, shape (n_samples,)
1251
+ ndarray of shape (n_samples,)
1253
1252
Predicted target values per element in X.
1254
1253
"""
1255
1254
return self._decision_function(X)
@@ -1359,7 +1358,7 @@ class SGDRegressor(BaseSGDRegressor):
1359
1358
1360
1359
Parameters
1361
1360
----------
1362
- loss : str, default: 'squared_loss'
1361
+ loss : str, default='squared_loss'
1363
1362
The loss function to be used. The possible values are 'squared_loss',
1364
1363
'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'
1365
1364
@@ -1371,44 +1370,42 @@ class SGDRegressor(BaseSGDRegressor):
1371
1370
'squared_epsilon_insensitive' is the same but becomes squared loss past
1372
1371
a tolerance of epsilon.
1373
1372
1374
- penalty : str, 'none', 'l2', 'l1', or 'elasticnet'
1373
+ penalty : {'l2', 'l1', 'elasticnet'}, default='l2'
1375
1374
The penalty (aka regularization term) to be used. Defaults to 'l2'
1376
1375
which is the standard regularizer for linear SVM models. 'l1' and
1377
1376
'elasticnet' might bring sparsity to the model (feature selection)
1378
1377
not achievable with 'l2'.
1379
1378
1380
- alpha : float
1381
- Constant that multiplies the regularization term. Defaults to 0.0001
1379
+ alpha : float, default=0.0001
1380
+ Constant that multiplies the regularization term.
1382
1381
Also used to compute learning_rate when set to 'optimal'.
1383
1382
1384
- l1_ratio : float
1383
+ l1_ratio : float, default=0.15
1385
1384
The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
1386
1385
l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
1387
- Defaults to 0.15.
1388
1386
1389
- fit_intercept : bool
1387
+ fit_intercept : bool, default=True
1390
1388
Whether the intercept should be estimated or not. If False, the
1391
- data is assumed to be already centered. Defaults to True.
1389
+ data is assumed to be already centered.
1392
1390
1393
- max_iter : int, optional (default=1000)
1391
+ max_iter : int, default=1000
1394
1392
The maximum number of passes over the training data (aka epochs).
1395
1393
It only impacts the behavior in the ``fit`` method, and not the
1396
1394
:meth:`partial_fit` method.
1397
1395
1398
1396
.. versionadded:: 0.19
1399
1397
1400
- tol : float or None, optional (default=1e-3)
1398
+ tol : float, default=1e-3
1401
1399
The stopping criterion. If it is not None, the iterations will stop
1402
1400
when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive
1403
1401
epochs.
1404
1402
1405
1403
.. versionadded:: 0.19
1406
1404
1407
- shuffle : bool, optional
1405
+ shuffle : bool, default=True
1408
1406
Whether or not the training data should be shuffled after each epoch.
1409
- Defaults to True.
1410
1407
1411
- verbose : integer, default=0
1408
+ verbose : int, default=0
1412
1409
The verbosity level.
1413
1410
1414
1411
epsilon : float, default=0.1
@@ -1419,14 +1416,14 @@ class SGDRegressor(BaseSGDRegressor):
1419
1416
For epsilon-insensitive, any differences between the current prediction
1420
1417
and the correct label are ignored if they are less than this threshold.
1421
1418
1422
- random_state : int, RandomState instance or None, optional (default=None)
1419
+ random_state : int, RandomState instance, default=None
1423
1420
The seed of the pseudo random number generator to use when shuffling
1424
1421
the data. If int, random_state is the seed used by the random number
1425
1422
generator; If RandomState instance, random_state is the random number
1426
1423
generator; If None, the random number generator is the RandomState
1427
1424
instance used by `np.random`.
1428
1425
1429
- learning_rate : string, optional
1426
+ learning_rate : string, default='invscaling'
1430
1427
The learning rate schedule:
1431
1428
1432
1429
'constant':
@@ -1442,12 +1439,12 @@ class SGDRegressor(BaseSGDRegressor):
1442
1439
training loss by tol or fail to increase validation score by tol if
1443
1440
early_stopping is True, the current learning rate is divided by 5.
1444
1441
1445
- eta0 : double
1442
+ eta0 : double, default=0.01
1446
1443
The initial learning rate for the 'constant', 'invscaling' or
1447
1444
'adaptive' schedules. The default value is 0.01.
1448
1445
1449
- power_t : double
1450
- The exponent for inverse scaling learning rate [default 0.25].
1446
+ power_t : double, default=0.25
1447
+ The exponent for inverse scaling learning rate.
1451
1448
1452
1449
early_stopping : bool, default=False
1453
1450
Whether to use early stopping to terminate training when validation
@@ -1492,16 +1489,16 @@ class SGDRegressor(BaseSGDRegressor):
1492
1489
1493
1490
Attributes
1494
1491
----------
1495
- coef_ : array, shape (n_features,)
1492
+ coef_ : ndarray of shape (n_features,)
1496
1493
Weights assigned to the features.
1497
1494
1498
- intercept_ : array, shape (1,)
1495
+ intercept_ : ndarray of shape (1,)
1499
1496
The intercept term.
1500
1497
1501
- average_coef_ : array, shape (n_features,)
1498
+ average_coef_ : ndarray of shape (n_features,)
1502
1499
Averaged weights assigned to the features.
1503
1500
1504
- average_intercept_ : array, shape (1,)
1501
+ average_intercept_ : ndarray of shape (1,)
1505
1502
The averaged intercept term.
1506
1503
1507
1504
n_iter_ : int
0 commit comments