|
16 | 16 | import numpy as np
|
17 | 17 | from scipy import optimize, sparse
|
18 | 18 |
|
19 |
| -from .base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator |
| 19 | +from .base import (LinearClassifierMixin, SparseCoefMixin, BaseEstimator, |
| 20 | + LinearModel) |
20 | 21 | from .sag import sag_solver
|
21 | 22 | from ..feature_selection.from_model import _LearntSelectorMixin
|
22 | 23 | from ..preprocessing import LabelEncoder, LabelBinarizer
|
@@ -948,7 +949,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
|
948 | 949 |
|
949 | 950 |
|
950 | 951 | class LogisticRegression(BaseEstimator, LinearClassifierMixin,
|
951 |
| - _LearntSelectorMixin, SparseCoefMixin): |
| 952 | + _LearntSelectorMixin, SparseCoefMixin, LinearModel): |
952 | 953 | """Logistic Regression (aka logit, MaxEnt) classifier.
|
953 | 954 |
|
954 | 955 | In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
|
@@ -1001,6 +1002,19 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
|
1001 | 1002 | To lessen the effect of regularization on synthetic feature weight
|
1002 | 1003 | (and therefore on the intercept) intercept_scaling has to be increased.
|
1003 | 1004 |
|
| 1005 | + normalize : boolean, optional, default: False |
| 1006 | + If True, the regressors X will be normalized before regression. |
| 1007 | + This parameter is ignored when `fit_intercept` is set to False. |
| 1008 | + When the regressors are normalized, note that this makes the |
| 1009 | + hyperparameters learnt more robust and almost independent of the number |
| 1010 | + of samples. The same property is not valid for standardized data. |
| 1011 | + However, if you wish to standardize, please use |
| 1012 | + `preprocessing.StandardScaler` before calling `fit` on an estimator |
| 1013 | + with `normalize=False`. |
| 1014 | +
|
| 1015 | + copy_X : boolean, optional, default: True |
| 1016 | + If True, X will be copied; else, it may be overwritten. |
| 1017 | +
|
1004 | 1018 | class_weight : dict or 'balanced', default: None
|
1005 | 1019 | Weights associated with classes in the form ``{class_label: weight}``.
|
1006 | 1020 | If not given, all classes are supposed to have weight one.
|
@@ -1114,16 +1128,19 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin,
|
1114 | 1128 | """
|
1115 | 1129 |
|
1116 | 1130 | def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
|
1117 |
| - fit_intercept=True, intercept_scaling=1, class_weight=None, |
1118 |
| - random_state=None, solver='liblinear', max_iter=100, |
1119 |
| - multi_class='ovr', verbose=0, warm_start=False, n_jobs=1): |
| 1131 | + fit_intercept=True, intercept_scaling=1, normalize=False, |
| 1132 | + copy_X=True, class_weight=None, random_state=None, |
| 1133 | + solver='liblinear', max_iter=100, multi_class='ovr', |
| 1134 | +                 verbose=0, warm_start=False, n_jobs=1): |
1120 | 1135 |
|
1121 | 1136 | self.penalty = penalty
|
1122 | 1137 | self.dual = dual
|
1123 | 1138 | self.tol = tol
|
1124 | 1139 | self.C = C
|
1125 | 1140 | self.fit_intercept = fit_intercept
|
1126 | 1141 | self.intercept_scaling = intercept_scaling
|
| 1142 | + self.normalize = normalize |
| 1143 | + self.copy_X = copy_X |
1127 | 1144 | self.class_weight = class_weight
|
1128 | 1145 | self.random_state = random_state
|
1129 | 1146 | self.solver = solver
|
@@ -1176,13 +1193,18 @@ def fit(self, X, y, sample_weight=None):
|
1176 | 1193 | _check_solver_option(self.solver, self.multi_class, self.penalty,
|
1177 | 1194 | self.dual)
|
1178 | 1195 |
|
| 1196 | + X, y, X_offset, y_offset, X_scale = self._preprocess_data( |
| 1197 | + X, y, self.fit_intercept, self.normalize, self.copy_X, |
| 1198 | + sample_weight=sample_weight) |
| 1199 | + |
1179 | 1200 | if self.solver == 'liblinear':
|
1180 | 1201 | self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
|
1181 | 1202 | X, y, self.C, self.fit_intercept, self.intercept_scaling,
|
1182 | 1203 | self.class_weight, self.penalty, self.dual, self.verbose,
|
1183 | 1204 | self.max_iter, self.tol, self.random_state,
|
1184 | 1205 | sample_weight=sample_weight)
|
1185 | 1206 | self.n_iter_ = np.array([n_iter_])
|
| 1207 | + self._set_intercept(X_offset, y_offset, X_scale) |
1186 | 1208 | return self
|
1187 | 1209 |
|
1188 | 1210 | if self.solver == 'sag':
|
@@ -1252,6 +1274,8 @@ def fit(self, X, y, sample_weight=None):
|
1252 | 1274 | self.intercept_ = self.coef_[:, -1]
|
1253 | 1275 | self.coef_ = self.coef_[:, :-1]
|
1254 | 1276 |
|
| 1277 | + self._set_intercept(X_offset, y_offset, X_scale) |
| 1278 | + |
1255 | 1279 | return self
|
1256 | 1280 |
|
1257 | 1281 | def predict_proba(self, X):
|
|
0 commit comments