@@ -187,6 +187,7 @@ def _preprocess_data(
187187 fit_intercept ,
188188 normalize = False ,
189189 copy = True ,
190+ copy_y = True ,
190191 sample_weight = None ,
191192 check_input = True ,
192193):
@@ -230,13 +231,14 @@ def _preprocess_data(
230231
231232 if check_input :
232233 X = check_array (X , copy = copy , accept_sparse = ["csr" , "csc" ], dtype = FLOAT_DTYPES )
233- elif copy :
234- if sp .issparse (X ):
235- X = X .copy ()
236- else :
237- X = X .copy (order = "K" )
238-
239- y = np .asarray (y , dtype = X .dtype )
234+ y = check_array (y , dtype = X .dtype , copy = copy_y , ensure_2d = False )
235+ else :
236+ y = y .astype (X .dtype , copy = copy_y )
237+ if copy :
238+ if sp .issparse (X ):
239+ X = X .copy ()
240+ else :
241+ X = X .copy (order = "K" )
240242
241243 if fit_intercept :
242244 if sp .issparse (X ):
@@ -276,7 +278,7 @@ def _preprocess_data(
276278 X_scale = np .ones (X .shape [1 ], dtype = X .dtype )
277279
278280 y_offset = np .average (y , axis = 0 , weights = sample_weight )
279- y = y - y_offset
281+ y -= y_offset
280282 else :
281283 X_offset = np .zeros (X .shape [1 ], dtype = X .dtype )
282284 X_scale = np .ones (X .shape [1 ], dtype = X .dtype )
@@ -293,7 +295,7 @@ def _preprocess_data(
293295# sample_weight makes the refactoring tricky.
294296
295297
296- def _rescale_data (X , y , sample_weight ):
298+ def _rescale_data (X , y , sample_weight , inplace = False ):
297299 """Rescale data sample-wise by square root of sample_weight.
298300
299301 For many linear models, this enables easy support for sample_weight because
@@ -328,18 +330,24 @@ def _rescale_data(X, y, sample_weight):
328330 if sp .issparse (X ):
329331 X = safe_sparse_dot (sw_matrix , X )
330332 else :
331- # XXX: we do not do inplace multiplication on X for consistency
332- # with the sparse case and because the _rescale_data currently
333- # does not make it explicit if it's ok to do it or not.
334- X = X * sample_weight_sqrt [:, np .newaxis ]
333+ if inplace :
334+ X *= sample_weight_sqrt [:, np . newaxis ]
335+ else :
336+ X = X * sample_weight_sqrt [:, np .newaxis ]
335337
336338 if sp .issparse (y ):
337339 y = safe_sparse_dot (sw_matrix , y )
338340 else :
339- if y .ndim == 1 :
340- y = y * sample_weight_sqrt
341+ if inplace :
342+ if y .ndim == 1 :
343+ y *= sample_weight_sqrt
344+ else :
345+ y *= sample_weight_sqrt [:, np .newaxis ]
341346 else :
342- y = y * sample_weight_sqrt [:, np .newaxis ]
347+ if y .ndim == 1 :
348+ y = y * sample_weight_sqrt
349+ else :
350+ y = y * sample_weight_sqrt [:, np .newaxis ]
343351 return X , y , sample_weight_sqrt
344352
345353
@@ -674,17 +682,26 @@ def fit(self, X, y, sample_weight=None):
674682 sample_weight , X , dtype = X .dtype , only_non_negative = True
675683 )
676684
685+ # Note that neither _rescale_data nor the rest of the fit method of
686+ # LinearRegression can benefit from in-place operations when X is a
687+ # sparse matrix. Therefore, let's not copy X when it is sparse.
688+ copy_X_in_preprocess_data = self .copy_X and not sp .issparse (X )
689+
677690 X , y , X_offset , y_offset , X_scale = _preprocess_data (
678691 X ,
679692 y ,
680693 fit_intercept = self .fit_intercept ,
681- copy = self . copy_X ,
694+ copy = copy_X_in_preprocess_data ,
682695 sample_weight = sample_weight ,
683696 )
684697
685- # Sample weight can be implemented via a simple rescaling.
686698 if has_sw :
687- X , y , sample_weight_sqrt = _rescale_data (X , y , sample_weight )
699+ # Sample weight can be implemented via a simple rescaling. The
700+ # rescaling is only done in place when _preprocess_data has already
701+ # made a copy of X (i.e. copy_X is set and X is not sparse).
702+ X , y , sample_weight_sqrt = _rescale_data (
703+ X , y , sample_weight , inplace = copy_X_in_preprocess_data
704+ )
688705
689706 if self .positive :
690707 if y .ndim < 2 :
<
3271
div class="d-flex flex-row">
0 commit comments