# Mathieu Blondel <mathieu@mblondel.org>
# Olivier Grisel <olivier.grisel@ensta.org>
# Andreas Mueller <amueller@ais.uni-bonn.de>
+ # Eric Martin <eric@ericmart.in>
# License: BSD 3 clause

from itertools import chain, combinations
@@ -442,9 +443,16 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):

    Attributes
    ----------
-     powers_ :
-         powers_[i, j] is the exponent of the jth input in the ith output.
+     powers_ : array, shape (n_output_features, n_input_features)
+         powers_[i, j] is the exponent of the jth input in the ith output.
+
+     n_input_features_ : int
+         The total number of input features.
+
+     n_output_features_ : int
+         The total number of polynomial output features. The number of output
+         features is computed by iterating over all suitably sized combinations
+         of input features.

    Notes
    -----
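Not part of the patch: a minimal usage sketch of the fitted attributes documented above, assuming the attribute names introduced by this change (`powers_`, `n_input_features_`, `n_output_features_`) on a small two-column input.

```python
# Illustrative only -- assumes the attributes introduced by this patch.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)       # 3 samples, 2 input features
poly = PolynomialFeatures(degree=2).fit(X)

print(poly.n_input_features_)        # 2
print(poly.n_output_features_)       # 6 = 1 bias + 2 linear + 3 degree-2 terms
print(poly.powers_)                  # one row per output feature, one column per
                                     # input feature, giving the exponents
```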
@@ -461,23 +469,32 @@ def __init__(self, degree=2, interaction_only=False, include_bias=True):
        self.include_bias = include_bias

    @staticmethod
-     def _power_matrix(n_features, degree, interaction_only, include_bias):
-         """Compute the matrix of polynomial powers"""
+     def _combinations(n_features, degree, interaction_only, include_bias):
        comb = (combinations if interaction_only else combinations_w_r)
        start = int(not include_bias)
-         combn = chain.from_iterable(comb(range(n_features), i)
-                                     for i in range(start, degree + 1))
-         powers = np.vstack(bincount(c, minlength=n_features) for c in combn)
-         return powers
+         return chain.from_iterable(comb(range(n_features), i)
+                                    for i in range(start, degree + 1))
+
+     @property
+     def powers_(self):
+         check_is_fitted(self, 'n_input_features_')
+
+         combinations = self._combinations(self.n_input_features_, self.degree,
+                                           self.interaction_only,
+                                           self.include_bias)
+         return np.vstack(np.bincount(c, minlength=self.n_input_features_)
+                          for c in combinations)

    def fit(self, X, y=None):
        """
-         Compute the polynomial feature combinations
+         Compute number of output features.
        """
        n_samples, n_features = check_array(X).shape
-         self.powers_ = self._power_matrix(n_features, self.degree,
+         combinations = self._combinations(n_features, self.degree,
                                          self.interaction_only,
                                          self.include_bias)
+         self.n_input_features_ = n_features
+         self.n_output_features_ = sum(1 for _ in combinations)
        return self

    def transform(self, X, y=None):
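Not part of the patch: a standalone sketch of what `_combinations` enumerates and how `np.bincount` turns each index tuple into a row of `powers_`, assuming degree=2, two features, `include_bias=True` and `interaction_only=False`.

```python
# Illustrative only -- mirrors the _combinations / powers_ logic above.
from itertools import chain, combinations, combinations_with_replacement
import numpy as np

n_features, degree = 2, 2
interaction_only, include_bias = False, True

comb = combinations if interaction_only else combinations_with_replacement
start = int(not include_bias)
combs = list(chain.from_iterable(comb(range(n_features), i)
                                 for i in range(start, degree + 1)))
print(combs)      # [(), (0,), (1,), (0, 0), (0, 1), (1, 1)]
print(len(combs)) # 6, what fit() stores as n_output_features_

# Each tuple lists the input features multiplied together; bincount converts
# it into per-feature exponents, one row per output feature.
powers = np.vstack([np.bincount(c, minlength=n_features) for c in combs])
print(powers)     # [[0 0], [1 0], [0 1], [2 0], [1 1], [0 2]]
```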
@@ -494,15 +511,24 @@ def transform(self, X, y=None):
            The matrix of features, where NP is the number of polynomial
            features generated from the combination of inputs.
        """
-         check_is_fitted(self, 'powers_')
+         check_is_fitted(self, ['n_input_features_', 'n_output_features_'])

        X = check_array(X)
        n_samples, n_features = X.shape

-         if n_features != self.powers_.shape[1]:
+         if n_features != self.n_input_features_:
            raise ValueError("X shape does not match training shape")

-         return (X[:, None, :] ** self.powers_).prod(-1)
+         # allocate output data
+         XP = np.empty((n_samples, self.n_output_features_), dtype=X.dtype)
+
+         combinations = self._combinations(n_features, self.degree,
+                                           self.interaction_only,
+                                           self.include_bias)
+         for i, c in enumerate(combinations):
+             XP[:, i] = X[:, c].prod(1)
+
+         return XP


def normalize(X, norm='l2', axis=1, copy=True):
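Not part of the patch: a quick illustrative check on a small dense input that the new column-product loop in `transform` matches the old broadcasting expression `(X[:, None, :] ** powers_).prod(-1)` that it replaces.

```python
# Illustrative only -- compares the new and old transform formulations.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6, dtype=float).reshape(3, 2)
poly = PolynomialFeatures(degree=2).fit(X)

XP_new = poly.transform(X)                          # column-product loop
XP_old = (X[:, None, :] ** poly.powers_).prod(-1)   # pre-patch broadcasting
assert np.allclose(XP_new, XP_old)
print(XP_new)   # columns: 1, x0, x1, x0**2, x0*x1, x1**2
```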
@@ -1083,7 +1109,8 @@ def _transform(self, X):
        # We use only those catgorical features of X that are known using fit.
        # i.e lesser than n_values_ using mask.
        # This means, if self.handle_unknown is "ignore", the row_indices and
-         # col_indices corresponding to the unknown categorical feature are ignored.
+         # col_indices corresponding to the unknown categorical feature are
+         # ignored.
        mask = (X < self.n_values_).ravel()
        if np.any(~mask):
            if self.handle_unknown not in ['error', 'ignore']: