@@ -2537,7 +2537,7 @@ class PowerTransformer(BaseEstimator, TransformerMixin):
2537
2537
>>> print(pt.fit(data))
2538
2538
PowerTransformer(copy=True, method='yeo-johnson', standardize=True)
2539
2539
>>> print(pt.lambdas_)
2540
- [ 1.38668178 -3.10053309 ]
2540
+ [1.38668178e+00 5.93926346e-09 ]
2541
2541
>>> print(pt.transform(data))
2542
2542
[[-1.31616039 -0.70710678]
2543
2543
[ 0.20998268 -0.70710678]
@@ -2718,18 +2718,23 @@ def _box_cox_inverse_tranform(self, x, lmbda):
2718
2718
def _yeo_johnson_inverse_transform (self , x , lmbda ):
2719
2719
"""Return inverse-transformed input x following Yeo-Johnson inverse
2720
2720
transform with parameter lambda.
2721
+
2722
+ Notes
2723
+ -----
2724
+ We're comparing lmbda to 1e-19 instead of strict equality to 0. See
2725
+ scipy/special/_boxcox.pxd for a rationale behind this
2721
2726
"""
2722
- x_inv = np .zeros_like ( x )
2727
+ x_inv = np .zeros ( x . shape , dtype = x . dtype )
2723
2728
pos = x >= 0
2724
2729
2725
2730
# when x >= 0
2726
- if abs ( lmbda ) < np . spacing ( 1. ) :
2731
+ if lmbda < 1e-19 :
2727
2732
x_inv [pos ] = np .exp (x [pos ]) - 1
2728
2733
else : # lmbda != 0
2729
2734
x_inv [pos ] = np .power (x [pos ] * lmbda + 1 , 1 / lmbda ) - 1
2730
2735
2731
2736
# when x < 0
2732
- if abs ( lmbda - 2 ) > np . spacing ( 1. ) :
2737
+ if lmbda < 2 - 1e-19 :
2733
2738
x_inv [~ pos ] = 1 - np .power (- (2 - lmbda ) * x [~ pos ] + 1 ,
2734
2739
1 / (2 - lmbda ))
2735
2740
else : # lmbda == 2
@@ -2740,22 +2745,27 @@ def _yeo_johnson_inverse_transform(self, x, lmbda):
2740
2745
def _yeo_johnson_transform (self , x , lmbda ):
2741
2746
"""Return transformed input x following Yeo-Johnson transform with
2742
2747
parameter lambda.
2748
+
2749
+ Notes
2750
+ -----
2751
+ We're comparing lmbda to 1e-19 instead of strict equality to 0. See
2752
+ scipy/special/_boxcox.pxd for a rationale behind this
2743
2753
"""
2744
2754
2745
- out = np .zeros_like ( x )
2755
+ out = np .zeros ( shape = x . shape , dtype = x . dtype )
2746
2756
pos = x >= 0 # binary mask
2747
2757
2748
2758
# when x >= 0
2749
- if abs ( lmbda ) < np . spacing ( 1. ) :
2750
- out [pos ] = np .log1p (x [pos ])
2759
+ if lmbda < 1e-19 :
2760
+ out [pos ] = np .log (x [pos ] + 1 )
2751
2761
else : # lmbda != 0
2752
2762
out [pos ] = (np .power (x [pos ] + 1 , lmbda ) - 1 ) / lmbda
2753
2763
2754
2764
# when x < 0
2755
- if abs ( lmbda - 2 ) > np . spacing ( 1. ) :
2765
+ if lmbda < 2 - 1e-19 :
2756
2766
out [~ pos ] = - (np .power (- x [~ pos ] + 1 , 2 - lmbda ) - 1 ) / (2 - lmbda )
2757
2767
else : # lmbda == 2
2758
- out [~ pos ] = - np .log1p (- x [~ pos ])
2768
+ out [~ pos ] = - np .log (- x [~ pos ] + 1 )
2759
2769
2760
2770
return out
2761
2771
def _neg_log_likelihood(lmbda):
    """Return the negative log likelihood of the observed data x as a
    function of lambda.

    This is the objective minimized to fit lambda. Relies on ``self``
    and ``x`` from the enclosing scope.
    """
    x_trans = self._yeo_johnson_transform(x, lmbda)
    n_samples = x.shape[0]

    # MLE estimates of the fitted normal distribution: note the biased
    # (1/n) variance estimator, which is what the likelihood derivation
    # calls for.
    est_mean = x_trans.sum() / n_samples
    est_var = np.power(x_trans - est_mean, 2).sum() / n_samples

    loglike = -n_samples / 2 * np.log(est_var)
    # log1p is more accurate than log(abs(x) + 1) for values near zero
    loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()

    return -loglike