@@ -2,9 +2,6 @@
 
 import pytest
 
-from numpy.testing import assert_allclose
-from scipy import linalg
-
 from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.utils import check_random_state
 from sklearn.utils.testing import (assert_array_equal, assert_no_warnings,
@@ -98,75 +95,6 @@ def test_lda_predict():
     assert_raises(ValueError, clf.fit, X, y)
 
 
-@pytest.mark.parametrize("n_classes", [2, 3])
-@pytest.mark.parametrize("solver", ["svd", "lsqr", "eigen"])
-def test_lda_predict_proba(solver, n_classes):
-    def generate_dataset(n_samples, centers, covariances, random_state=None):
-        """Generate a multivariate normal data given some centers and
-        covariances"""
-        rng = check_random_state(random_state)
-        X = np.vstack([rng.multivariate_normal(mean, cov,
-                                               size=n_samples // len(centers))
-                       for mean, cov in zip(centers, covariances)])
-        y = np.hstack([[clazz] * (n_samples // len(centers))
-                       for clazz in range(len(centers))])
-        return X, y
-
-    blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]])[:n_classes]
-    blob_stds = np.array([[[10, 10], [10, 100]]] * len(blob_centers))
-    X, y = generate_dataset(
-        n_samples=90000, centers=blob_centers, covariances=blob_stds,
-        random_state=42
-    )
-    lda = LinearDiscriminantAnalysis(solver=solver, store_covariance=True,
-                                     shrinkage=None).fit(X, y)
-    # check that the empirical means and covariances are close enough to the
-    # one used to generate the data
-    assert_allclose(lda.means_, blob_centers, atol=1e-1)
-    assert_allclose(lda.covariance_, blob_stds[0], atol=1)
-
-    # implement the method to compute the probability given in The Elements
-    # of Statistical Learning (cf. p.127, Sect. 4.4.5 "Logistic Regression
-    # or LDA?")
-    precision = linalg.inv(blob_stds[0])
-    alpha_k = []
-    alpha_k_0 = []
-    for clazz in range(len(blob_centers) - 1):
-        alpha_k.append(
-            np.dot(precision,
-                   (blob_centers[clazz] - blob_centers[-1])[:, np.newaxis]))
-        alpha_k_0.append(
-            np.dot(-0.5 * (blob_centers[clazz] +
-                           blob_centers[-1])[np.newaxis, :], alpha_k[-1]))
-
-    sample = np.array([[-22, 22]])
-
-    def discriminant_func(sample, coef, intercept, clazz):
-        return np.exp(intercept[clazz] + np.dot(sample, coef[clazz]))
-
-    prob = np.array([float(
-        discriminant_func(sample, alpha_k, alpha_k_0, clazz) /
-        (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
-                  for clazz in range(n_classes - 1)]))) for clazz in range(
-                      n_classes - 1)])
-
-    prob_ref = 1 - np.sum(prob)
-
-    # check the consistency of the computed probability
-    # all probabilities should sum to one
-    prob_ref_2 = float(
-        1 / (1 + sum([discriminant_func(sample, alpha_k, alpha_k_0, clazz)
-                      for clazz in range(n_classes - 1)]))
-    )
-
-    assert prob_ref == pytest.approx(prob_ref_2)
-    # check that the probability of LDA are close to the theoretical
-    # probabilties
-    assert_allclose(lda.predict_proba(sample),
-                    np.hstack([prob, prob_ref])[np.newaxis],
-                    atol=1e-2)
-
-
 def test_lda_priors():
     # Test priors (negative priors)
     priors = np.array([0.5, -0.5])
@@ -301,7 +229,7 @@ def test_lda_scaling():
 
 
 def test_lda_store_covariance():
-    # Test for solver 'lsqr' and 'eigen'
+    # Test for slover 'lsqr' and 'eigen'
     # 'store_covariance' has no effect on 'lsqr' and 'eigen' solvers
     for solver in ('lsqr', 'eigen'):
         clf = LinearDiscriminantAnalysis(solver=solver).fit(X6, y6)
@@ -317,7 +245,7 @@ def test_lda_store_covariance():
             np.array([[0.422222, 0.088889], [0.088889, 0.533333]])
         )
 
-    # Test for SVD solver, the default is to not set the covariances_ attribute
+    # Test for SVD slover, the default is to not set the covariances_ attribute
     clf = LinearDiscriminantAnalysis(solver='svd').fit(X6, y6)
     assert not hasattr(clf, 'covariance_')
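For context, the test removed above validates `LinearDiscriminantAnalysis.predict_proba` against the closed-form class posteriors from The Elements of Statistical Learning (Sect. 4.4.5): with class means mu_k and a shared covariance, the log-odds of each class against a reference class are linear in x, and the posteriors follow from a softmax over those discriminants. Below is a minimal standalone sketch of that same computation, assuming equal class priors (the test's blobs are balanced); `lda_posterior` is an illustrative helper, not a scikit-learn API.

# Sketch: closed-form LDA posteriors (ESL, Sect. 4.4.5), assuming equal
# class priors and a covariance matrix shared by all classes.
import numpy as np
from scipy import linalg


def lda_posterior(x, means, covariance):
    """Posterior P(k | x) from the true class means and shared covariance.

    Uses the last class as the reference class K; for equal priors the
    log-odds log P(k|x)/P(K|x) reduce to
        x^T S^-1 (mu_k - mu_K) - 0.5 (mu_k + mu_K)^T S^-1 (mu_k - mu_K),
    which are linear in x.
    """
    precision = linalg.inv(covariance)
    ref = means[-1]
    log_odds = np.array([
        x @ precision @ (mu - ref) - 0.5 * (mu + ref) @ precision @ (mu - ref)
        for mu in means[:-1]
    ])
    denom = 1.0 + np.exp(log_odds).sum()
    # Classes 0..K-1 first, reference class K last.
    return np.append(np.exp(log_odds) / denom, 1.0 / denom)


# The centers and shared covariance used by the removed test.
blob_centers = np.array([[0, 0], [-10, 40], [-30, 30]], dtype=float)
blob_cov = np.array([[10, 10], [10, 100]], dtype=float)
print(lda_posterior(np.array([-22.0, 22.0]), blob_centers, blob_cov))

Evaluated at the test's sample [-22, 22], the returned probabilities sum to one by construction and correspond to the theoretical `prob`/`prob_ref` values the removed test compared against `lda.predict_proba`.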