@@ -278,6 +278,80 @@ def __init__(self, n_components=None, copy=True, whiten=False,
278
278
self .iterated_power = iterated_power
279
279
self .random_state = random_state
280
280
281
def get_feature_names(self, input_features=None, show_coef=False):
    """Return dominant feature names for each component.

    Parameters
    ----------
    input_features : list of string, shape (n_features), optional
        String names for input features if available. By default,
        "x0", "x1", ... "xn_features" is used.

    show_coef : boolean or integer, default False
        When it is "True", return the principal components as the
        combination of the input features. If "False", will be just
        the component names. If it is an integer n, it returns the
        sorted top n contributions to each component.

    Returns
    -------
    output_feature_names : list of string, shape (n_components)
        When show_coef is "True", it is represented by the contribution
        of input features and show_coef is "False", it just represents
        the component names

    Raises
    ------
    ValueError
        If the length of ``input_features`` differs from the number of
        columns of ``components_``, if an integer ``show_coef`` is not
        between 1 and that number of columns (inclusive), or if
        ``show_coef`` is neither a boolean nor an integer.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.decomposition import PCA
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> pca = PCA(n_components=2).fit(X)
    >>> pca.get_feature_names(show_coef=True)
    ['0.84*x0 + 0.54*x1', '0.54*x0 - 0.84*x1']
    >>> pca.get_feature_names(show_coef=1)
    ['0.84*x0', '-0.84*x1']
    >>> pca.get_feature_names()
    ['pc0', 'pc1']
    """
    # Function-scope import keeps this method self-contained; Integral also
    # matches NumPy integer scalars, which a plain ``int`` check would not.
    import numbers

    check_is_fitted(self, 'components_')

    components = self.components_
    # Use the fitted shape rather than ``self.n_components``: the latter is
    # the constructor argument and may be None, so it cannot drive a range().
    n_components, n_features = components.shape

    if input_features is None:
        input_features = ['x%d' % i for i in range(n_features)]
    elif len(input_features) != n_features:
        raise ValueError("Length of input_features is {0} but it must "
                         "equal number of features when fitted: {1}."
                         .format(len(input_features), n_features))

    def name_generator(coefficients, names):
        # The first term keeps its own sign; later terms render the sign as
        # a separate "+"/"-" token followed by the absolute value.
        yield "{0:.2g}*{1}".format(coefficients[0], names[0])
        for c, n in zip(coefficients[1:], names[1:]):
            yield "{0:s} {1:.2g}*{2}".format('-' if c < 0 else '+',
                                             abs(c), n)

    # Identity checks come first so that True/False are never treated as
    # the integers 1/0 by the Integral branch below.
    if show_coef is True:
        return [' '.join(name_generator(row, input_features))
                for row in components]

    if show_coef is False:
        return ['pc{0}'.format(i) for i in range(n_components)]

    if isinstance(show_coef, numbers.Integral):
        # Zero is rejected as well: with no selected term there is no first
        # coefficient for name_generator to format.
        if show_coef < 1 or show_coef > n_features:
            raise ValueError("show_coef is {0} but it must be between 1 and "
                             "number of features {1}"
                             .format(show_coef, n_features))
        # Per-row feature indices ordered by decreasing absolute coefficient.
        by_contribution = np.argsort(np.abs(components), axis=1)[:, ::-1]
        top = by_contribution[:, :show_coef]
        names = np.asarray(input_features)
        return [' '.join(name_generator(row[idx], names[idx]))
                for row, idx in zip(components, top)]

    raise ValueError("show_coef must be integer or boolean")
353
+
354
+
281
355
def fit (self , X , y = None ):
282
356
"""Fit the model with X.
283
357
0 commit comments