22
22
from ..base import _fit_context
23
23
from ..utils import check_random_state
24
24
from ..utils ._arpack import _init_arpack_v0
25
- from ..utils ._array_api import get_namespace
25
+ from ..utils ._array_api import _convert_to_numpy , get_namespace
26
26
from ..utils ._param_validation import Interval , RealNotInt , StrOptions
27
27
from ..utils .deprecation import deprecated
28
28
from ..utils .extmath import fast_logdet , randomized_svd , stable_cumsum , svd_flip
@@ -60,6 +60,7 @@ def _assess_dimension(spectrum, rank, n_samples):
60
60
Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604
61
61
<https://proceedings.neurips.cc/paper/2000/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf>`_
62
62
"""
63
+ xp , _ = get_namespace (spectrum )
63
64
64
65
n_features = spectrum .shape [0 ]
65
66
if not 1 <= rank < n_features :
@@ -73,29 +74,29 @@ def _assess_dimension(spectrum, rank, n_samples):
73
74
# small and won't be the max anyway. Also, it can lead to numerical
74
75
# issues below when computing pa, in particular in log((spectrum[i] -
75
76
# spectrum[j]) because this will take the log of something very small.
76
- return - np .inf
77
+ return - xp .inf
77
78
78
79
pu = - rank * log (2.0 )
79
80
for i in range (1 , rank + 1 ):
80
81
pu += (
81
82
gammaln ((n_features - i + 1 ) / 2.0 )
82
- - log (np .pi ) * (n_features - i + 1 ) / 2.0
83
+ - log (xp .pi ) * (n_features - i + 1 ) / 2.0
83
84
)
84
85
85
- pl = np .sum (np .log (spectrum [:rank ]))
86
+ pl = xp .sum (xp .log (spectrum [:rank ]))
86
87
pl = - pl * n_samples / 2.0
87
88
88
- v = max (eps , np .sum (spectrum [rank :]) / (n_features - rank ))
89
- pv = - np . log (v ) * n_samples * (n_features - rank ) / 2.0
89
+ v = max (eps , xp .sum (spectrum [rank :]) / (n_features - rank ))
90
+ pv = - log (v ) * n_samples * (n_features - rank ) / 2.0
90
91
91
92
m = n_features * rank - rank * (rank + 1.0 ) / 2.0
92
- pp = log (2.0 * np .pi ) * (m + rank ) / 2.0
93
+ pp = log (2.0 * xp .pi ) * (m + rank ) / 2.0
93
94
94
95
pa = 0.0
95
- spectrum_ = spectrum . copy ( )
96
+ spectrum_ = xp . asarray ( spectrum , copy = True )
96
97
spectrum_ [rank :n_features ] = v
97
98
for i in range (rank ):
98
- for j in range (i + 1 , len ( spectrum ) ):
99
+ for j in range (i + 1 , spectrum . shape [ 0 ] ):
99
100
pa += log (
100
101
(spectrum [i ] - spectrum [j ]) * (1.0 / spectrum_ [j ] - 1.0 / spectrum_ [i ])
101
102
) + log (n_samples )
@@ -116,7 +117,7 @@ def _infer_dimension(spectrum, n_samples):
116
117
ll [0 ] = - xp .inf # we don't want to return n_components = 0
117
118
for rank in range (1 , spectrum .shape [0 ]):
118
119
ll [rank ] = _assess_dimension (spectrum , rank , n_samples )
119
- return ll .argmax ()
120
+ return xp .argmax (ll )
120
121
121
122
122
123
class PCA (_BasePCA ):
@@ -578,8 +579,24 @@ def _fit_full(self, X, n_components):
578
579
# side='right' ensures that number of features selected
579
580
# their variance is always greater than n_components float
580
581
# passed. More discussion in issue: #15669
581
- ratio_cumsum = stable_cumsum (explained_variance_ratio_ )
582
- n_components = xp .searchsorted (ratio_cumsum , n_components , side = "right" ) + 1
582
+ if is_array_api_compliant :
583
+ # Convert to numpy as xp.cumsum and xp.searchsorted are not
584
+ # part of the Array API standard yet:
585
+ #
586
+ # https://github.com/data-apis/array-api/issues/597
587
+ # https://github.com/data-apis/array-api/issues/688
588
+ #
589
+ # Furthermore, it's not always safe to call them for namespaces
590
+ # that already implement them: for instance as
591
+ # cupy.searchsorted does not accept a float as second argument.
592
+ explained_variance_ratio_np = _convert_to_numpy (
593
+ explained_variance_ratio_ , xp = xp
594
+ )
595
+ else :
596
+ explained_variance_ratio_np = explained_variance_ratio_
597
+ ratio_cumsum = stable_cumsum (explained_variance_ratio_np )
598
+ n_components = np .searchsorted (ratio_cumsum , n_components , side = "right" ) + 1
599
+
583
600
# Compute noise covariance using Probabilistic PCA model
584
601
# The sigma2 maximum likelihood (cf. eq. 12.46)
585
602
if n_components < min (n_features , n_samples ):
0 commit comments