@@ -55,14 +55,19 @@ def _csr_row_norms(np.ndarray[floating, ndim=1, mode="c"] X_data,
55
55
return norms
56
56
57
57
58
- def csr_mean_variance_axis0 (X ):
58
+ def csr_mean_variance_axis0 (X , ddof = 0 ):
59
59
""" Compute mean and variance along axis 0 on a CSR matrix
60
60
61
61
Parameters
62
62
----------
63
63
X : CSR sparse matrix, shape (n_samples, n_features)
64
64
Input data.
65
65
66
+ ddof : int, optional
67
+ "Delta Degrees of Freedom": the divisor used in the calculation is
68
+ ``N - ddof``, where ``N`` is the number of non-NaN elements. By default
69
+ ddof is zero.
70
+
66
71
Returns
67
72
-------
68
73
means : float array with shape (n_features,)
@@ -74,15 +79,17 @@ def csr_mean_variance_axis0(X):
74
79
"""
75
80
if X.dtype not in [np.float32, np.float64]:
76
81
X = X.astype(np.float64)
77
- means, variances, _ = _csr_mean_variance_axis0(X.data, X.shape[0 ],
78
- X.shape[1 ], X.indices)
82
+ means, variances, _ = _csr_mean_variance_axis0(
83
+ X.data, X.shape[0 ], X.shape[1 ], X.indices, ddof
84
+ )
79
85
return means, variances
80
86
81
87
82
88
def _csr_mean_variance_axis0 (np.ndarray[floating , ndim = 1 , mode = " c" ] X_data,
83
89
unsigned long long n_samples ,
84
90
unsigned long long n_features ,
85
- np.ndarray[integral , ndim = 1 ] X_indices):
91
+ np.ndarray[integral , ndim = 1 ] X_indices,
92
+ unsigned long long ddof = 0 ):
86
93
# Implement the function here since variables using fused types
87
94
# cannot be declared directly and can only be passed as function arguments
88
95
cdef:
@@ -130,19 +137,24 @@ def _csr_mean_variance_axis0(np.ndarray[floating, ndim=1, mode="c"] X_data,
130
137
131
138
for i in xrange (n_features):
132
139
variances[i] += (n_samples - counts_nan[i] - counts[i]) * means[i]** 2
133
- variances[i] /= (n_samples - counts_nan[i])
140
+ variances[i] /= (n_samples - ddof - counts_nan[i])
134
141
135
142
return means, variances, counts_nan
136
143
137
144
138
- def csc_mean_variance_axis0 (X ):
145
+ def csc_mean_variance_axis0 (X , ddof = 0 ):
139
146
""" Compute mean and variance along axis 0 on a CSC matrix
140
147
141
148
Parameters
142
149
----------
143
150
X : CSC sparse matrix, shape (n_samples, n_features)
144
151
Input data.
145
152
153
+ ddof : int, optional
154
+ "Delta Degrees of Freedom": the divisor used in the calculation is
155
+ ``N - ddof``, where ``N`` is the number of non-NaN elements. By default
156
+ ddof is zero.
157
+
146
158
Returns
147
159
-------
148
160
means : float array with shape (n_features,)
@@ -154,17 +166,18 @@ def csc_mean_variance_axis0(X):
154
166
"""
155
167
if X.dtype not in [np.float32, np.float64]:
156
168
X = X.astype(np.float64)
157
- means, variances, _ = _csc_mean_variance_axis0(X.data, X.shape[ 0 ],
158
- X.shape[1 ], X.indices,
159
- X.indptr )
169
+ means, variances, _ = _csc_mean_variance_axis0(
170
+ X.data, X.shape[ 0 ], X.shape[1 ], X.indices, X.indptr, ddof
171
+ )
160
172
return means, variances
161
173
162
174
163
175
def _csc_mean_variance_axis0 (np.ndarray[floating , ndim = 1 ] X_data,
164
176
unsigned long long n_samples ,
165
177
unsigned long long n_features ,
166
178
np.ndarray[integral , ndim = 1 ] X_indices,
167
- np.ndarray[integral , ndim = 1 ] X_indptr):
179
+ np.ndarray[integral , ndim = 1 ] X_indptr,
180
+ unsigned long long ddof = 0 ):
168
181
# Implement the function here since variables using fused types
169
182
# cannot be declared directly and can only be passed as function arguments
170
183
cdef:
@@ -209,7 +222,7 @@ def _csc_mean_variance_axis0(np.ndarray[floating, ndim=1] X_data,
209
222
variances[i] += diff * diff
210
223
211
224
variances[i] += (n_samples - counts_nan[i] - counts) * means[i]** 2
212
- variances[i] /= (n_samples - counts_nan[i])
225
+ variances[i] /= (n_samples - ddof - counts_nan[i])
213
226
214
227
return means, variances, counts_nan
215
228
0 commit comments