From 207f9746026b5c2c0393eed39a99be5f298751b0 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 3 Mar 2023 17:52:34 +0500 Subject: [PATCH 1/2] MAINT replace cnp.ndarray with memory views in sparsefuncs_fast --- setup.py | 1 + sklearn/utils/sparsefuncs_fast.pyx | 189 ++++++++++++++++------------- 2 files changed, 107 insertions(+), 83 deletions(-) diff --git a/setup.py b/setup.py index 633404e48ef4c..a6c57a33303a5 100755 --- a/setup.py +++ b/setup.py @@ -129,6 +129,7 @@ "sklearn.utils._weight_vector", "sklearn.utils.arrayfuncs", "sklearn.utils.murmurhash", + "sklearn.utils.sparsefuncs_fast", ) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index 4b975872c0e0a..79b82c5b1b273 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -100,12 +100,14 @@ def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False): return means, variances -def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, - unsigned long long n_samples, - unsigned long long n_features, - cnp.ndarray[integral, ndim=1] X_indices, - cnp.ndarray[integral, ndim=1] X_indptr, - cnp.ndarray[floating, ndim=1] weights): +def _csr_mean_variance_axis0( + const floating[::1] X_data, + unsigned long long n_samples, + unsigned long long n_features, + const integral[:] X_indices, + const integral[:] X_indptr, + const floating[:] weights, +): # Implement the function here since variables using fused types # cannot be declared directly and can only be passed as function arguments cdef: @@ -114,21 +116,20 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, integral i, col_ind cnp.float64_t diff # means[j] contains the mean of feature j - cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features) + cnp.float64_t[:] means = np.zeros(n_features) # variances[j] contains the variance of feature j - cnp.ndarray[cnp.float64_t, ndim=1] variances = np.zeros(n_features) + cnp.float64_t[:] 
variances = np.zeros(n_features) - cnp.ndarray[cnp.float64_t, ndim=1] sum_weights = np.full( - fill_value=np.sum(weights, dtype=np.float64), shape=n_features) - cnp.ndarray[cnp.float64_t, ndim=1] sum_weights_nz = np.zeros( - shape=n_features) - cnp.ndarray[cnp.float64_t, ndim=1] correction = np.zeros( - shape=n_features) + cnp.float64_t[:] sum_weights = np.full( + fill_value=np.sum(weights, dtype=np.float64), shape=n_features + ) + cnp.float64_t[:] sum_weights_nz = np.zeros(shape=n_features) + cnp.float64_t[:] correction = np.zeros(shape=n_features) - cnp.ndarray[cnp.uint64_t, ndim=1] counts = np.full( - fill_value=weights.shape[0], shape=n_features, dtype=np.uint64) - cnp.ndarray[cnp.uint64_t, ndim=1] counts_nz = np.zeros( - shape=n_features, dtype=np.uint64) + cnp.uint64_t[:] counts = np.full( + fill_value=weights.shape[0], shape=n_features, dtype=np.uint64 + ) + cnp.uint64_t[:] counts_nz = np.zeros(shape=n_features, dtype=np.uint64) for row_ind in range(len(X_indptr) - 1): for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]): @@ -177,11 +178,15 @@ def _csr_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, ) if floating is float: - return (np.array(means, dtype=np.float32), - np.array(variances, dtype=np.float32), - np.array(sum_weights, dtype=np.float32)) + return ( + np.array(means, dtype=np.float32), + np.array(variances, dtype=np.float32), + np.array(sum_weights, dtype=np.float32), + ) else: - return means, variances, sum_weights + return ( + np.asarray(means), np.asarray(variances), np.asarray(sum_weights) + ) def csc_mean_variance_axis0(X, weights=None, return_sum_weights=False): @@ -229,12 +234,14 @@ def csc_mean_variance_axis0(X, weights=None, return_sum_weights=False): return means, variances -def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, - unsigned long long n_samples, - unsigned long long n_features, - cnp.ndarray[integral, ndim=1] X_indices, - cnp.ndarray[integral, ndim=1] X_indptr, - 
cnp.ndarray[floating, ndim=1] weights): +def _csc_mean_variance_axis0( + const floating[::1] X_data, + unsigned long long n_samples, + unsigned long long n_features, + const integral[:] X_indices, + const integral[:] X_indptr, + const floating[:] weights, +): # Implement the function here since variables using fused types # cannot be declared directly and can only be passed as function arguments cdef: @@ -242,21 +249,20 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, unsigned long long feature_idx, col_ind cnp.float64_t diff # means[j] contains the mean of feature j - cnp.ndarray[cnp.float64_t, ndim=1] means = np.zeros(n_features) + cnp.float64_t[:] means = np.zeros(n_features) # variances[j] contains the variance of feature j - cnp.ndarray[cnp.float64_t, ndim=1] variances = np.zeros(n_features) + cnp.float64_t[:] variances = np.zeros(n_features) - cnp.ndarray[cnp.float64_t, ndim=1] sum_weights = np.full( - fill_value=np.sum(weights, dtype=np.float64), shape=n_features) - cnp.ndarray[cnp.float64_t, ndim=1] sum_weights_nz = np.zeros( - shape=n_features) - cnp.ndarray[cnp.float64_t, ndim=1] correction = np.zeros( - shape=n_features) + cnp.float64_t[:] sum_weights = np.full( + fill_value=np.sum(weights, dtype=np.float64), shape=n_features + ) + cnp.float64_t[:] sum_weights_nz = np.zeros(shape=n_features) + cnp.float64_t[:] correction = np.zeros(shape=n_features) - cnp.ndarray[cnp.uint64_t, ndim=1] counts = np.full( - fill_value=weights.shape[0], shape=n_features, dtype=np.uint64) - cnp.ndarray[cnp.uint64_t, ndim=1] counts_nz = np.zeros( - shape=n_features, dtype=np.uint64) + cnp.uint64_t[:] counts = np.full( + fill_value=weights.shape[0], shape=n_features, dtype=np.uint64 + ) + cnp.uint64_t[:] counts_nz = np.zeros(shape=n_features, dtype=np.uint64) for col_ind in range(n_features): for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]): @@ -308,7 +314,9 @@ def _csc_mean_variance_axis0(cnp.ndarray[floating, ndim=1, mode="c"] X_data, 
np.array(variances, dtype=np.float32), np.array(sum_weights, dtype=np.float32)) else: - return means, variances, sum_weights + return ( + np.asarray(means), np.asarray(variances), np.asarray(sum_weights) + ) def incr_mean_variance_axis0(X, last_mean, last_var, last_n, weights=None): @@ -383,18 +391,20 @@ def incr_mean_variance_axis0(X, last_mean, last_var, last_n, weights=None): weights.astype(X_dtype, copy=False)) -def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data, - floating n_samples, - unsigned long long n_features, - cnp.ndarray[int, ndim=1] X_indices, - # X_indptr might be either in32 or int64 - cnp.ndarray[integral, ndim=1] X_indptr, - str X_format, - cnp.ndarray[floating, ndim=1] last_mean, - cnp.ndarray[floating, ndim=1] last_var, - cnp.ndarray[floating, ndim=1] last_n, - # previous sum of the weights (ie float) - cnp.ndarray[floating, ndim=1] weights): +def _incr_mean_variance_axis0( + const floating[:] X_data, + floating n_samples, + unsigned long long n_features, + const int[:] X_indices, + # X_indptr might be either int32 or int64 + const integral[:] X_indptr, + str X_format, + floating[:] last_mean, + floating[:] last_var, + floating[:] last_n, + # previous sum of the weights (i.e. float) + const floating[:] weights, +): # Implement the function here since variables using fused types # cannot be declared directly and can only be passed as function arguments cdef: @@ -404,10 +414,10 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data, # new = the current increment # updated = the aggregated stats # when arrays, they are indexed by i per-feature - cnp.ndarray[floating, ndim=1] new_mean - cnp.ndarray[floating, ndim=1] new_var - cnp.ndarray[floating, ndim=1] updated_mean - cnp.ndarray[floating, ndim=1] updated_var + floating[:] new_mean + floating[:] new_var + floating[:] updated_mean + floating[:] updated_var if floating is float: dtype = np.float32 @@ -420,9 +430,9 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, 
ndim=1] X_data, updated_var = np.zeros_like(new_mean, dtype=dtype) cdef: - cnp.ndarray[floating, ndim=1] new_n - cnp.ndarray[floating, ndim=1] updated_n - cnp.ndarray[floating, ndim=1] last_over_new_n + floating[:] new_n + floating[:] updated_n + floating[:] last_over_new_n # Obtain new stats first updated_n = np.zeros(shape=n_features, dtype=dtype) @@ -444,7 +454,7 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data, break if is_first_pass: - return new_mean, new_var, new_n + return np.asarray(new_mean), np.asarray(new_var), np.asarray(new_n) for i in range(n_features): updated_n[i] = last_n[i] + new_n[i] @@ -471,7 +481,11 @@ def _incr_mean_variance_axis0(cnp.ndarray[floating, ndim=1] X_data, updated_mean[i] = last_mean[i] updated_n[i] = last_n[i] - return updated_mean, updated_var, updated_n + return ( + np.asarray(updated_mean), + np.asarray(updated_var), + np.asarray(updated_n), + ) def inplace_csr_row_normalize_l1(X): @@ -479,10 +493,12 @@ def inplace_csr_row_normalize_l1(X): _inplace_csr_row_normalize_l1(X.data, X.shape, X.indices, X.indptr) -def _inplace_csr_row_normalize_l1(cnp.ndarray[floating, ndim=1] X_data, - shape, - cnp.ndarray[integral, ndim=1] X_indices, - cnp.ndarray[integral, ndim=1] X_indptr): +def _inplace_csr_row_normalize_l1( + floating[:] X_data, + shape, + const integral[:] X_indices, + const integral[:] X_indptr, +): cdef: unsigned long long n_samples = shape[0] unsigned long long n_features = shape[1] @@ -515,10 +531,12 @@ def inplace_csr_row_normalize_l2(X): _inplace_csr_row_normalize_l2(X.data, X.shape, X.indices, X.indptr) -def _inplace_csr_row_normalize_l2(cnp.ndarray[floating, ndim=1] X_data, - shape, - cnp.ndarray[integral, ndim=1] X_indices, - cnp.ndarray[integral, ndim=1] X_indptr): +def _inplace_csr_row_normalize_l2( + floating[:] X_data, + shape, + const integral[:] X_indices, + const integral[:] X_indptr, +): cdef: unsigned long long n_samples = shape[0] unsigned long long n_features = shape[1] @@ -543,10 
+561,12 @@ def _inplace_csr_row_normalize_l2(cnp.ndarray[floating, ndim=1] X_data, X_data[j] /= sum_ -def assign_rows_csr(X, - cnp.ndarray[cnp.npy_intp, ndim=1] X_rows, - cnp.ndarray[cnp.npy_intp, ndim=1] out_rows, - cnp.ndarray[floating, ndim=2, mode="c"] out): +def assign_rows_csr( + X, + const cnp.npy_intp[:] X_rows, + const cnp.npy_intp[:] out_rows, + floating[:, ::1] out, +): """Densify selected rows of a CSR matrix into a preallocated array. Like out[out_rows] = X[X_rows].toarray() but without copying. @@ -562,18 +582,21 @@ def assign_rows_csr(X, cdef: # npy_intp (np.intp in Python) is what np.where returns, # but int is what scipy.sparse uses. - int i, ind, j + int i, ind, j, k cnp.npy_intp rX - cnp.ndarray[floating, ndim=1] data = X.data - cnp.ndarray[int, ndim=1] indices = X.indices, indptr = X.indptr + const floating[:] data = X.data + const int[:] indices = X.indices, indptr = X.indptr if X_rows.shape[0] != out_rows.shape[0]: raise ValueError("cannot assign %d rows to %d" % (X_rows.shape[0], out_rows.shape[0])) - out[out_rows] = 0. 
- for i in range(X_rows.shape[0]): - rX = X_rows[i] - for ind in range(indptr[rX], indptr[rX + 1]): - j = indices[ind] - out[out_rows[i], j] = data[ind] + with nogil: + for k in range(out_rows.shape[0]): + out[out_rows[k]] = 0.0 + + for i in range(X_rows.shape[0]): + rX = X_rows[i] + for ind in range(indptr[rX], indptr[rX + 1]): + j = indices[ind] + out[out_rows[i], j] = data[ind] From 7fae6df0eeb4a21e948ef8863bddd042b532100c Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Mon, 6 Mar 2023 15:24:07 +0500 Subject: [PATCH 2/2] PR suggestions: make memory views contiguous, which are implemented in the file --- sklearn/utils/sparsefuncs_fast.pyx | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index 79b82c5b1b273..a2e0089250a6d 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -116,20 +116,20 @@ def _csr_mean_variance_axis0( integral i, col_ind cnp.float64_t diff # means[j] contains the mean of feature j - cnp.float64_t[:] means = np.zeros(n_features) + cnp.float64_t[::1] means = np.zeros(n_features) # variances[j] contains the variance of feature j - cnp.float64_t[:] variances = np.zeros(n_features) + cnp.float64_t[::1] variances = np.zeros(n_features) - cnp.float64_t[:] sum_weights = np.full( + cnp.float64_t[::1] sum_weights = np.full( fill_value=np.sum(weights, dtype=np.float64), shape=n_features ) - cnp.float64_t[:] sum_weights_nz = np.zeros(shape=n_features) - cnp.float64_t[:] correction = np.zeros(shape=n_features) + cnp.float64_t[::1] sum_weights_nz = np.zeros(shape=n_features) + cnp.float64_t[::1] correction = np.zeros(shape=n_features) - cnp.uint64_t[:] counts = np.full( + cnp.uint64_t[::1] counts = np.full( fill_value=weights.shape[0], shape=n_features, dtype=np.uint64 ) - cnp.uint64_t[:] counts_nz = np.zeros(shape=n_features, dtype=np.uint64) + cnp.uint64_t[::1] counts_nz = 
np.zeros(shape=n_features, dtype=np.uint64) for row_ind in range(len(X_indptr) - 1): for i in range(X_indptr[row_ind], X_indptr[row_ind + 1]): @@ -249,20 +249,20 @@ def _csc_mean_variance_axis0( unsigned long long feature_idx, col_ind cnp.float64_t diff # means[j] contains the mean of feature j - cnp.float64_t[:] means = np.zeros(n_features) + cnp.float64_t[::1] means = np.zeros(n_features) # variances[j] contains the variance of feature j - cnp.float64_t[:] variances = np.zeros(n_features) + cnp.float64_t[::1] variances = np.zeros(n_features) - cnp.float64_t[:] sum_weights = np.full( + cnp.float64_t[::1] sum_weights = np.full( fill_value=np.sum(weights, dtype=np.float64), shape=n_features ) - cnp.float64_t[:] sum_weights_nz = np.zeros(shape=n_features) - cnp.float64_t[:] correction = np.zeros(shape=n_features) + cnp.float64_t[::1] sum_weights_nz = np.zeros(shape=n_features) + cnp.float64_t[::1] correction = np.zeros(shape=n_features) - cnp.uint64_t[:] counts = np.full( + cnp.uint64_t[::1] counts = np.full( fill_value=weights.shape[0], shape=n_features, dtype=np.uint64 ) - cnp.uint64_t[:] counts_nz = np.zeros(shape=n_features, dtype=np.uint64) + cnp.uint64_t[::1] counts_nz = np.zeros(shape=n_features, dtype=np.uint64) for col_ind in range(n_features): for i in range(X_indptr[col_ind], X_indptr[col_ind + 1]): @@ -414,10 +414,10 @@ def _incr_mean_variance_axis0( # new = the current increment # updated = the aggregated stats # when arrays, they are indexed by i per-feature - floating[:] new_mean - floating[:] new_var - floating[:] updated_mean - floating[:] updated_var + floating[::1] new_mean + floating[::1] new_var + floating[::1] updated_mean + floating[::1] updated_var if floating is float: dtype = np.float32 @@ -430,9 +430,9 @@ def _incr_mean_variance_axis0( updated_var = np.zeros_like(new_mean, dtype=dtype) cdef: - floating[:] new_n - floating[:] updated_n - floating[:] last_over_new_n + floating[::1] new_n + floating[::1] updated_n + floating[::1] 
last_over_new_n # Obtain new stats first updated_n = np.zeros(shape=n_features, dtype=dtype)