From 78700804a8800c600953cd5402518cc98e152c00 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ <arturo.amor-quiroz@polytechnique.edu>
Date: Mon, 12 Sep 2022 10:42:56 +0200
Subject: [PATCH 1/7] ENH csr_row_norms optimization

---
 sklearn/utils/sparsefuncs_fast.pyx | 38 ++++++++++++++++++------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 895a41c23634b..c71594061cb9b 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -27,27 +27,35 @@ def csr_row_norms(X):
     """L2 norm of each row in CSR matrix X."""
     if X.dtype not in [np.float32, np.float64]:
         X = X.astype(np.float64)
-    return _csr_row_norms(X.data, X.shape, X.indices, X.indptr)
+    return np.asarray(_csr_row_norms(X.data, X.indices, X.indptr))
 
 
-def _csr_row_norms(cnp.ndarray[floating, ndim=1, mode="c"] X_data,
-                   shape,
-                   cnp.ndarray[integral, ndim=1, mode="c"] X_indices,
-                   cnp.ndarray[integral, ndim=1, mode="c"] X_indptr):
+def _csr_row_norms(
+    floating[::1] X_data,
+    integral[::1] X_indices,
+    integral[::1] X_indptr,
+):
     cdef:
-        unsigned long long n_samples = shape[0]
-        unsigned long long i
+        integral n_samples = X_indptr.shape[0] - 1
+        integral i
         integral j
-        double sum_
+        floating sum_
 
-    norms = np.empty(n_samples, dtype=X_data.dtype)
-    cdef floating[::1] norms_view = norms
+    if floating is float:
+        dtype = np.float32
+    else:
+        dtype = np.float64
 
-    for i in range(n_samples):
-        sum_ = 0.0
-        for j in range(X_indptr[i], X_indptr[i + 1]):
-            sum_ += X_data[j] * X_data[j]
-        norms_view[i] = sum_
+    cdef floating[::1] norms = (
+        np.empty(n_samples, dtype=dtype)
+    )
+
+    with nogil:
+        for i in range(n_samples):
+            sum_ = 0.0
+            for j in range(X_indptr[i], X_indptr[i + 1]):
+                sum_ += X_data[j] * X_data[j]
+            norms[i] = sum_
 
     return norms
 

From f4ff936d9f60411773560d313d14ea2ebdb25095 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Mon, 12 Sep 2022 12:05:00 +0200
Subject: [PATCH 2/7] Apply suggestions from code review

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/sparsefuncs_fast.pyx | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index c71594061cb9b..a3eda2dfd8d7c 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -41,14 +41,9 @@ def _csr_row_norms(
         integral j
         floating sum_
 
-    if floating is float:
-        dtype = np.float32
-    else:
-        dtype = np.float64
+    dtype = np.float32 if floating is float else np.float64
 
-    cdef floating[::1] norms = (
-        np.empty(n_samples, dtype=dtype)
-    )
+    cdef floating[::1] norms = np.empty(n_samples, dtype=dtype)
 
     with nogil:
         for i in range(n_samples):

From 496c6cc2b149f292c6a452e75510d8f39d7950d6 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Mon, 12 Sep 2022 15:50:58 +0200
Subject: [PATCH 3/7] Update sklearn/utils/sparsefuncs_fast.pyx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com>
---
 sklearn/utils/sparsefuncs_fast.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index a3eda2dfd8d7c..2eb33f02ed9b3 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -24,7 +24,7 @@ ctypedef cnp.float64_t DOUBLE
 
 
 def csr_row_norms(X):
-    """L2 norm of each row in CSR matrix X."""
+    """Squared L2 norm of each row in CSR matrix X."""
     if X.dtype not in [np.float32, np.float64]:
         X = X.astype(np.float64)
     return np.asarray(_csr_row_norms(X.data, X.indices, X.indptr))

From 5e84dad2519a5889daa409a7b13355881fe34972 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 19 Jan 2023 11:01:41 +0100
Subject: [PATCH 4/7] Apply suggestions from code review

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/sparsefuncs_fast.pyx | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 2eb33f02ed9b3..bd60a02f5e802 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -27,18 +27,17 @@ def csr_row_norms(X):
     """Squared L2 norm of each row in CSR matrix X."""
     if X.dtype not in [np.float32, np.float64]:
         X = X.astype(np.float64)
-    return np.asarray(_csr_row_norms(X.data, X.indices, X.indptr))
+    return _csr_row_norms(X.data, X.indices, X.indptr).base
 
 
 def _csr_row_norms(
-    floating[::1] X_data,
-    integral[::1] X_indices,
-    integral[::1] X_indptr,
+    const floating[::1] X_data,
+    const integral[::1] X_indices,
+    const integral[::1] X_indptr,
 ):
     cdef:
         integral n_samples = X_indptr.shape[0] - 1
-        integral i
-        integral j
+        integral i, j
         floating sum_
 
     dtype = np.float32 if floating is float else np.float64

From a4fa7ec6a85d4bd023f8da93a418483c32550c3e Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Mon, 23 Jan 2023 16:58:18 +0100
Subject: [PATCH 5/7] Apply suggestions from code review

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/utils/sparsefuncs_fast.pyx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 2292bc1690111..8673f30eba972 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -42,14 +42,12 @@ def _csr_row_norms(
 
     dtype = np.float32 if floating is float else np.float64
 
-    cdef floating[::1] norms = np.empty(n_samples, dtype=dtype)
+    cdef floating[::1] norms = np.zeros(n_samples, dtype=dtype)
 
     with nogil:
         for i in range(n_samples):
-            sum_ = 0.0
             for j in range(X_indptr[i], X_indptr[i + 1]):
-                sum_ += X_data[j] * X_data[j]
-            norms[i] = sum_
+                norms[i] += X_data[j] * X_data[j]
 
     return norms
 

From 92316b3a9d39cdf90c48a883f3ce5f90ae4de667 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 26 Jan 2023 11:19:38 +0100
Subject: [PATCH 6/7] Apply suggestions from code review

Co-authored-by: Julien Jerphanion <git@jjerphan.xyz>
---
 sklearn/utils/sparsefuncs_fast.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 8673f30eba972..629da25a19c2b 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -27,7 +27,7 @@ def csr_row_norms(X):
     """Squared L2 norm of each row in CSR matrix X."""
     if X.dtype not in [np.float32, np.float64]:
         X = X.astype(np.float64)
-    return _csr_row_norms(X.data, X.indices, X.indptr).base
+    return _csr_row_norms(X.data, X.indices, X.indptr)
 
 
 def _csr_row_norms(
@@ -38,7 +38,7 @@ def _csr_row_norms(
     cdef:
         integral n_samples = X_indptr.shape[0] - 1
         integral i, j
-        floating sum_
+        double sum_
 
     dtype = np.float32 if floating is float else np.float64
 
@@ -49,7 +49,7 @@ def _csr_row_norms(
             for j in range(X_indptr[i], X_indptr[i + 1]):
                 norms[i] += X_data[j] * X_data[j]
 
-    return norms
+    return norms.base
 
 
 def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False):

From 036a88715de61dd823418400f25503dcccf0d153 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ <arturo.amor-quiroz@polytechnique.edu>
Date: Fri, 27 Jan 2023 11:27:21 +0100
Subject: [PATCH 7/7] Apply suggestion from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <jeremiedbb@users.noreply.github.com>
---
 sklearn/utils/sparsefuncs_fast.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index 629da25a19c2b..9cbfcc1f7a3f6 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -49,7 +49,7 @@ def _csr_row_norms(
             for j in range(X_indptr[i], X_indptr[i + 1]):
                 norms[i] += X_data[j] * X_data[j]
 
-    return norms.base
+    return np.asarray(norms)
 
 
 def csr_mean_variance_axis0(X, weights=None, return_sum_weights=False):