ENH speed up LDL by using einsum and q_inv · scikit-learn/scikit-learn@7ef5877 · GitHub

Commit 7ef5877

ENH speed up LDL by using einsum and q_inv
1 parent 4e1a696 commit 7ef5877

File tree: 2 files changed, +35 -20 lines changed

sklearn/linear_model/_linear_loss.py

Lines changed: 33 additions & 18 deletions
@@ -707,8 +707,8 @@ class Multinomial_LDL_Decomposition:
     p : ndarray of shape (n_samples, n_classes)
         Array of predicted probabilities per class (and sample).
 
-    q : ndarray of shape (n_samples, n_classes)
-        Helper array, q[:, j] = 1 - sum_{i=0}^j p[:, i].
+    q_inv : ndarray of shape (n_samples, n_classes)
+        Helper array, inverse of q[:, j] = 1 - sum_{i=0}^j p[:, i], i.e. 1/q.
 
     sqrt_d : ndarray of shape (n_samples, n_classes)
         Square root of the diagonal matrix D, D_ii = p_i * q_i / q_{i-1}
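As a side note (not part of the commit), here is a tiny sketch of how the q described in the old docstring and the new q_inv attribute relate; the array values are made up and the guard against q == 0 mirrors what __init__ does below.

import numpy as np

# Two samples, three classes; rows sum to 1 (illustrative values only).
p = np.array([[0.2, 0.3, 0.5],
              [0.6, 0.1, 0.3]])
q = 1 - np.cumsum(p, axis=1)  # q[:, j] = 1 - sum_{i=0}^j p[:, i]
q[np.isclose(q, 0.0)] = 1     # q == 0 becomes a harmless divisor of 1
q_inv = 1 / q                 # this is what the class now caches as q_inv
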
@@ -727,33 +727,35 @@ class Multinomial_LDL_Decomposition:
 
     def __init__(self, *, proba, proba_sum_to_1=True):
         self.p = proba
-        self.q = 1 - np.cumsum(self.p, axis=1)  # contiguity of p
+        q = 1 - np.cumsum(self.p, axis=1)  # contiguity of p
         self.proba_sum_to_1 = proba_sum_to_1
         if self.p.dtype == np.float32:
             eps = 2 * np.finfo(np.float32).resolution
         else:
             eps = 2 * np.finfo(np.float64).resolution
-        if np.any(self.q[:, -1] > eps):
+        if np.any(q[:, -1] > eps):
             warnings.warn(
                 "Probabilities proba are assumed to sum to 1, but they don't."
             )
         if self.proba_sum_to_1:
             # If np.sum(p, axis=1) = 1 then q[:, -1] = d[:, -1] = 0.
-            self.q[:, -1] = 0.0
+            q[:, -1] = 0.0
         # One might not need all classes for p to sum to 1, so we detect and
         # correct all values close to zero.
-        self.q[self.q <= eps] = 0.0
+        q[q <= eps] = 0.0
         self.p[self.p <= eps] = 0.0
-        d = self.p * self.q
-        # From now on, self.q is always used in the denominator. We handle q == 0 by
+        d = self.p * q
+        # From now on, q is always used in the denominator. We handle q == 0 by
         # setting q to 1 whenever q == 0 such that a division of q is a no-op in this
         # case.
-        self.q[self.q == 0] = 1
+        q[q == 0] = 1
+        # And we use the inverse of q, 1/q.
+        self.q_inv = 1 / q
         if self.proba_sum_to_1:
             # If q_{i - 1} = 0, then also q_i = 0.
-            d[:, 1:-1] /= self.q[:, :-2]  # d[:, -1] = 0 anyway.
+            d[:, 1:-1] *= self.q_inv[:, :-2]  # d[:, -1] = 0 anyway.
         else:
-            d[:, 1:] /= self.q[:, :-1]
+            d[:, 1:] *= self.q_inv[:, :-1]
         self.sqrt_d = np.sqrt(d)
 
     def sqrt_D_Lt_matmul(self, x):
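The gist of this hunk: q itself is no longer stored, only its elementwise inverse, so all later divisions by q turn into multiplications. A standalone sketch (made-up data, names mirroring the diff) checking that the two ways of forming the diagonal d agree up to rounding:

import numpy as np

rng = np.random.default_rng(0)
p = rng.dirichlet(np.ones(4), size=5)      # 5 samples, 4 classes, rows sum to 1
eps = 2 * np.finfo(np.float64).resolution
q = 1 - np.cumsum(p, axis=1)
q[:, -1] = 0.0                             # rows sum to 1, so the last q is 0
q[q <= eps] = 0.0
d = p * q
q[q == 0] = 1                              # make dividing by q a no-op there
q_inv = 1 / q

d_div = d.copy()
d_div[:, 1:-1] /= q[:, :-2]                # before this commit: divide by q

d_mul = d.copy()
d_mul[:, 1:-1] *= q_inv[:, :-2]            # after this commit: multiply by 1/q

np.testing.assert_allclose(d_div, d_mul)
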
@@ -787,8 +789,16 @@ def sqrt_D_Lt_matmul(self, x):
         n_classes = self.p.shape[1]
         for i in range(0, n_classes - 1):  # row i
             # L_ij = -p_i / q_j, we need transpose L'
-            for j in range(i + 1, n_classes):  # column j
-                x[:, i] -= self.p[:, j] / self.q[:, i] * x[:, j]
+            # for j in range(i + 1, n_classes):  # column j
+            #     x[:, i] -= self.p[:, j] / self.q[:, i] * x[:, j]
+            # The following is the same but faster.
+            px = np.einsum(
+                "ij,ij->i",
+                self.p[:, i + 1 : n_classes],
+                x[:, i + 1 : n_classes],
+                order="A",
+            )
+            x[:, i] -= px * self.q_inv[:, i]
         x *= self.sqrt_d
         return x
 
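The inner Python loop over columns j is replaced by one einsum contraction per row i. A small self-contained check (random data and an arbitrary row index i, chosen only for illustration) that the contraction reproduces the loop:

import numpy as np

rng = np.random.default_rng(42)
n_samples, n_classes, i = 6, 5, 1
p = rng.dirichlet(np.ones(n_classes), size=n_samples)
x = rng.standard_normal((n_samples, n_classes))
q_i = 1 - np.cumsum(p, axis=1)[:, i]       # > 0 because i < n_classes - 1
q_inv_i = 1 / q_i                          # plays the role of self.q_inv[:, i]

# Old inner loop: accumulate column by column.
acc_loop = np.zeros(n_samples)
for j in range(i + 1, n_classes):
    acc_loop += p[:, j] / q_i * x[:, j]

# New version: one contraction over the trailing columns, scaled by 1/q once.
px = np.einsum("ij,ij->i", p[:, i + 1 :], x[:, i + 1 :])
acc_einsum = px * q_inv_i

np.testing.assert_allclose(acc_loop, acc_einsum, atol=1e-12)
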
@@ -823,8 +833,11 @@ def L_sqrt_D_matmul(self, x):
         x *= self.sqrt_d
         for i in range(n_classes - 1, 0, -1):  # row i
             # L_ij = -p_i / q_j
-            for j in range(0, i):  # column j
-                x[:, i] -= self.p[:, i] / self.q[:, j] * x[:, j]
+            # for j in range(0, i):  # column j
+            #     x[:, i] -= self.p[:, i] / self.q[:, j] * x[:, j]
+            # The following is the same but faster.
+            qx = np.einsum("ij,ij->i", self.q_inv[:, :i], x[:, :i], order="A")
+            x[:, i] -= qx * self.p[:, i]
         return x
 
     def inverse_L_sqrt_D_matmul(self, x):
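Same idea for the lower-triangular multiply, except the contraction now runs over the leading columns and pairs 1/q_j with x[:, j], so only p[:, i] is factored out. Again a sketch with made-up data:

import numpy as np

rng = np.random.default_rng(7)
n_samples, n_classes, i = 6, 5, 3
p = rng.dirichlet(np.ones(n_classes), size=n_samples)
x = rng.standard_normal((n_samples, n_classes))
q = 1 - np.cumsum(p, axis=1)
q_inv = 1 / q[:, :-1]                      # only columns j < n_classes - 1 are used

# Old inner loop over the leading columns j < i.
acc_loop = np.zeros(n_samples)
for j in range(0, i):
    acc_loop += p[:, i] / q[:, j] * x[:, j]

# New version: contract 1/q with x over j, then scale once by p[:, i].
qx = np.einsum("ij,ij->i", q_inv[:, :i], x[:, :i])
acc_einsum = qx * p[:, i]

np.testing.assert_allclose(acc_loop, acc_einsum, atol=1e-12)
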
@@ -855,11 +868,13 @@ def inverse_L_sqrt_D_matmul(self, x):
         n_classes = self.p.shape[1]
         for i in range(n_classes - 1, 0, -1):  # row i
             if i > 0:
-                fj = self.p[:, i] / self.q[:, i - 1]
+                fj = self.p[:, i] * self.q_inv[:, i - 1]
             else:
                 fj = self.p[:, i]
-            for j in range(0, i):  # column j
-                x[:, i] += fj * x[:, j]
+            # for j in range(0, i):  # column j
+            #     x[:, i] += fj * x[:, j]
+            # The following is the same but faster.
+            x[:, i] += fj * np.sum(x[:, :i], axis=1)
         if self.proba_sum_to_1:
             # x[:, :-1] /= self.sqrt_d[:, :-1]
             mask = self.sqrt_d[:, :-1] == 0
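In this hunk the scale factor fj does not depend on the column j at all, so the inner loop collapses to fj times a row-wise sum of the leading columns. A sketch of the equivalence of the in-place updates; fj here is just a random stand-in for p[:, i] * q_inv[:, i - 1]:

import numpy as np

rng = np.random.default_rng(3)
n_samples, n_classes, i = 6, 5, 3
x = rng.standard_normal((n_samples, n_classes))
fj = rng.random(n_samples)                 # stand-in for p[:, i] * q_inv[:, i - 1]

x_loop = x.copy()
for j in range(0, i):                      # old inner loop, column by column
    x_loop[:, i] += fj * x_loop[:, j]

x_sum = x.copy()
x_sum[:, i] += fj * np.sum(x_sum[:, :i], axis=1)   # new one-shot update

np.testing.assert_allclose(x_loop, x_sum, atol=1e-12)
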

sklearn/linear_model/tests/test_linear_loss.py

Lines changed: 2 additions & 2 deletions
@@ -441,7 +441,7 @@ def test_multinomial_LDL_decomposition_binomial_single_point():
     zero = 1
 
     LDL = Multinomial_LDL_Decomposition(proba=p)
-    assert_allclose(LDL.q[0, :], [1 - p0, zero])
+    assert_allclose(1 / LDL.q_inv[0, :], [1 - p0, zero])
     assert_allclose(LDL.sqrt_d[0, :] ** 2, [p0 * (1 - p0), 0])
 
     # C = diag(D) L' x with x = [[1, 0]] (n_samples=1, n_classes=2)
@@ -600,7 +600,7 @@ def test_multinomial_LDL_decomposition(global_random_seed):
     # Note that LDL sets q = 1 whenever q = 0 for easier handling of divisions by zero.
     # As q[:, -1] = 0 if p sums to 1, we do not compare the last values corresponding
     # to the last class.
-    assert_allclose(LDL.q[:, :-1], 1 - np.cumsum(LDL.p, axis=1)[:, :-1])
+    assert_allclose(1 / LDL.q_inv[:, :-1], 1 - np.cumsum(LDL.p, axis=1)[:, :-1])
 
     # C = diag(D) L' x with x = X @ coef = raw_prediction
     C = LDL.sqrt_D_Lt_matmul(raw_prediction.copy())
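The test changes only swap LDL.q for the reciprocal of the new LDL.q_inv. A standalone recomputation of what the updated assertion checks, with made-up probabilities and the same q == 0 handling as the class:

import numpy as np
from numpy.testing import assert_allclose

p = np.array([[0.1, 0.2, 0.3, 0.4],
              [0.25, 0.25, 0.25, 0.25]])
q = 1 - np.cumsum(p, axis=1)
q[np.isclose(q, 0.0)] = 1          # the class stores 1 wherever q == 0
q_inv = 1 / q                      # exposed as LDL.q_inv after this commit

# Skip the last class, whose q was zeroed and replaced by 1.
assert_allclose((1 / q_inv)[:, :-1], (1 - np.cumsum(p, axis=1))[:, :-1])
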
