@@ -1275,13 +1275,10 @@ cdef class DatasetsPair:
1275
1275
distance_metric._validate_data(X )
1276
1276
distance_metric._validate_data(Y )
1277
1277
1278
- if not issparse(X ) and not issparse(Y ):
1279
- return DenseDenseDatasetsPair(X, Y, distance_metric)
1280
- if issparse(X) and not issparse(Y):
1281
- return SparseDenseDatasetsPair(X, Y, distance_metric)
1282
- if not issparse(X) and issparse(Y):
1283
- return DenseSparseDatasetsPair(X, Y, distance_metric)
1284
- return SparseSparseDatasetsPair(X, Y, distance_metric)
1278
+ if issparse(X ) or issparse(Y ):
1279
+ raise ValueError (" Only dense datasets are supported for X and Y." )
1280
+
1281
+ return DenseDenseDatasetsPair(X, Y, distance_metric)
1285
1282
1286
1283
@classmethod
1287
1284
def unpack_csr_matrix (cls , X: csr_matrix ):
@@ -1351,191 +1348,3 @@ cdef class DenseDenseDatasetsPair(DatasetsPair):
1351
1348
return self .distance_metric.dist(& self .X[i, 0 ],
1352
1349
& self .Y[j, 0 ],
1353
1350
self .d)
1354
-
1355
@final
cdef class SparseSparseDatasetsPair(DatasetsPair):
    """Pair of CSR matrices whose row vectors can be compared pairwise.

    Parameters
    ----------
    X: sparse matrix of shape (n_samples_X, n_features)
        Each row is a vector. Must be in CSR format.

    Y: sparse matrix of shape (n_samples_Y, n_features)
        Each row is a vector. Must be in CSR format.

    distance_metric: DistanceMetric
        Metric in charge of evaluating the distance between
        one row of X and one row of Y.
    """
    cdef:
        # CSR components of X.
        const DTYPE_t[:] X_data
        const ITYPE_t[:] X_indices
        const ITYPE_t[:] X_indptr

        # CSR components of Y.
        const DTYPE_t[:] Y_data
        const ITYPE_t[:] Y_indices
        const ITYPE_t[:] Y_indptr

    def __init__(self, X, Y, DistanceMetric distance_metric):
        super().__init__(distance_metric)

        # Keep only the three CSR buffers of each matrix.
        self.X_data, self.X_indices, self.X_indptr = self.unpack_csr_matrix(X)
        self.Y_data, self.Y_indices, self.Y_indptr = self.unpack_csr_matrix(Y)

    @final
    cdef ITYPE_t n_samples_X(self) nogil:
        # The row-pointer array holds one more entry than there are rows.
        return self.X_indptr.shape[0] - 1

    @final
    cdef ITYPE_t n_samples_Y(self) nogil:
        # The row-pointer array holds one more entry than there are rows.
        return self.Y_indptr.shape[0] - 1

    @final
    cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Surrogate distance between row i of X and row j of Y,
        # delegated to the metric's ``csr_rdist``.
        cdef:
            ITYPE_t x_begin = self.X_indptr[i]
            ITYPE_t x_stop = self.X_indptr[i + 1]
            ITYPE_t y_begin = self.Y_indptr[j]
            ITYPE_t y_stop = self.Y_indptr[j + 1]

        return self.distance_metric.csr_rdist(
            self.X_data[x_begin:x_stop],
            self.X_indices[x_begin:x_stop],
            self.Y_data[y_begin:y_stop],
            self.Y_indices[y_begin:y_stop],
        )

    @final
    cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Distance between row i of X and row j of Y,
        # delegated to the metric's ``csr_dist``.
        cdef:
            ITYPE_t x_begin = self.X_indptr[i]
            ITYPE_t x_stop = self.X_indptr[i + 1]
            ITYPE_t y_begin = self.Y_indptr[j]
            ITYPE_t y_stop = self.Y_indptr[j + 1]

        return self.distance_metric.csr_dist(
            self.X_data[x_begin:x_stop],
            self.X_indices[x_begin:x_stop],
            self.Y_data[y_begin:y_stop],
            self.Y_indices[y_begin:y_stop],
        )
1424
-
1425
@final
cdef class SparseDenseDatasetsPair(DatasetsPair):
    """Pair made of a CSR matrix and a dense array, compared row by row.

    Parameters
    ----------
    X: sparse matrix of shape (n_samples_X, n_features)
        Each row is a vector. Must be in CSR format.

    Y: ndarray of shape (n_samples_Y, n_features)
        Each row is a vector. Must be C-contiguous.

    distance_metric: DistanceMetric
        Metric in charge of evaluating the distance between
        one row of X and one row of Y.
    """
    cdef:
        # CSR components of X.
        const DTYPE_t[:] X_data
        const ITYPE_t[:] X_indices
        const ITYPE_t[:] X_indptr

        # Dense Y, plus the column indices 0..n_features-1 that let a dense
        # row be handed to the CSR routines as a (values, indices) pair.
        const DTYPE_t[:, ::1] Y
        const ITYPE_t[:] Y_indices

    def __init__(self, X, Y, DistanceMetric distance_metric):
        super().__init__(distance_metric)

        self.X_data, self.X_indices, self.X_indptr = self.unpack_csr_matrix(X)

        # Y is stored as-is: it is expected to have been validated
        # before reaching this constructor.
        self.Y = Y
        self.Y_indices = np.arange(self.Y.shape[1], dtype=ITYPE)

    @final
    cdef ITYPE_t n_samples_X(self) nogil:
        # The row-pointer array holds one more entry than there are rows.
        return self.X_indptr.shape[0] - 1

    @final
    cdef ITYPE_t n_samples_Y(self) nogil:
        return self.Y.shape[0]

    @final
    cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Surrogate distance between row i of X and row j of Y,
        # delegated to the metric's ``csr_rdist``.
        cdef:
            ITYPE_t x_begin = self.X_indptr[i]
            ITYPE_t x_stop = self.X_indptr[i + 1]

        # TODO: slicing the 2D memoryview into a 1D one may slow things down, see:
        # https://github.com/scikit-learn/scikit-learn/issues/17299
        # Passing pointers and indices and reading the elements inside
        # distance_metric.dist would also work, at the price of a more
        # complex API.
        return self.distance_metric.csr_rdist(
            self.X_data[x_begin:x_stop],
            self.X_indices[x_begin:x_stop],
            self.Y[j, :],
            self.Y_indices,
        )

    @final
    cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Distance between row i of X and row j of Y,
        # delegated to the metric's ``csr_dist``.
        cdef:
            ITYPE_t x_begin = self.X_indptr[i]
            ITYPE_t x_stop = self.X_indptr[i + 1]

        # TODO: the memoryview-slicing remark made in surrogate_dist
        # applies here as well.
        return self.distance_metric.csr_dist(
            self.X_data[x_begin:x_stop],
            self.X_indices[x_begin:x_stop],
            self.Y[j, :],
            self.Y_indices,
        )
1496
-
1497
@final
cdef class DenseSparseDatasetsPair(DatasetsPair):
    """Pair made of a dense array and a CSR matrix, compared row by row.

    Parameters
    ----------
    X: ndarray of shape (n_samples_X, n_features)
        Each row is a vector. Must be C-contiguous.

    Y: sparse matrix of shape (n_samples_Y, n_features)
        Each row is a vector. Must be in CSR format.

    distance_metric: DistanceMetric
        Metric in charge of evaluating the distance between
        one row of X and one row of Y.
    """
    cdef:
        # Distance metrics being symmetric functions, the work is delegated
        # to a SparseDenseDatasetsPair built with the two datasets swapped.
        DatasetsPair datasets_pair

    def __init__(self, X, Y, DistanceMetric distance_metric):
        super().__init__(distance_metric)
        # Note the swapped order: Y (sparse) first, X (dense) second.
        self.datasets_pair = SparseDenseDatasetsPair(Y, X, distance_metric)

    @final
    cdef ITYPE_t n_samples_X(self) nogil:
        # X is the wrapped pair's "Y".
        return self.datasets_pair.n_samples_Y()

    @final
    cdef ITYPE_t n_samples_Y(self) nogil:
        # Y is the wrapped pair's "X".
        return self.datasets_pair.n_samples_X()

    @final
    cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Indices are swapped to match the wrapped pair's orientation.
        return self.datasets_pair.surrogate_dist(j, i)

    @final
    cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) nogil:
        # Indices are swapped to match the wrapped pair's orientation.
        return self.datasets_pair.dist(j, i)
0 commit comments