8000 DRAFT ENH Introduce `AVX` SIMD instructions for `ManhattanDistances.dist` by Micky774 · Pull Request #27145 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

DRAFT ENH Introduce AVX SIMD instructions for ManhattanDistances.dist #27145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,6 @@ sklearn/neighbors/_kd_tree.pyx

# Default JupyterLite content
jupyterlite_contents

#SIMD
!sklearn/metrics/_simd/*
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "xsimd"]
path = xsimd
url = https://github.com/xtensor-stack/xsimd
62 changes: 59 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import sys
import traceback
from os.path import join
from pathlib import Path
from textwrap import dedent

from setuptools import Command, Extension, setup
from setuptools.command.build_ext import build_ext
Expand All @@ -28,7 +30,6 @@
# away from numpy.distutils?
builtins.__SKLEARN_SETUP__ = True


DISTNAME = "scikit-learn"
DESCRIPTION = "A set of python modules for machine learning and data mining"
with open("README.rst") as f:
Expand All @@ -43,13 +44,35 @@
"Documentation": "https://scikit-learn.org/stable/documentation.html",
"Source Code": "https://github.com/scikit-learn/scikit-learn",
}
SIMD_DIRECTORY = join("sklearn", "metrics", "_simd")

# We can actually import a restricted version of sklearn that
# does not need the compiled code
import sklearn # noqa
import sklearn._min_dependencies as min_deps # noqa
from sklearn._build_utils import _check_cython_version # noqa
from sklearn.externals._packaging.version import parse as parse_version # noqa
from sklearn._build_utils.pre_build_helpers import compile_test_program # noqa

runtime_check_program = dedent("""
#include "xsimd.hpp"
#include <iostream>
int main(){
std::cout << xsimd::available_architectures().avx;
return 0;
}
""")
# XXX: Can we do this without an absolute path?
dir_path = os.path.dirname(os.path.realpath(__file__))
xsimd_include_path = Path(dir_path, "xsimd", "include", "xsimd")
HAS_AVX_RUNTIME = int(
compile_test_program(
runtime_check_program,
extra_preargs=["-lstdc++"],
extra_postargs=[f"-I{xsimd_include_path}"],
extension="cpp",
)[0]
)


VERSION = sklearn.__version__
Expand Down Expand Up @@ -252,8 +275,23 @@ def check_package_status(package, min_version):
"metrics": [
{"sources": ["_pairwise_fast.pyx"], "include_np": True},
{
"sources": ["_dist_metrics.pyx.tp", "_dist_metrics.pxd.tp"],
"sources": [
"_dist_metrics.pyx.tp",
"_dist_metrics.pxd.tp",
join("_simd", "_dist_optim.cpp"),
],
"include_np": True,
"language": "c++",
"extra_compile_args": ["-std=c++11"],
"define_macros": [("DIST_METRICS", None)],
"include_dirs": [join("..", "..", "xsimd", "include", "xsimd")],
},
{
"sources": ["_runtime_check.pyx"],
"language": "c++",
"include_np": True,
"extra_compile_args": ["-std=c++11"],
"include_dirs": [join("..", "..", "xsimd", "include", "xsimd")],
},
],
"metrics.cluster": [
Expand Down Expand Up @@ -471,6 +509,23 @@ def configure_extension_modules():
build_with_debug_symbols = (
os.environ.get("SKLEARN_BUILD_ENABLE_DEBUG_SYMBOLS", "0") != "0"
)
BUILD_WITH_SIMD = os.environ.get("SKLEARN_NO_SIMD", "0") == "0" and HAS_AVX_RUNTIME
if BUILD_WITH_SIMD:
libraries.append(
(
"avx_dist_metrics",
{
"language": "c++",
"sources": [join(SIMD_DIRECTORY, "simd.cpp")],
"cflags": ["-std=c++14", "-mavx"],
"extra_link_args": ["-std=c++14"],
"include_dirs": [join("xsimd", "include", "xsimd")],
},
),
)
extension_config["metrics"][1]["define_macros"].append(
("WITH_SIMD", BUILD_WITH_SIMD)
)
if os.name == "posix":
if build_with_debug_symbols:
default_extra_compile_args.append("-g")
Expand Down Expand Up @@ -538,13 +593,13 @@ def configure_extension_modules():
optimization_level = extension.get(
"optimization_level", default_optimization_level
)
define_macros = extension.get("define_macros", [])
if os.name == "posix":
extra_compile_args.append(f"-{optimization_level}")
else:
extra_compile_args.append(f"/{optimization_level}")

libraries_ext = extension.get("libraries", []) + default_libraries

new_ext = Extension(
name=name,
sources=sources,
Expand All @@ -554,6 +609,7 @@ def configure_extension_modules():
depends=depends,
extra_link_args=extension.get("extra_link_args", None),
extra_compile_args=extra_compile_args,
define_macros=define_macros,
)
cython_exts.append(new_ext)

Expand Down
8 changes: 5 additions & 3 deletions sklearn/_build_utils/pre_build_helpers.py
9E7A
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from setuptools.command.build_ext import customize_compiler, new_compiler


def compile_test_program(code, extra_preargs=None, extra_postargs=None):
def compile_test_program(code, extra_preargs=None, extra_postargs=None, extension="c"):
"""Check that some C code can be compiled and run"""
ccompiler = new_compiler()
customize_compiler(ccompiler)
Expand All @@ -22,14 +22,16 @@ def compile_test_program(code, extra_preargs=None, extra_postargs=None):
os.chdir(tmp_dir)

# Write test program
with open("test_program.c", "w") as f:
with open(f"test_program.{extension}", "w") as f:
f.write(code)

os.mkdir("objects")

# Compile, test program
ccompiler.compile(
["test_program.c"], output_dir="objects", extra_postargs=extra_postargs
[f"test_program.{extension}"],
output_dir="objects",
extra_postargs=extra_postargs,
)

# Link test program
Expand Down
2 changes: 2 additions & 0 deletions sklearn/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
median_absolute_error,
r2_score,
)
from ._runtime_check import get_available_architectures
from ._scorer import check_scoring, get_scorer, get_scorer_names, make_scorer
from .cluster import (
adjusted_mutual_info_score,
Expand Down Expand Up @@ -123,6 +124,7 @@
"f1_score",
"fbeta_score",
"fowlkes_mallows_score",
"get_available_architectures",
"get_scorer",
"hamming_loss",
"hinge_loss",
Expand Down
6 changes: 6 additions & 0 deletions sklearn/metrics/_dist_metrics.pxd.tp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t
cdef class DistanceMetric:
pass

cdef extern from "_simd/_dist_optim.cpp":
cdef Type xsimd_manhattan_dist[Type](Type * x, Type * y, intp_t size) nogil
cdef Type xsimd_manhattan_dist_scalar[Type](Type * x, Type * y, intp_t size) nogil
bint WITH_SIMD

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

######################################################################
Expand Down Expand Up @@ -70,6 +75,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric):
cdef intp_t size
cdef object func
cdef object kwargs
cdef bint simd_runtime

cdef {{INPUT_DTYPE_t}} dist(
self,
Expand Down
41 changes: 23 additions & 18 deletions sklearn/metrics/_dist_metrics.pyx.tp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ from scipy.sparse import csr_matrix, issparse
from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t
from ..utils import check_array
from ..utils.fixes import parse_version, sp_base_version

from ._runtime_check import get_available_architectures
cdef inline double fmax(double a, double b) noexcept nogil:
return max(a, b)

Expand Down Expand Up @@ -837,7 +837,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric):

intp_t i1, i2
intp_t x1_start, x1_end
{{INPUT_DTYPE_t}} * x2_data
const {{INPUT_DTYPE_t}} * x2_data

with nogil:
# Use the exact same adaptation for CSR than in SparseDenseDatasetsPair
Expand Down Expand Up @@ -901,7 +901,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric):
{{INPUT_DTYPE_t}}[:, ::1] Darr = np.empty((n_X, n_Y), dtype={{INPUT_DTYPE}}, order='C')

intp_t i1, i2
{{INPUT_DTYPE_t}} * x1_data
const {{INPUT_DTYPE_t}} * x1_data

intp_t x2_start, x2_end

Expand Down Expand Up @@ -1083,7 +1083,8 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const intp_t size,
) except -1 nogil:
return sqrt(
self.rdist_csr(
EuclideanDistance{{name_suffix}}.rdist_csr(
self,
x1_data,
x1_indices,
x2_data,
Expand Down Expand Up @@ -1132,7 +1133,7 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
return sqrt(self.rdist(x1, x2, size))
return sqrt(SEuclideanDistance{{name_suffix}}.rdist(self, x1, x2, size))

cdef inline {{INPUT_DTYPE_t}} _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil:
return sqrt(rdist)
Expand Down Expand Up @@ -1212,7 +1213,8 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const intp_t size,
) except -1 nogil:
return sqrt(
self.rdist_csr(
SEuclideanDistance{{name_suffix}}.rdist_csr(
self,
x1_data,
x1_indices,
x2_data,
Expand All @@ -1235,18 +1237,18 @@ cdef class ManhattanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
"""
def __init__(self):
self.p = 1
self.simd_runtime = get_available_architectures()["avx"] and WITH_SIMD

cdef inline {{INPUT_DTYPE_t}} dist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
cdef float64_t d = 0
cdef intp_t j
for j in range(size):
d += fabs(x1[j] - x2[j])
return d
if self.simd_runtime:
return xsimd_manhattan_dist[{{INPUT_DTYPE_t}}](x1, x2, size)
else:
return xsimd_manhattan_dist_scalar[{{INPUT_DTYPE_t}}](x1, x2, size)

cdef inline {{INPUT_DTYPE_t}} dist_csr(
self,
Expand Down Expand Up @@ -1463,7 +1465,7 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
return pow(self.rdist(x1, x2, size), 1. / self.p)
return pow(MinkowskiDistance{{name_suffix}}.rdist(self, x1, x2, size), 1. / self.p)

cdef inline {{INPUT_DTYPE_t}} _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil:
return pow(rdist, 1. / self.p)
Expand Down Expand Up @@ -1570,7 +1572,8 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const intp_t size,
) except -1 nogil:
return pow(
self.rdist_csr(
MinkowskiDistance{{name_suffix}}.rdist_csr(
self,
x1_data,
x1_indices,
x2_data,
Expand Down Expand Up @@ -1655,7 +1658,7 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
return sqrt(self.rdist(x1, x2, size))
return sqrt(MahalanobisDistance{{name_suffix}}.rdist(self, x1, x2, size))

cdef inline {{INPUT_DTYPE_t}} _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil:
return sqrt(rdist)
Expand Down Expand Up @@ -1736,7 +1739,8 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const intp_t size,
) except -1 nogil:
return sqrt(
self.rdist_csr(
MahalanobisDistance{{name_suffix}}.rdist_csr(
self,
x1_data,
x1_indices,
x2_data,
Expand Down Expand Up @@ -2641,7 +2645,7 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
return 2 * asin(sqrt(self.rdist(x1, x2, size)))
return 2 * asin(sqrt(HaversineDistance{{name_suffix}}.rdist(self, x1, x2, size)))

cdef inline {{INPUT_DTYPE_t}} _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil:
return 2 * asin(sqrt(rdist))
Expand Down Expand Up @@ -2669,7 +2673,8 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const int32_t x2_end,
const intp_t size,
) except -1 nogil:
return 2 * asin(sqrt(self.rdist_csr(
return 2 * asin(sqrt(HaversineDistance{{name_suffix}}.rdist_csr(
self,
x1_data,
x1_indices,
x2_data,
Expand Down Expand Up @@ -2779,7 +2784,7 @@ cdef class PyFuncDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
const {{INPUT_DTYPE_t}}* x2,
intp_t size,
) except -1 nogil:
return self._dist(x1, x2, size)
return PyFuncDistance{{name_suffix}}._dist(self, x1, x2, size)

cdef inline {{INPUT_DTYPE_t}} _dist(
self,
Expand Down
28 changes: 28 additions & 0 deletions sklearn/metrics/_runtime_check.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cdef extern from "xsimd.hpp" namespace "xsimd::detail":
ctypedef struct supported_arch:
unsigned int sse2
unsigned int sse3
unsigned int ssse3
unsigned int sse4_1
unsigned int sse4_2
unsigned int sse4a
unsigned int fma3_sse
unsigned int fma4
unsigned int xop
unsigned int avx
unsigned int fma3_avx
unsigned int avx2
unsigned int fma3_avx2
unsigned int avx512f
unsigned int avx512cd
unsigned int avx512dq
unsigned int avx512bw
unsigned int neon
unsigned int neon64
unsigned int sve

cdef extern from "xsimd.hpp" namespace "xsimd":
cdef supported_arch available_architectures() noexcept nogil

cpdef get_available_architectures():
return available_architectures()
Loading
0