Move dotblas to multiarray by charris · Pull Request #4969 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

Move dotblas to multiarray #4969

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2f6da63
ENH: optimize STRING_compare by using memcmp
juliantaylor Mar 31, 2014
f615c6c
ENH: Add 'HAVE_CBLAS' macro for build purposes.
charris Aug 11, 2014
ace49c2
ENH: When cblas is available use it in descr->f->dot.
charris Aug 14, 2014
efd023f
MAINT, STY: Remove use of alterdot, restoredot in _dotblas.c.
charris Aug 15, 2014
4e6066b
ENH: Move dotblas_matrixproduct down into multiarray.
charris Aug 16, 2014
028d2de
MAINT: Update waf to 1.7.16
charris Aug 17, 2014
52b8ab3
ENH: Move dotblas_innerproduct down into multiarray.
charris Aug 17, 2014
df0b65c
MAINT: Refactor ndarray.dot method to call PyArray_MatrixProduct2.
charris Aug 17, 2014
63cc74e
ENH: Move vdot to multiarray.
charris Aug 20, 2014
fad1377
DOC: Update docs to reflect deprecation of alterdot and restoredot.
charris Aug 22, 2014
f6ab313
TST: Add vdot tests, move tests from test_blasdot to test_multiarray.
charris Aug 22, 2014
d8af083
ENH: np.dot: better "matrices not aligned" message
larsmans Aug 23, 2014
a746e3a
BUG: Capitalize environmental variables in numpy/core/__init__.py.
charris Aug 23, 2014
d07c4c7
ENH: include shapes in "matrices not aligned" msg
larsmans Aug 23, 2014
ce32d9e
BLD: check for CBLAS header in "unoptimized" blas
juliantaylor Aug 24, 2014
91f89e4
Merge pull request #3 from juliantaylor/system-blas
charris Aug 24, 2014
ec5ef40
Merge branch 'dot-errmsg' into pr/4969
larsmans Aug 24, 2014
cfd462e
Merge pull request #2 from larsmans/dot-errmsg-no-dotblas
charris Aug 25, 2014
a3c70cc
STY: Add spaces around '-'.
charris Aug 25, 2014
affeaf5
TST: Silence some warning that turns up on OpenBSD.
charris Aug 29, 2014
4097ec3
BUG: fix percentage reporting when testing.assert_allclose fails.
pp-mo Aug 31, 2014
ea32c90
Use more portable test methods.
pp-mo Sep 1, 2014
138b3bf
Merge pull request #5031 from charris/speedup-old-polynomial-functions
seberg Sep 2, 2014
4a501a0
Merge pull request #5025 from pp-mo/assert_allclose_percent
juliantaylor Sep 2, 2014
588bcc4
Merge pull request #5020 from charris/disable-some-test-warnings
juliantaylor Sep 2, 2014
0f0575c
Merge pull request #4572 from juliantaylor/string-cmp
juliantaylor Sep 2, 2014
889821a
Merge branch 'old-move-dotblas' into move-dotblas-to-multiarray
charris Sep 2, 2014
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: When cblas is available use it in descr->f->dot.
Importing _dotblas currently executes _dotblas.alterdot, which replaces
the default descr->f->dot function with a cblas based version for float,
double, complex float, and complex double data types. This PR changes
the default descr->f->dot to use cblas whenever it is available. After
this change, the alterdot and restoredot functions serve no purpose, so
are changed to do nothing and deprecated. Note that those functions were
already doing nothing when _dotblas was not available.
  • Loading branch information
charris committed Aug 17, 2014
commit ace49c2ebcd42b9d41bdac11729f3c58e525480e
135 changes: 2 additions & 133 deletions numpy/core/blasdot/_dotblas.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,121 +24,6 @@ static char module_doc[] =

static PyArray_DotFunc *oldFunctions[NPY_NTYPES];

/* Smaller of a and b. Function-like macro: each argument may be evaluated twice. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Translate a NumPy byte stride into a BLAS element stride.
 *
 * Yields 0 when the stride is not usable by BLAS: it is zero or negative,
 * it is not a whole multiple of itemsize, or the resulting element count
 * does not fit in an int. Otherwise yields stride / itemsize.
 */
static NPY_INLINE int
blas_stride(npy_intp stride, unsigned itemsize)
{
    npy_intp elements;

    /* BLAS does not treat non-positive strides the way we need. */
    if (stride <= 0) {
        return 0;
    }
    /* The byte stride must land exactly on element boundaries. */
    if (stride % itemsize != 0) {
        return 0;
    }

    elements = stride / itemsize;

    /* BLAS takes its increments as int. */
    return (elements > INT_MAX) ? 0 : (int)elements;
}

/*
 * The following functions do a "chunked" dot product using BLAS when
 * sizeof(npy_intp) > sizeof(int), because BLAS libraries typically cannot
 * handle more than INT_MAX elements per call.
 *
 * In that case the chunk size is INT_MAX / 2 + 1, the greatest power of two
 * less than INT_MAX. Otherwise the chunk size is NPY_MAX_INTP, so a single
 * chunk always covers the whole vector.
 */
#if NPY_MAX_INTP > INT_MAX
# define CHUNKSIZE (INT_MAX / 2 + 1)
#else
# define CHUNKSIZE NPY_MAX_INTP
#endif

/*
 * Dot product of two float vectors, written to *res as a float.
 *
 * a, b             -- first elements of the two vectors
 * stridea, strideb -- byte strides between consecutive elements
 * n                -- number of elements
 * tmp              -- passed through unchanged to the fallback
 *
 * When both strides convert to valid BLAS strides, the product is computed
 * with cblas_sdot in chunks of at most CHUNKSIZE elements and accumulated
 * in a double. Otherwise the call is forwarded to the saved
 * oldFunctions[NPY_FLOAT] implementation.
 */
static void
FLOAT_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res,
          npy_intp n, void *tmp)
{
    int na = blas_stride(stridea, sizeof(float));
    int nb = blas_stride(strideb, sizeof(float));

    if (na && nb) {
        double r = 0.;  /* double for stability */
        char *pa = a, *pb = b;

        while (n > 0) {
            int chunk = (int)MIN(n, CHUNKSIZE);

            r += cblas_sdot(chunk, (float *)pa, na, (float *)pb, nb);
            /*
             * Advance using the npy_intp byte strides. The previous
             * element-wise form, chunk * na, multiplied two ints and could
             * overflow for large chunks with a stride greater than one.
             */
            pa += chunk * stridea;
            pb += chunk * strideb;
            n -= chunk;
        }
        *((float *)res) = (float)r;
    }
    else {
        oldFunctions[NPY_FLOAT](a, stridea, b, strideb, res, n, tmp);
    }
}

/*
 * Dot product of two double vectors, written to *res as a double.
 *
 * a, b             -- first elements of the two vectors
 * stridea, strideb -- byte strides between consecutive elements
 * n                -- number of elements
 * tmp              -- passed through unchanged to the fallback
 *
 * When both strides convert to valid BLAS strides, the product is computed
 * with cblas_ddot in chunks of at most CHUNKSIZE elements. Otherwise the
 * call is forwarded to the saved oldFunctions[NPY_DOUBLE] implementation.
 */
static void
DOUBLE_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res,
           npy_intp n, void *tmp)
{
    int na = blas_stride(stridea, sizeof(double));
    int nb = blas_stride(strideb, sizeof(double));

    if (na && nb) {
        double r = 0.;
        char *pa = a, *pb = b;

        while (n > 0) {
            int chunk = (int)MIN(n, CHUNKSIZE);

            r += cblas_ddot(chunk, (double *)pa, na, (double *)pb, nb);
            /*
             * Advance using the npy_intp byte strides. The previous
             * element-wise form, chunk * na, multiplied two ints and could
             * overflow for large chunks with a stride greater than one.
             */
            pa += chunk * stridea;
            pb += chunk * strideb;
            n -= chunk;
        }
        *((double *)res) = r;
    }
    else {
        oldFunctions[NPY_DOUBLE](a, stridea, b, strideb, res, n, tmp);
    }
}

/*
 * Unconjugated dot product of two complex-float vectors, written to *res
 * as a (real, imaginary) pair of floats.
 *
 * a, b             -- first elements of the two vectors
 * stridea, strideb -- byte strides between consecutive elements
 * n                -- number of elements
 * tmp              -- passed through unchanged to the fallback
 *
 * When both strides convert to valid BLAS strides, the product is computed
 * with cblas_cdotu_sub. Otherwise the call is forwarded to the saved
 * oldFunctions[NPY_CFLOAT] implementation.
 *
 * The BLAS path is chunked like the real-valued variants: passing (int)n
 * directly would silently truncate the length when n exceeds INT_MAX.
 */
static void
CFLOAT_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res,
           npy_intp n, void *tmp)
{
    int na = blas_stride(stridea, sizeof(npy_cfloat));
    int nb = blas_stride(strideb, sizeof(npy_cfloat));

    if (na && nb) {
        double sum_re = 0., sum_im = 0.;  /* double for stability */
        char *pa = a, *pb = b;

        while (n > 0) {
            int chunk = (int)MIN(n, CHUNKSIZE);
            float part[2];

            cblas_cdotu_sub(chunk, (float *)pa, na, (float *)pb, nb, part);
            sum_re += part[0];
            sum_im += part[1];
            /* byte strides are npy_intp, so this product cannot wrap an int */
            pa += chunk * stridea;
            pb += chunk * strideb;
            n -= chunk;
        }
        ((float *)res)[0] = (float)sum_re;
        ((float *)res)[1] = (float)sum_im;
    }
    else {
        oldFunctions[NPY_CFLOAT](a, stridea, b, strideb, res, n, tmp);
    }
}

/*
 * Unconjugated dot product of two complex-double vectors, written to *res
 * as a (real, imaginary) pair of doubles.
 *
 * a, b             -- first elements of the two vectors
 * stridea, strideb -- byte strides between consecutive elements
 * n                -- number of elements
 * tmp              -- passed through unchanged to the fallback
 *
 * When both strides convert to valid BLAS strides, the product is computed
 * with cblas_zdotu_sub. Otherwise the call is forwarded to the saved
 * oldFunctions[NPY_CDOUBLE] implementation.
 *
 * The BLAS path is chunked like the real-valued variants: passing (int)n
 * directly would silently truncate the length when n exceeds INT_MAX.
 */
static void
CDOUBLE_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res,
            npy_intp n, void *tmp)
{
    int na = blas_stride(stridea, sizeof(npy_cdouble));
    int nb = blas_stride(strideb, sizeof(npy_cdouble));

    if (na && nb) {
        double sum_re = 0., sum_im = 0.;
        char *pa = a, *pb = b;

        while (n > 0) {
            int chunk = (int)MIN(n, CHUNKSIZE);
            double part[2];

            cblas_zdotu_sub(chunk, (double *)pa, na, (double *)pb, nb, part);
            sum_re += part[0];
            sum_im += part[1];
            /* byte strides are npy_intp, so this product cannot wrap an int */
            pa += chunk * stridea;
            pb += chunk * strideb;
            n -= chunk;
        }
        ((double *)res)[0] = sum_re;
        ((double *)res)[1] = sum_im;
    }
    else {
        oldFunctions[NPY_CDOUBLE](a, stridea, b, strideb, res, n, tmp);
    }
}

/*
* Helper: call appropriate BLAS dot function for typenum.
Expand All @@ -149,20 +34,8 @@ blas_dot(int typenum, npy_intp n,
void *a, npy_intp stridea, void *b, npy_intp strideb, void *res)
{
PyArray_DotFunc *dot = NULL;
switch (typenum) {
case NPY_DOUBLE:
dot = DOUBLE_dot;
break;
case NPY_FLOAT:
dot = FLOAT_dot;
break;
case NPY_CDOUBLE:
dot = CDOUBLE_dot;
break;
case NPY_CFLOAT:
dot = CFLOAT_dot;
break;
}

dot = oldFunctions[typenum];
assert(dot != NULL);
dot(a, stridea, b, strideb, res, n, NULL);
}
Expand Down Expand Up @@ -257,19 +130,15 @@ dotblas_alterdot(PyObject *NPY_UNUSED(dummy), PyObject *args)
if (!altered) {
descr = PyArray_DescrFromType(NPY_FLOAT);
oldFunctions[NPY_FLOAT] = descr->f->dotfunc;
descr->f->dotfunc = (PyArray_DotFunc *)FLOAT_dot;

descr = PyArray_DescrFromType(NPY_DOUBLE);
oldFunctions[NPY_DOUBLE] = descr->f->dotfunc;
descr->f->dotfunc = (PyArray_DotFunc *)DOUBLE_dot;

descr = PyArray_DescrFromType(NPY_CFLOAT);
oldFunctions[NPY_CFLOAT] = descr->f->dotfunc;
descr->f->dotfunc = (PyArray_DotFunc *)CFLOAT_dot;

descr = PyArray_DescrFromType(NPY_CDOUBLE);
oldFunctions[NPY_CDOUBLE] = descr->f->dotfunc;
descr->f->dotfunc = (PyArray_DotFunc *)CDOUBLE_dot;

altered = NPY_TRUE;
}
Expand Down
16 changes: 12 additions & 4 deletions numpy/core/bscript
Original file line number Diff line number Diff line change
Expand Up @@ -488,14 +488,22 @@ def pre_build(context):
pjoin('src', 'multiarray', 'usertypes.c')]
else:
sources = extension.sources

use = 'npysort npymath'
defines = ['_FILE_OFFSET_BITS=64',
'_LARGEFILE_SOURCE=1',
'_LARGEFILE64_SOURCE=1']

if bld.env.HAS_CBLAS:
use += ' CBLAS'
defines.append('HAVE_CBLAS')

includes = ["src/multiarray", "src/private"]
return context.default_builder(extension,
includes=includes,
source=sources,
use="npysort npymath",
defines=['_FILE_OFFSET_BITS=64',
'_LARGEFILE_SOURCE=1',
'_LARGEFILE64_SOURCE=1']
use=use,
defines=defines
)
context.register_builder("multiarray", builder_multiarray)

Expand Down