diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst index 3f52bd5af4d2..386edac45a95 100644 --- a/doc/release/1.10.0-notes.rst +++ b/doc/release/1.10.0-notes.rst @@ -11,6 +11,7 @@ Highlights Dropped Support =============== * The polytemplate.py file has been removed. +* The _dotblas module is no longer available. Future Changes @@ -33,6 +34,17 @@ Improvements Changes ======= +dotblas functionality moved to multiarray +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The cblas versions of dot, inner, and vdot have been integrated into +the multiarray module. In particular, vdot is now a multiarray function, +which it was not before. + Deprecations ============ + +alterdot, restoredot Deprecated +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The alterdot and restoredot functions no longer do anything, and are +deprecated. diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 09311a5364d4..6f3a10de4b0c 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -2174,43 +2174,6 @@ def luf(lamdaexpr, *args, **kwargs): """) -add_newdoc('numpy.core', 'alterdot', - """ - Change `dot`, `vdot`, and `inner` to use accelerated BLAS functions. - - Typically, as a user of Numpy, you do not explicitly call this function. If - Numpy is built with an accelerated BLAS, this function is automatically - called when Numpy is imported. - - When Numpy is built with an accelerated BLAS like ATLAS, these functions - are replaced to make use of the faster implementations. The faster - implementations only affect float32, float64, complex64, and complex128 - arrays. Furthermore, the BLAS API only includes matrix-matrix, - matrix-vector, and vector-vector products. Products of arrays with larger - dimensionalities use the built in functions and are not accelerated. - - See Also - -------- - restoredot : `restoredot` undoes the effects of `alterdot`. - - """) - -add_newdoc('numpy.core', 'restoredot', - """ - Restore `dot`, `vdot`, and `innerproduct` to the default non-BLAS - implementations. - - Typically, the user will only need to call this when troubleshooting and - installation problem, reproducing the conditions of a build without an - accelerated BLAS, or when being very careful about benchmarking linear - algebra operations. - - See Also - -------- - alterdot : `restoredot` undoes the effects of `alterdot`. - - """) - add_newdoc('numpy.core', 'vdot', """ vdot(a, b) diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index 0b8d5bb17786..371d34b588ee 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -3,7 +3,20 @@ from .info import __doc__ from numpy.version import version as __version__ +# disables OpenBLAS affinity setting of the main thread that limits +# python threads or processes to one core +import os +envbak = os.environ.copy() +if 'OPENBLAS_MAIN_FREE' not in os.environ: + os.environ['OPENBLAS_MAIN_FREE'] = '1' +if 'GOTOBLAS_MAIN_FREE' not in os.environ: + os.environ['GOTOBLAS_MAIN_FREE'] = '1' from . import multiarray +os.environ.clear() +os.environ.update(envbak) +del envbak +del os + from . import umath from . import _internal # for freeze programs from . import numerictypes as nt diff --git a/numpy/core/bento.info b/numpy/core/bento.info index 299bd8ca516c..0a22dc710e3a 100644 --- a/numpy/core/bento.info +++ b/numpy/core/bento.info @@ -33,9 +33,6 @@ Library: Sources: src/private/scalarmathmodule.h.src, src/scalarmathmodule.c.src - Extension: _dotblas - Sources: - blasdot/_dotblas.c Extension: test_rational Sources: src/umath/test_rational.c.src diff --git a/numpy/core/blasdot/cblas.h b/numpy/core/blasdot/cblas.h deleted file mode 100644 index 25de09edfe8c..000000000000 --- a/numpy/core/blasdot/cblas.h +++ /dev/null @@ -1,578 +0,0 @@ -#ifndef CBLAS_H -#define CBLAS_H -#include - -/* Allow the use in C++ code. */ -#ifdef __cplusplus -extern "C" -{ -#endif - -/* - * Enumerated and derived types - */ -#define CBLAS_INDEX size_t /* this may vary between platforms */ - -enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; -enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; -enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; -enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; -enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; - -/* - * =========================================================================== - * Prototypes for level 1 BLAS functions (complex are recast as routines) - * =========================================================================== - */ -float cblas_sdsdot(const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY); -double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, - const int incY); -float cblas_sdot(const int N, const float *X, const int incX, - const float *Y, const int incY); -double cblas_ddot(const int N, const double *X, const int incX, - const double *Y, const int incY); - -/* - * Functions having prefixes Z and C only - */ -void cblas_cdotu_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotu); -void cblas_cdotc_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotc); - -void cblas_zdotu_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotu); -void cblas_zdotc_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotc); - - -/* - * Functions having prefixes S D SC DZ - */ -float cblas_snrm2(const int N, const float *X, const int incX); -float cblas_sasum(const int N, const float *X, const int incX); - -double cblas_dnrm2(const int N, const double *X, const int incX); -double cblas_dasum(const int N, const double *X, const int incX); - -float cblas_scnrm2(const int N, const void *X, const int incX); -float cblas_scasum(const int N, const void *X, const int incX); - -double cblas_dznrm2(const int N, const void *X, const int incX); -double cblas_dzasum(const int N, const void *X, const int incX); - - -/* - * Functions having standard 4 prefixes (S D C Z) - */ -CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); -CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); -CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); -CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); - -/* - * =========================================================================== - * Prototypes for level 1 BLAS routines - * =========================================================================== - */ - -/* - * Routines with standard 4 prefixes (s, d, c, z) - */ -void cblas_sswap(const int N, float *X, const int incX, - float *Y, const int incY); -void cblas_scopy(const int N, const float *X, const int incX, - float *Y, const int incY); -void cblas_saxpy(const int N, const float alpha, const float *X, - const int incX, float *Y, const int incY); - -void cblas_dswap(const int N, double *X, const int incX, - double *Y, const int incY); -void cblas_dcopy(const int N, const double *X, const int incX, - double *Y, const int incY); -void cblas_daxpy(const int N, const double alpha, const double *X, - const int incX, double *Y, const int incY); - -void cblas_cswap(const int N, void *X, const int incX, - void *Y, const int incY); -void cblas_ccopy(const int N, const void *X, const int incX, - void *Y, const int incY); -void cblas_caxpy(const int N, const void *alpha, const void *X, - const int incX, void *Y, const int incY); - -void cblas_zswap(const int N, void *X, const int incX, - void *Y, const int incY); -void cblas_zcopy(const int N, const void *X, const int incX, - void *Y, const int incY); -void cblas_zaxpy(const int N, const void *alpha, const void *X, - const int incX, void *Y, const int incY); - - -/* - * Routines with S and D prefix only - */ -void cblas_srotg(float *a, float *b, float *c, float *s); -void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); -void cblas_srot(const int N, float *X, const int incX, - float *Y, const int incY, const float c, const float s); -void cblas_srotm(const int N, float *X, const int incX, - float *Y, const int incY, const float *P); - -void cblas_drotg(double *a, double *b, double *c, double *s); -void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); -void cblas_drot(const int N, double *X, const int incX, - double *Y, const int incY, const double c, const double s); -void cblas_drotm(const int N, double *X, const int incX, - double *Y, const int incY, const double *P); - - -/* - * Routines with S D C Z CS and ZD prefixes - */ -void cblas_sscal(const int N, const float alpha, float *X, const int incX); -void cblas_dscal(const int N, const double alpha, double *X, const int incX); -void cblas_cscal(const int N, const void *alpha, void *X, const int incX); -void cblas_zscal(const int N, const void *alpha, void *X, const int incX); -void cblas_csscal(const int N, const float alpha, void *X, const int incX); -void cblas_zdscal(const int N, const double alpha, void *X, const int incX); - -/* - * =========================================================================== - * Prototypes for level 2 BLAS - * =========================================================================== - */ - -/* - * Routines with standard 4 prefixes (S, D, C, Z) - */ -void cblas_sgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const float alpha, const float *A, const int lda, - const float *X, const int incX, const float beta, - float *Y, const int incY); -void cblas_sgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const float alpha, - const float *A, const int lda, const float *X, - const int incX, const float beta, float *Y, const int incY); -void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, - float *X, const int incX); -void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX); -void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *Ap, float *X, const int incX); -void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, float *X, - const int incX); -void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX); -void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *Ap, float *X, const int incX); - -void cblas_dgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY); -void cblas_dgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const double alpha, - const double *A, const int lda, const double *X, - const int incX, const double beta, double *Y, const int incY); -void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, - double *X, const int incX); -void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX); -void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX); -void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, double *X, - const int incX); -void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX); -void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX); - -void cblas_cgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY); -void cblas_cgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const void *alpha, - const void *A, const int lda, const void *X, - const int incX, const void *beta, void *Y, const int incY); -void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX); -void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); -void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX); -void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); - -void cblas_zgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY); -void cblas_zgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const void *alpha, - const void *A, const int lda, const void *X, - const int incX, const void *beta, void *Y, const int incY); -void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX); -void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); -void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX); -void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); - - -/* - * Routines with S and D prefixes only - */ -void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *A, - const int lda, const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const float alpha, const float *A, - const int lda, const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *Ap, - const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, - const float alpha, const float *X, const int incX, - const float *Y, const int incY, float *A, const int lda); -void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *A, const int lda); -void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *Ap); -void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A, - const int lda); -void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A); - -void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *A, - const int lda, const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const double alpha, const double *A, - const int lda, const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *Ap, - const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, - const double alpha, const double *X, const int incX, - const double *Y, const int incY, double *A, const int lda); -void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *A, const int lda); -void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *Ap); -void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A, - const int lda); -void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A); - - -/* - * Routines with C and Z prefixes only - */ -void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *Ap, - const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, const int incX, - void *A, const int lda); -void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, - const int incX, void *A); -void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *Ap); - -void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *Ap, - const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, const int incX, - void *A, const int lda); -void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, - const int incX, void *A); -void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *Ap); - -/* - * =========================================================================== - * Prototypes for level 3 BLAS - * =========================================================================== - */ - -/* - * Routines with standard 4 prefixes (S, D, C, Z) - */ -void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const float alpha, const float *A, - const int lda, const float *B, const int ldb, - const float beta, float *C, const int ldc); -void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const float alpha, const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc); -void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float beta, float *C, const int ldc); -void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc); -void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb); -void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb); - -void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc); -void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc); -void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double beta, double *C, const int ldc); -void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc); -void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb); -void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb); - -void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc); -void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc); -void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); -void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); - -void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc); -void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc); -void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); -void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); - - -/* - * Routines with prefixes C and Z only - */ -void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const void *A, const int lda, - const float beta, void *C, const int ldc); -void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const float beta, - void *C, const int ldc); - -void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const void *A, const int lda, - const double beta, void *C, const int ldc); -void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const double beta, - void *C, const int ldc); - -void cblas_xerbla(int p, const char *rout, const char *form, ...); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/numpy/core/bscript b/numpy/core/bscript index 416e16524bb2..da9ff2799e92 100644 --- a/numpy/core/bscript +++ b/numpy/core/bscript @@ -443,7 +443,8 @@ def pre_build(context): "src/multiarray/einsum.c.src"] bld(target="multiarray_templates", source=multiarray_templates) if ENABLE_SEPARATE_COMPILATION: - sources = [pjoin('src', 'multiarray', 'arrayobject.c'), + sources = [ + pjoin('src', 'multiarray', 'arrayobject.c'), pjoin('src', 'multiarray', 'alloc.c'), pjoin('src', 'multiarray', 'arraytypes.c.src'), pjoin('src', 'multiarray', 'array_assign.c'), @@ -485,17 +486,30 @@ def pre_build(context): pjoin('src', 'multiarray', 'sequence.c'), pjoin('src', 'multiarray', 'shape.c'), pjoin('src', 'multiarray', 'ucsnarrow.c'), - pjoin('src', 'multiarray', 'usertypes.c')] + pjoin('src', 'multiarray', 'usertypes.c'), + pjoin('src', 'multiarray', 'vdot.c'), + ] + + if bld.env.HAS_CBLAS: + sources.append(pjoin('src', 'multiarray', 'cblasfuncs.c')) else: sources = extension.sources + + use = 'npysort npymath' + defines = ['_FILE_OFFSET_BITS=64', + '_LARGEFILE_SOURCE=1', + '_LARGEFILE64_SOURCE=1'] + + if bld.env.HAS_CBLAS: + use += ' CBLAS' + defines.append('HAVE_CBLAS') + includes = ["src/multiarray", "src/private"] return context.default_builder(extension, includes=includes, source=sources, - use="npysort npymath", - defines=['_FILE_OFFSET_BITS=64', - '_LARGEFILE_SOURCE=1', - '_LARGEFILE64_SOURCE=1'] + use=use, + defines=defines ) context.register_builder("multiarray", builder_multiarray) @@ -537,9 +551,3 @@ def pre_build(context): context.tweak_extension("scalarmath", use="npymath", includes=["src/private"]) context.tweak_extension("multiarray_tests", use="npymath", includes=["src/private"]) context.tweak_extension("umath_tests", use="npymath", includes=["src/private"]) - - def build_dotblas(extension): - if bld.env.HAS_CBLAS: - return context.default_builder(extension, use="CBLAS", - includes=["src/multiarray", "src/private"]) - context.register_builder("_dotblas", build_dotblas) diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index efd8af45dbd2..ac51f5d011b9 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -4,7 +4,7 @@ import sys import warnings import collections -from . import multiarray +from numpy.core import multiarray from . import umath from .umath import (invert, sin, UFUNC_BUFSIZE_DEFAULT, ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT, ERR_LOG, @@ -1077,31 +1077,61 @@ def outer(a, b, out=None): # try to import blas optimized dot if available envbak = os.environ.copy() -try: - # importing this changes the dot function for basic 4 types - # to blas-optimized versions. - - # disables openblas affinity setting of the main thread that limits - # python threads or processes to one core - if 'OPENBLAS_MAIN_FREE' not in os.environ: - os.environ['OPENBLAS_MAIN_FREE'] = '1' - if 'GOTOBLAS_MAIN_FREE' not in os.environ: - os.environ['GOTOBLAS_MAIN_FREE'] = '1' - from ._dotblas import dot, vdot, inner, alterdot, restoredot -except ImportError: - # docstrings are in add_newdocs.py - inner = multiarray.inner - dot = multiarray.dot - def vdot(a, b): - return dot(asarray(a).ravel().conj(), asarray(b).ravel()) - def alterdot(): - pass - def restoredot(): - pass -finally: - os.environ.clear() - os.environ.update(envbak) - del envbak +dot = multiarray.dot +inner = multiarray.inner +vdot = multiarray.vdot + +def alterdot(): + """ + Change `dot`, `vdot`, and `inner` to use accelerated BLAS functions. + + Typically, as a user of Numpy, you do not explicitly call this + function. If Numpy is built with an accelerated BLAS, this function is + automatically called when Numpy is imported. + + When Numpy is built with an accelerated BLAS like ATLAS, these + functions are replaced to make use of the faster implementations. The + faster implementations only affect float32, float64, complex64, and + complex128 arrays. Furthermore, the BLAS API only includes + matrix-matrix, matrix-vector, and vector-vector products. Products of + arrays with larger dimensionalities use the built in functions and are + not accelerated. + + .. note:: Deprecated in Numpy 1.10 + The cblas functions have been integrated into the multarray + module and alterdot now longer does anything. It will be + removed in Numpy 1.11.0. + + See Also + -------- + restoredot : `restoredot` undoes the effects of `alterdot`. + + """ + warnings.warn("alterdot no longer does anything.", DeprecationWarning) + + +def restoredot(): + """ + Restore `dot`, `vdot`, and `innerproduct` to the default non-BLAS + implementations. + + Typically, the user will only need to call this when troubleshooting + and installation problem, reproducing the conditions of a build without + an accelerated BLAS, or when being very careful about benchmarking + linear algebra operations. + + .. note:: Deprecated in Numpy 1.10 + The cblas functions have been integrated into the multarray + module and restoredot now longer does anything. It will be + removed in Numpy 1.11.0. + + See Also + -------- + alterdot : `restoredot` undoes the effects of `alterdot`. + + """ + warnings.warn("restoredot no longer does anything.", DeprecationWarning) + def tensordot(a, b, axes=2): """ @@ -2153,6 +2183,41 @@ def identity(n, dtype=None): from numpy import eye return eye(n, dtype=dtype) +def _allclose_points(a, b, rtol=1.e-5, atol=1.e-8): + """ + This is the point-wise inner calculation of 'allclose', which is subtly + different from 'isclose'. Provided as a comparison routine for use in + testing.assert_allclose. + See those routines for further details. + + """ + x = array(a, copy=False, ndmin=1) + y = array(b, copy=False, ndmin=1) + + # make sure y is an inexact type to avoid abs(MIN_INT); will cause + # casting of x later. + dtype = multiarray.result_type(y, 1.) + y = array(y, dtype=dtype, copy=False) + + xinf = isinf(x) + yinf = isinf(y) + if any(xinf) or any(yinf): + # Check that x and y have inf's only in the same positions + if not all(xinf == yinf): + return False + # Check that sign of inf's in x and y is the same + if not all(x[xinf] == y[xinf]): + return False + + x = x[~xinf] + y = y[~xinf] + + # ignore invalid fpe's + with errstate(invalid='ignore'): + r = less_equal(abs(x - y), atol + rtol * abs(y)) + + return r + def allclose(a, b, rtol=1.e-5, atol=1.e-8): """ Returns True if two arrays are element-wise equal within a tolerance. @@ -2208,32 +2273,7 @@ def allclose(a, b, rtol=1.e-5, atol=1.e-8): False """ - x = array(a, copy=False, ndmin=1) - y = array(b, copy=False, ndmin=1) - - # make sure y is an inexact type to avoid abs(MIN_INT); will cause - # casting of x later. - dtype = multiarray.result_type(y, 1.) - y = array(y, dtype=dtype, copy=False) - - xinf = isinf(x) - yinf = isinf(y) - if any(xinf) or any(yinf): - # Check that x and y have inf's only in the same positions - if not all(xinf == yinf): - return False - # Check that sign of inf's in x and y is the same - if not all(x[xinf] == y[xinf]): - return False - - x = x[~xinf] - y = y[~xinf] - - # ignore invalid fpe's - with errstate(invalid='ignore'): - r = all(less_equal(abs(x - y), atol + rtol * abs(y))) - - return r + return all(_allclose_points(a, b, rtol=rtol, atol=atol)) def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): """ diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 15f66fa6cb35..2da2e837a9f7 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -772,6 +772,7 @@ def generate_multiarray_templated_sources(ext, build_dir): join('src', 'multiarray', 'shape.h'), join('src', 'multiarray', 'ucsnarrow.h'), join('src', 'multiarray', 'usertypes.h'), + join('src', 'multiarray', 'vdot.h'), join('src', 'private', 'lowlevel_strided_loops.h'), join('include', 'numpy', 'arrayobject.h'), join('include', 'numpy', '_neighborhood_iterator_imp.h'), @@ -838,23 +839,33 @@ def generate_multiarray_templated_sources(ext, build_dir): join('src', 'multiarray', 'scalarapi.c'), join('src', 'multiarray', 'scalartypes.c.src'), join('src', 'multiarray', 'usertypes.c'), - join('src', 'multiarray', 'ucsnarrow.c')] + join('src', 'multiarray', 'ucsnarrow.c'), + join('src', 'multiarray', 'vdot.c'), + ] + blas_info = get_info('blas_opt', 0) + if blas_info and ('HAVE_CBLAS', None) in blas_info.get('define_macros', []): + extra_info = blas_info + multiarray_src.append(join('src', 'multiarray', 'cblasfuncs.c')) + else: + extra_info = {} if not ENABLE_SEPARATE_COMPILATION: multiarray_deps.extend(multiarray_src) multiarray_src = [join('src', 'multiarray', 'multiarraymodule_onefile.c')] multiarray_src.append(generate_multiarray_templated_sources) + config.add_extension('multiarray', - sources = multiarray_src + + sources=multiarray_src + [generate_config_h, - generate_numpyconfig_h, - generate_numpy_api, - join(codegen_dir, 'generate_numpy_api.py'), - join('*.py')], - depends = deps + multiarray_deps, - libraries = ['npymath', 'npysort']) + generate_numpyconfig_h, + generate_numpy_api, + join(codegen_dir, 'generate_numpy_api.py'), + join('*.py')], + depends=deps + multiarray_deps, + libraries=['npymath', 'npysort'], + extra_info=extra_info) ####################################################################### # umath module # @@ -942,28 +953,6 @@ def generate_umath_c(ext, build_dir): libraries = ['npymath'], ) - ####################################################################### - # _dotblas module # - ####################################################################### - - # Configure blasdot - blas_info = get_info('blas_opt', 0) - #blas_info = {} - def get_dotblas_sources(ext, build_dir): - if blas_info: - if ('NO_ATLAS_INFO', 1) in blas_info.get('define_macros', []): - return None # dotblas needs ATLAS, Fortran compiled blas will not be sufficient. - return ext.depends[:1] - return None # no extension module will be built - - config.add_extension('_dotblas', - sources = [get_dotblas_sources], - depends = [join('blasdot', '_dotblas.c'), - join('blasdot', 'cblas.h'), - ], - include_dirs = ['blasdot'], - extra_info = blas_info - ) ####################################################################### # umath_tests module # diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 595ecfcdd11d..9273e003c820 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -3,9 +3,10 @@ #include "Python.h" #include "structmember.h" + #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE -#include "numpy/npy_common.h" + #include "numpy/arrayobject.h" #include "numpy/arrayscalars.h" #include "npy_pycompat.h" @@ -27,6 +28,12 @@ #include "numpyos.h" #include +#if defined(HAVE_CBLAS) +#include "cblasfuncs.h" +#include +#include +#endif + /* ***************************************************************************** @@ -2632,12 +2639,14 @@ STRING_compare(char *ip1, char *ip2, PyArrayObject *ap) const unsigned char *c1 = (unsigned char *)ip1; const unsigned char *c2 = (unsigned char *)ip2; const size_t len = PyArray_DESCR(ap)->elsize; - size_t i; + int i; - for(i = 0; i < len; ++i) { - if (c1[i] != c2[i]) { - return (c1[i] > c2[i]) ? 1 : -1; - } + i = memcmp(c1, c2, len); + if (i > 0) { + return 1; + } + else if (i < 0) { + return -1; } return 0; } @@ -3092,6 +3101,115 @@ static int * dot means inner product */ +/************************** MAYBE USE CBLAS *********************************/ + + +/**begin repeat + * + * #name = FLOAT, DOUBLE# + * #type = npy_float, npy_double# + * #prefix = s, d# + */ +NPY_NO_EXPORT void +@name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, + npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(@type@)); + int is2b = blas_stride(is2, sizeof(@type@)); + + if (is1b && is2b) + { + double sum = 0.; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK; + + sum += cblas_@prefix@dot(chunk, + (@type@ *) ip1, is1b, + (@type@ *) ip2, is2b); + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + *((@type@ *)op) = (@type@)sum; + } + else +#endif + { + @type@ sum = (@type@)0; /* could make this double */ + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const @type@ ip1r = *((@type@ *)ip1); + const @type@ ip2r = *((@type@ *)ip2); + + sum += ip1r * ip2r; + } + *((@type@ *)op) = sum; + } +} +/**end repeat**/ + +/**begin repeat + * + * #name = CFLOAT, CDOUBLE# + * #ctype = npy_cfloat, npy_cdouble# + * #type = npy_float, npy_double# + * #prefix = c, z# + */ +NPY_NO_EXPORT void +@name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(@ctype@)); + int is2b = blas_stride(is2, sizeof(@ctype@)); + + if (is1b && is2b) { + double sum[2] = {0., 0.}; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK; + @type@ tmp[2]; + + cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp); + sum[0] += (double)tmp[0]; + sum[1] += (double)tmp[1]; + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + ((@type@ *)op)[0] = (@type@)sum[0]; + ((@type@ *)op)[1] = (@type@)sum[1]; + } + else +#endif + { + @type@ sumr = (@type@)0.0; + @type@ sumi = (@type@)0.0; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const @type@ ip1r = ((@type@ *)ip1)[0]; + const @type@ ip1i = ((@type@ *)ip1)[1]; + const @type@ ip2r = ((@type@ *)ip2)[0]; + const @type@ ip2i = ((@type@ *)ip2)[1]; + + sumr += ip1r * ip2r - ip1i * ip2i; + sumi += ip1r * ip2i + ip1i * ip2r; + } + ((@type@ *)op)[0] = sumr; + ((@type@ *)op)[1] = sumi; + } +} + +/**end repeat**/ + +/**************************** NO CBLAS VERSIONS *****************************/ + static void BOOL_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, void *NPY_UNUSED(ignore)) @@ -3112,16 +3230,13 @@ BOOL_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, * * #name = BYTE, UBYTE, SHORT, USHORT, INT, UINT, * LONG, ULONG, LONGLONG, ULONGLONG, - * FLOAT, DOUBLE, LONGDOUBLE, - * DATETIME, TIMEDELTA# + * LONGDOUBLE, DATETIME, TIMEDELTA# * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_float, npy_double, npy_longdouble, - * npy_datetime, npy_timedelta# + * npy_longdouble, npy_datetime, npy_timedelta# * #out = npy_long, npy_ulong, npy_long, npy_ulong, npy_long, npy_ulong, * npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_float, npy_double, npy_longdouble, - * npy_datetime, npy_timedelta# + * npy_longdouble, npy_datetime, npy_timedelta# */ static void @name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, @@ -3139,8 +3254,8 @@ static void /**end repeat**/ static void -HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, - void *NPY_UNUSED(ignore)) +HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, + npy_intp n, void *NPY_UNUSED(ignore)) { float tmp = 0.0f; npy_intp i; @@ -3152,28 +3267,27 @@ HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, *((npy_half *)op) = npy_float_to_half(tmp); } -/**begin repeat - * - * #name = CFLOAT, CDOUBLE, CLONGDOUBLE# - * #type = npy_float, npy_double, npy_longdouble# - */ -static void @name@_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, - char *op, npy_intp n, void *NPY_UNUSED(ignore)) +static void +CLONGDOUBLE_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) { - @type@ tmpr = (@type@)0.0, tmpi=(@type@)0.0; + npy_longdouble tmpr = 0.0L; + npy_longdouble tmpi = 0.0L; npy_intp i; for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { - tmpr += ((@type@ *)ip1)[0] * ((@type@ *)ip2)[0] - - ((@type@ *)ip1)[1] * ((@type@ *)ip2)[1]; - tmpi += ((@type@ *)ip1)[1] * ((@type@ *)ip2)[0] - + ((@type@ *)ip1)[0] * ((@type@ *)ip2)[1]; + const npy_longdouble ip1r = ((npy_longdouble *)ip1)[0]; + const npy_longdouble ip1i = ((npy_longdouble *)ip1)[1]; + const npy_longdouble ip2r = ((npy_longdouble *)ip2)[0]; + const npy_longdouble ip2i = ((npy_longdouble *)ip2)[1]; + + tmpr += ip1r * ip2r - ip1i * ip2i; + tmpi += ip1r * ip2i + ip1i * ip2r; } - ((@type@ *)op)[0] = tmpr; ((@type@ *)op)[1] = tmpi; + ((npy_longdouble *)op)[0] = tmpr; + ((npy_longdouble *)op)[1] = tmpi; } -/**end repeat**/ - static void OBJECT_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, void *NPY_UNUSED(ignore)) diff --git a/numpy/core/src/multiarray/arraytypes.h b/numpy/core/src/multiarray/arraytypes.h index ff7d4ae408b6..9eed3e92b462 100644 --- a/numpy/core/src/multiarray/arraytypes.h +++ b/numpy/core/src/multiarray/arraytypes.h @@ -1,6 +1,8 @@ #ifndef _NPY_ARRAYTYPES_H_ #define _NPY_ARRAYTYPES_H_ +#include "common.h" + #ifdef NPY_ENABLE_SEPARATE_COMPILATION extern NPY_NO_EXPORT PyArray_Descr LONGLONG_Descr; extern NPY_NO_EXPORT PyArray_Descr LONG_Descr; @@ -10,4 +12,17 @@ extern NPY_NO_EXPORT PyArray_Descr INT_Descr; NPY_NO_EXPORT int set_typeinfo(PyObject *dict); +/* needed for blasfuncs */ +NPY_NO_EXPORT void +FLOAT_dot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +CFLOAT_dot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +DOUBLE_dot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +CDOUBLE_dot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + #endif diff --git a/numpy/core/blasdot/_dotblas.c b/numpy/core/src/multiarray/cblasfuncs.c similarity index 54% rename from numpy/core/blasdot/_dotblas.c rename to numpy/core/src/multiarray/cblasfuncs.c index 48aa39ff87df..bc09ed6e8013 100644 --- a/numpy/core/blasdot/_dotblas.c +++ b/numpy/core/src/multiarray/cblasfuncs.c @@ -4,141 +4,15 @@ */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION -#include "Python.h" - -#include "numpy/arrayobject.h" -#include "npy_config.h" -#include "npy_pycompat.h" -#include "ufunc_override.h" -#ifndef CBLAS_HEADER -#define CBLAS_HEADER "cblas.h" -#endif -#include CBLAS_HEADER +#define _MULTIARRAYMODULE +#include #include -#include -#include - -static char module_doc[] = -"This module provides a BLAS optimized\nmatrix multiply, inner product and dot for numpy arrays"; - -static PyArray_DotFunc *oldFunctions[NPY_NTYPES]; - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -/* - * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done - * (BLAS won't handle negative or zero strides the way we want). - */ -static NPY_INLINE int -blas_stride(npy_intp stride, unsigned itemsize) -{ - if (stride <= 0 || stride % itemsize != 0) { - return 0; - } - stride /= itemsize; - - if (stride > INT_MAX) { - return 0; - } - return stride; -} - -/* - * The following functions do a "chunked" dot product using BLAS when - * sizeof(npy_intp) > sizeof(int), because BLAS libraries can typically not - * handle more than INT_MAX elements per call. - * - * The chunksize is the greatest power of two less than INT_MAX. - */ -#if NPY_MAX_INTP > INT_MAX -# define CHUNKSIZE (INT_MAX / 2 + 1) -#else -# define CHUNKSIZE NPY_MAX_INTP -#endif - -static void -FLOAT_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res, - npy_intp n, void *tmp) -{ - int na = blas_stride(stridea, sizeof(float)); - int nb = blas_stride(strideb, sizeof(float)); - - if (na && nb) { - double r = 0.; /* double for stability */ - float *fa = a, *fb = b; +#include +#include +#include "arraytypes.h" +#include "common.h" - while (n > 0) { - int chunk = MIN(n, CHUNKSIZE); - - r += cblas_sdot(chunk, fa, na, fb, nb); - fa += chunk * na; - fb += chunk * nb; - n -= chunk; - } - *((float *)res) = r; - } - else { - oldFunctions[NPY_FLOAT](a, stridea, b, strideb, res, n, tmp); - } -} - -static void -DOUBLE_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res, - npy_intp n, void *tmp) -{ - int na = blas_stride(stridea, sizeof(double)); - int nb = blas_stride(strideb, sizeof(double)); - - if (na && nb) { - double r = 0.; - double *da = a, *db = b; - - while (n > 0) { - int chunk = MIN(n, CHUNKSIZE); - - r += cblas_ddot(chunk, da, na, db, nb); - da += chunk * na; - db += chunk * nb; - n -= chunk; - } - *((double *)res) = r; - } - else { - oldFunctions[NPY_DOUBLE](a, stridea, b, strideb, res, n, tmp); - } -} - -static void -CFLOAT_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res, - npy_intp n, void *tmp) -{ - int na = blas_stride(stridea, sizeof(npy_cfloat)); - int nb = blas_stride(strideb, sizeof(npy_cfloat)); - - if (na && nb) { - cblas_cdotu_sub((int)n, (float *)a, na, (float *)b, nb, (float *)res); - } - else { - oldFunctions[NPY_CFLOAT](a, stridea, b, strideb, res, n, tmp); - } -} - -static void -CDOUBLE_dot(void *a, npy_intp stridea, void *b, npy_intp strideb, void *res, - npy_intp n, void *tmp) -{ - int na = blas_stride(stridea, sizeof(npy_cdouble)); - int nb = blas_stride(strideb, sizeof(npy_cdouble)); - - if (na && nb) { - cblas_zdotu_sub((int)n, (double *)a, na, (double *)b, nb, - (double *)res); - } - else { - oldFunctions[NPY_CDOUBLE](a, stridea, b, strideb, res, n, tmp); - } -} /* * Helper: call appropriate BLAS dot function for typenum. @@ -148,29 +22,27 @@ static void blas_dot(int typenum, npy_intp n, void *a, npy_intp stridea, void *b, npy_intp strideb, void *res) { - PyArray_DotFunc *dot = NULL; switch (typenum) { case NPY_DOUBLE: - dot = DOUBLE_dot; + DOUBLE_dot(a, stridea, b, strideb, res, n, NULL); break; case NPY_FLOAT: - dot = FLOAT_dot; + FLOAT_dot(a, stridea, b, strideb, res, n, NULL); break; case NPY_CDOUBLE: - dot = CDOUBLE_dot; + CDOUBLE_dot(a, stridea, b, strideb, res, n, NULL); break; case NPY_CFLOAT: - dot = CFLOAT_dot; + CFLOAT_dot(a, stridea, b, strideb, res, n, NULL); break; } - assert(dot != NULL); - dot(a, stridea, b, strideb, res, n, NULL); } static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0}; static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0}; + /* * Helper: dispatch to appropriate cblas_?gemm for typenum. */ @@ -182,7 +54,6 @@ gemm(int typenum, enum CBLAS_ORDER order, { const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B); void *Rdata = PyArray_DATA(R); - int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1; switch (typenum) { @@ -240,124 +111,52 @@ gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, } -static npy_bool altered=NPY_FALSE; - -/* - * alterdot() changes all dot functions to use blas. - */ -static PyObject * -dotblas_alterdot(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - PyArray_Descr *descr; - - if (!PyArg_ParseTuple(args, "")) return NULL; - - /* Replace the dot functions to the ones using blas */ - - if (!altered) { - descr = PyArray_DescrFromType(NPY_FLOAT); - oldFunctions[NPY_FLOAT] = descr->f->dotfunc; - descr->f->dotfunc = (PyArray_DotFunc *)FLOAT_dot; - - descr = PyArray_DescrFromType(NPY_DOUBLE); - oldFunctions[NPY_DOUBLE] = descr->f->dotfunc; - descr->f->dotfunc = (PyArray_DotFunc *)DOUBLE_dot; - - descr = PyArray_DescrFromType(NPY_CFLOAT); - oldFunctions[NPY_CFLOAT] = descr->f->dotfunc; - descr->f->dotfunc = (PyArray_DotFunc *)CFLOAT_dot; - - descr = PyArray_DescrFromType(NPY_CDOUBLE); - oldFunctions[NPY_CDOUBLE] = descr->f->dotfunc; - descr->f->dotfunc = (PyArray_DotFunc *)CDOUBLE_dot; - - altered = NPY_TRUE; - } - - Py_INCREF(Py_None); - return Py_None; -} - -/* - * restoredot() restores dots to defaults. - */ -static PyObject * -dotblas_restoredot(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - PyArray_Descr *descr; - - if (!PyArg_ParseTuple(args, "")) return NULL; - - if (altered) { - descr = PyArray_DescrFromType(NPY_FLOAT); - descr->f->dotfunc = oldFunctions[NPY_FLOAT]; - oldFunctions[NPY_FLOAT] = NULL; - Py_XDECREF(descr); - - descr = PyArray_DescrFromType(NPY_DOUBLE); - descr->f->dotfunc = oldFunctions[NPY_DOUBLE]; - oldFunctions[NPY_DOUBLE] = NULL; - Py_XDECREF(descr); - - descr = PyArray_DescrFromType(NPY_CFLOAT); - descr->f->dotfunc = oldFunctions[NPY_CFLOAT]; - oldFunctions[NPY_CFLOAT] = NULL; - Py_XDECREF(descr); - - descr = PyArray_DescrFromType(NPY_CDOUBLE); - descr->f->dotfunc = oldFunctions[NPY_CDOUBLE]; - oldFunctions[NPY_CDOUBLE] = NULL; - Py_XDECREF(descr); - - altered = NPY_FALSE; - } - - Py_INCREF(Py_None); - return Py_None; -} - typedef enum {_scalar, _column, _row, _matrix} MatrixShape; + static MatrixShape _select_matrix_shape(PyArrayObject *array) { switch (PyArray_NDIM(array)) { - case 0: - return _scalar; - case 1: - if (PyArray_DIM(array, 0) > 1) - return _column; - return _scalar; - case 2: - if (PyArray_DIM(array, 0) > 1) { - if (PyArray_DIM(array, 1) == 1) + case 0: + return _scalar; + case 1: + if (PyArray_DIM(array, 0) > 1) return _column; - else - return _matrix; - } - if (PyArray_DIM(array, 1) == 1) return _scalar; - return _row; + case 2: + if (PyArray_DIM(array, 0) > 1) { + if (PyArray_DIM(array, 1) == 1) + return _column; + else + return _matrix; + } + if (PyArray_DIM(array, 1) == 1) + return _scalar; + return _row; } return _matrix; } -/* This also makes sure that the data segment is aligned with - an itemsize address as well by returning one if not true. -*/ +/* + * This also makes sure that the data segment is aligned with + * an itemsize address as well by returning one if not true. + */ static int _bad_strides(PyArrayObject *ap) { - register int itemsize = PyArray_ITEMSIZE(ap); - register int i, N=PyArray_NDIM(ap); - register npy_intp *strides = PyArray_STRIDES(ap); + int itemsize = PyArray_ITEMSIZE(ap); + int i, N=PyArray_NDIM(ap); + npy_intp *strides = PyArray_STRIDES(ap); - if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) + if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) { return 1; - for (i=0; i 2) || (PyArray_NDIM(ap2) > 2)) { - /* - * This function doesn't handle dimensions greater than 2 - * (or negative striding) -- other - * than to ensure the dot function is altered - */ - if (!altered) { - /* need to alter dot product */ - PyObject *tmp1, *tmp2; - tmp1 = PyTuple_New(0); - tmp2 = dotblas_alterdot(NULL, tmp1); - Py_DECREF(tmp1); - Py_DECREF(tmp2); - } - ret = (PyArrayObject *)PyArray_MatrixProduct2((PyObject *)ap1, - (PyObject *)ap2, - out); - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - } if (_bad_strides(ap1)) { - op1 = PyArray_NewCopy(ap1, NPY_ANYORDER); + PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER); + Py_DECREF(ap1); ap1 = (PyArrayObject *)op1; if (ap1 == NULL) { @@ -480,7 +199,8 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa } } if (_bad_strides(ap2)) { - op2 = PyArray_NewCopy(ap2, NPY_ANYORDER); + PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER); + Py_DECREF(ap2); ap2 = (PyArrayObject *)op2; if (ap2 == NULL) { @@ -494,7 +214,8 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa PyArrayObject *oap1, *oap2; oap1 = ap1; oap2 = ap2; /* One of ap1 or ap2 is a scalar */ - if (ap1shape == _scalar) { /* Make ap2 the scalar */ + if (ap1shape == _scalar) { + /* Make ap2 the scalar */ PyArrayObject *t = ap1; ap1 = ap2; ap2 = t; @@ -529,7 +250,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1); if (PyArray_DIM(oap2, 0) != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0); goto fail; } nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; @@ -541,7 +262,8 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is 1 dim * and the other is 2-dim */ - dimensions[0] = (PyArray_NDIM(oap1) == 2) ? PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1); + dimensions[0] = (PyArray_NDIM(oap1) == 2) ? + PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1); l = dimensions[0]; /* * Fix it so that dot(shape=(N,1), shape=(1,)) @@ -579,13 +301,15 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1); if (PyArray_DIM(ap2, 0) != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0); goto fail; } nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; - if (nd == 1) - dimensions[0] = (PyArray_NDIM(ap1) == 2) ? PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1); + if (nd == 1) { + dimensions[0] = (PyArray_NDIM(ap1) == 2) ? + PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1); + } else if (nd == 2) { dimensions[0] = PyArray_DIM(ap1, 0); dimensions[1] = PyArray_DIM(ap2, 1); @@ -603,8 +327,9 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa subtype = Py_TYPE(ap1); } - if (out) { + if (out != NULL) { int d; + /* verify that out is usable */ if (Py_TYPE(out) != subtype || PyArray_NDIM(out) != nd || @@ -625,11 +350,12 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa } Py_INCREF(out); ret = out; - } else { + } + else { + PyObject *tmp = (PyObject *)(prior2 > prior1 ? ap2 : ap1); + ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, - typenum, NULL, NULL, 0, 0, - (PyObject *) - (prior2 > prior1 ? ap2 : ap1)); + typenum, NULL, NULL, 0, 0, tmp); } if (ret == NULL) { @@ -637,7 +363,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa } numbytes = PyArray_NBYTES(ret); memset(PyArray_DATA(ret), 0, numbytes); - if (numbytes==0 || l == 0) { + if (numbytes == 0 || l == 0) { Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); @@ -654,11 +380,14 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa if (typenum == NPY_DOUBLE) { if (l == 1) { *((double *)PyArray_DATA(ret)) = *((double *)PyArray_DATA(ap2)) * - *((double *)PyArray_DATA(ap1)); + *((double *)PyArray_DATA(ap1)); } else if (ap1shape != _matrix) { - cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), - ap1stride/sizeof(double), (double *)PyArray_DATA(ret), 1); + cblas_daxpy(l, + *((double *)PyArray_DATA(ap2)), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(double), + (double *)PyArray_DATA(ret), 1); } else { int maxind, oind, i, a1s, rets; @@ -666,7 +395,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa double val; maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); - oind = 1-maxind; + oind = 1 - maxind; ptr = PyArray_DATA(ap1); rptr = PyArray_DATA(ret); l = PyArray_DIM(ap1, maxind); @@ -692,8 +421,11 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { - cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), - ap1stride/sizeof(npy_cdouble), (double *)PyArray_DATA(ret), 1); + cblas_zaxpy(l, + (double *)PyArray_DATA(ap2), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cdouble), + (double *)PyArray_DATA(ret), 1); } else { int maxind, oind, i, a1s, rets; @@ -701,7 +433,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa double *pval; maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); - oind = 1-maxind; + oind = 1 - maxind; ptr = PyArray_DATA(ap1); rptr = PyArray_DATA(ret); l = PyArray_DIM(ap1, maxind); @@ -722,8 +454,11 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa *((float *)PyArray_DATA(ap1)); } else if (ap1shape != _matrix) { - cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), - ap1stride/sizeof(float), (float *)PyArray_DATA(ret), 1); + cblas_saxpy(l, + *((float *)PyArray_DATA(ap2)), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(float), + (float *)PyArray_DATA(ret), 1); } else { int maxind, oind, i, a1s, rets; @@ -731,7 +466,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa float val; maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); - oind = 1-maxind; + oind = 1 - maxind; ptr = PyArray_DATA(ap1); rptr = PyArray_DATA(ret); l = PyArray_DIM(ap1, maxind); @@ -757,8 +492,11 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { - cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), - ap1stride/sizeof(npy_cfloat), (float *)PyArray_DATA(ret), 1); + cblas_caxpy(l, + (float *)PyArray_DATA(ap2), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cfloat), + (float *)PyArray_DATA(ret), 1); } else { int maxind, oind, i, a1s, rets; @@ -766,7 +504,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa float *pval; maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); - oind = 1-maxind; + oind = 1 - maxind; ptr = PyArray_DATA(ap1); rptr = PyArray_DATA(ret); l = PyArray_DIM(ap1, maxind); @@ -918,7 +656,7 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa Py_DECREF(ap2); return PyArray_Return(ret); - fail: +fail: Py_XDECREF(ap1); Py_XDECREF(ap2); Py_XDECREF(ret); @@ -933,65 +671,28 @@ dotblas_matrixproduct(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject* kwa * floating point types. Like the generic NumPy equivalent the product * sum is over the last dimension of a and b. * NB: The first argument is not conjugated. + * + * This is for use by PyArray_InnerProduct. It is assumed on entry that the + * arrays ap1 and ap2 have a common data type given by typenum that is + * float, double, cfloat, or cdouble and have dimension <= 2, and have the + * contiguous flag set. The * __numpy_ufunc__ nonsense is also assumed to + * have been taken care of. */ -static PyObject * -dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) +NPY_NO_EXPORT PyObject * +cblas_innerproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2) { - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; int j, l, lda, ldb; - int typenum, nd; + int nd; + double prior1, prior2; + PyArrayObject *ret; npy_intp dimensions[NPY_MAXDIMS]; PyTypeObject *subtype; - double prior1, prior2; - - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; - - /* - * Inner product using the BLAS. The product sum is taken along the last - * dimensions of the two arrays. - * Only speeds things up for float double and complex types. - */ - - - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - - /* This function doesn't handle other types */ - if ((typenum != NPY_DOUBLE && typenum != NPY_CDOUBLE && - typenum != NPY_FLOAT && typenum != NPY_CFLOAT)) { - return PyArray_Return((PyArrayObject *)PyArray_InnerProduct(op1, op2)); - } - - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) return NULL; - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) goto fail; - - if ((PyArray_NDIM(ap1) > 2) || (PyArray_NDIM(ap2) > 2)) { - /* This function doesn't handle dimensions greater than 2 -- other - than to ensure the dot function is altered - */ - if (!altered) { - /* need to alter dot product */ - PyObject *tmp1, *tmp2; - tmp1 = PyTuple_New(0); - tmp2 = dotblas_alterdot(NULL, tmp1); - Py_DECREF(tmp1); - Py_DECREF(tmp2); - } - ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1, - (PyObject *)ap2); - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - } if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { /* One of ap1 or ap2 is a scalar */ - if (PyArray_NDIM(ap1) == 0) { /* Make ap2 the scalar */ + if (PyArray_NDIM(ap1) == 0) { + /* Make ap2 the scalar */ PyArrayObject *t = ap1; ap1 = ap2; ap2 = t; @@ -1002,18 +703,23 @@ dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) } nd = PyArray_NDIM(ap1); } - else { /* (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2) */ - /* Both ap1 and ap2 are vectors or matrices */ - l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1); + else { + /* + * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2) + * Both ap1 and ap2 are vectors or matrices + */ + l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1); - if (PyArray_DIM(ap2, PyArray_NDIM(ap2)-1) != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + if (PyArray_DIM(ap2, PyArray_NDIM(ap2) - 1) != l) { + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, + ap2, PyArray_NDIM(ap2) - 1); goto fail; } - nd = PyArray_NDIM(ap1)+PyArray_NDIM(ap2)-2; + nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; if (nd == 1) - dimensions[0] = (PyArray_NDIM(ap1) == 2) ? PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0); + dimensions[0] = (PyArray_NDIM(ap1) == 2) ? + PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0); else if (nd == 2) { dimensions[0] = PyArray_DIM(ap1, 0); dimensions[1] = PyArray_DIM(ap2, 0); @@ -1027,36 +733,49 @@ dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, - (PyObject *)\ - (prior2 > prior1 ? ap2 : ap1)); + (PyObject *) + (prior2 > prior1 ? ap2 : ap1)); + + if (ret == NULL) { + goto fail; + } - if (ret == NULL) goto fail; - NPY_BEGIN_ALLOW_THREADS + NPY_BEGIN_ALLOW_THREADS; memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret)); if (PyArray_NDIM(ap2) == 0) { /* Multiplication by a scalar -- Level 1 BLAS */ if (typenum == NPY_DOUBLE) { - cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), 1, + cblas_daxpy(l, + *((double *)PyArray_DATA(ap2)), + (double *)PyArray_DATA(ap1), 1, (double *)PyArray_DATA(ret), 1); } else if (typenum == NPY_CDOUBLE) { - cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), 1, + cblas_zaxpy(l, + (double *)PyArray_DATA(ap2), + (double *)PyArray_DATA(ap1), 1, (double *)PyArray_DATA(ret), 1); } else if (typenum == NPY_FLOAT) { - cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), 1, + cblas_saxpy(l, + *((float *)PyArray_DATA(ap2)), + (float *)PyArray_DATA(ap1), 1, (float *)PyArray_DATA(ret), 1); } else if (typenum == NPY_CFLOAT) { - cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), 1, + cblas_caxpy(l, + (float *)PyArray_DATA(ap2), + (float *)PyArray_DATA(ap1), 1, (float *)PyArray_DATA(ret), 1); } } else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) { /* Dot product between two vectors -- Level 1 BLAS */ - blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), - PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret)); + blas_dot(typenum, l, + PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), + PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), + PyArray_DATA(ret)); } else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) { /* Matrix-vector multiplication -- Level 2 BLAS */ @@ -1068,115 +787,17 @@ dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); gemv(typenum, CblasRowMajor, CblasNoTrans, ap2, lda, ap1, 1, ret); } - else { /* (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2) */ - /* Matrix matrix multiplication -- Level 3 BLAS */ + else { + /* + * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2) + * Matrix matrix multiplication -- Level 3 BLAS + */ lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); gemm(typenum, CblasRowMajor, CblasNoTrans, CblasTrans, PyArray_DIM(ap1, 0), PyArray_DIM(ap2, 0), PyArray_DIM(ap1, 1), ap1, lda, ap2, ldb, ret); } - NPY_END_ALLOW_THREADS - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - - fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - Py_XDECREF(ret); - return NULL; -} - - -/* - * vdot(a,b) - * - * Returns the dot product of a and b for scalars and vectors of - * floating point and complex types. The first argument, a, is conjugated. - */ -static PyObject *dotblas_vdot(PyObject *NPY_UNUSED(dummy), PyObject *args) { - PyObject *op1, *op2; - PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL; - int l; - int typenum; - npy_intp dimensions[NPY_MAXDIMS]; - PyArray_Descr *type; - - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; - - /* - * Conjugating dot product using the BLAS for vectors. - * Multiplies op1 and op2, each of which must be vector. - */ - - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - - type = PyArray_DescrFromType(typenum); - Py_INCREF(type); - ap1 = (PyArrayObject *)PyArray_FromAny(op1, type, 0, 0, 0, NULL); - if (ap1==NULL) {Py_DECREF(type); goto fail;} - op1 = PyArray_Flatten(ap1, 0); - if (op1==NULL) {Py_DECREF(type); goto fail;} - Py_DECREF(ap1); - ap1 = (PyArrayObject *)op1; - - ap2 = (PyArrayObject *)PyArray_FromAny(op2, type, 0, 0, 0, NULL); - if (ap2==NULL) goto fail; - op2 = PyArray_Flatten(ap2, 0); - if (op2 == NULL) goto fail; - Py_DECREF(ap2); - ap2 = (PyArrayObject *)op2; - - if (typenum != NPY_FLOAT && typenum != NPY_DOUBLE && - typenum != NPY_CFLOAT && typenum != NPY_CDOUBLE) { - if (!altered) { - /* need to alter dot product */ - PyObject *tmp1, *tmp2; - tmp1 = PyTuple_New(0); - tmp2 = dotblas_alterdot(NULL, tmp1); - Py_DECREF(tmp1); - Py_DECREF(tmp2); - } - if (PyTypeNum_ISCOMPLEX(typenum)) { - op1 = PyArray_Conjugate(ap1, NULL); - if (op1==NULL) goto fail; - Py_DECREF(ap1); - ap1 = (PyArrayObject *)op1; - } - ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1, - (PyObject *)ap2); - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - } - - if (PyArray_DIM(ap2, 0) != PyArray_DIM(ap1, PyArray_NDIM(ap1)-1)) { - PyErr_SetString(PyExc_ValueError, "vectors have different lengths"); - goto fail; - } - l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1); - - ret = (PyArrayObject *)PyArray_SimpleNew(0, dimensions, typenum); - if (ret == NULL) goto fail; - - NPY_BEGIN_ALLOW_THREADS; - - /* Dot product between two vectors -- Level 1 BLAS */ - if (typenum == NPY_DOUBLE || typenum == NPY_FLOAT) { - blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), - PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret)); - } - else if (typenum == NPY_CDOUBLE) { - cblas_zdotc_sub(l, (double *)PyArray_DATA(ap1), 1, - (double *)PyArray_DATA(ap2), 1, (double *)PyArray_DATA(ret)); - } - else if (typenum == NPY_CFLOAT) { - cblas_cdotc_sub(l, (float *)PyArray_DATA(ap1), 1, - (float *)PyArray_DATA(ap2), 1, (float *)PyArray_DATA(ret)); - } - NPY_END_ALLOW_THREADS; Py_DECREF(ap1); @@ -1189,65 +810,3 @@ static PyObject *dotblas_vdot(PyObject *NPY_UNUSED(dummy), PyObject *args) { Py_XDECREF(ret); return NULL; } - -static struct PyMethodDef dotblas_module_methods[] = { - {"dot", (PyCFunction)dotblas_matrixproduct, METH_VARARGS|METH_KEYWORDS, NULL}, - {"inner", (PyCFunction)dotblas_innerproduct, 1, NULL}, - {"vdot", (PyCFunction)dotblas_vdot, 1, NULL}, - {"alterdot", (PyCFunction)dotblas_alterdot, 1, NULL}, - {"restoredot", (PyCFunction)dotblas_restoredot, 1, NULL}, - {NULL, NULL, 0, NULL} /* sentinel */ -}; - -#if defined(NPY_PY3K) -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - "_dotblas", - NULL, - -1, - dotblas_module_methods, - NULL, - NULL, - NULL, - NULL -}; -#endif - -/* Initialization function for the module */ -#if defined(NPY_PY3K) -#define RETVAL m -PyMODINIT_FUNC PyInit__dotblas(void) -#else -#define RETVAL -PyMODINIT_FUNC init_dotblas(void) -#endif -{ -#if defined(NPY_PY3K) - int i; - - PyObject *d, *s, *m; - m = PyModule_Create(&moduledef); -#else - int i; - - PyObject *d, *s; - Py_InitModule3("_dotblas", dotblas_module_methods, module_doc); -#endif - - /* add the functions */ - - /* Import the array object */ - import_array(); - - /* Initialise the array of dot functions */ - for (i = 0; i < NPY_NTYPES; i++) - oldFunctions[i] = NULL; - - /* alterdot at load */ - d = PyTuple_New(0); - s = dotblas_alterdot(NULL, d); - Py_DECREF(d); - Py_DECREF(s); - - return RETVAL; -} diff --git a/numpy/core/src/multiarray/cblasfuncs.h b/numpy/core/src/multiarray/cblasfuncs.h new file mode 100644 index 000000000000..d3ec08db608b --- /dev/null +++ b/numpy/core/src/multiarray/cblasfuncs.h @@ -0,0 +1,10 @@ +#ifndef _NPY_CBLASFUNCS_H_ +#define _NPY_CBLASFUNCS_H_ + +NPY_NO_EXPORT PyObject * +cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *); + +NPY_NO_EXPORT PyObject * +cblas_innerproduct(int, PyArrayObject *, PyArrayObject *); + +#endif diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 35b705aff4e6..2d18af06e73a 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -787,7 +787,7 @@ offset_bounds_from_strides(const int itemsize, const int nd, * @param Dimensionality of the shape * @param npy_intp pointer to shape array * @param String to append after the shape `(1, 2)%s`. - * + * * @return Python unicode string */ NPY_NO_EXPORT PyObject * @@ -836,7 +836,59 @@ convert_shape_to_string(npy_intp n, npy_intp *vals, char *ending) } else { tmp = PyUString_FromFormat(")%s", ending); - } + } PyUString_ConcatAndDel(&ret, tmp); return ret; } + + +NPY_NO_EXPORT void +dot_alignment_error(PyArrayObject *a, int i, PyArrayObject *b, int j) +{ + PyObject *errmsg = NULL, *format = NULL, *fmt_args = NULL, + *i_obj = NULL, *j_obj = NULL, + *shape1 = NULL, *shape2 = NULL, + *shape1_i = NULL, *shape2_j = NULL; + + format = PyUString_FromString("shapes %s and %s not aligned:" + " %d (dim %d) != %d (dim %d)"); + + shape1 = convert_shape_to_string(PyArray_NDIM(a), PyArray_DIMS(a), ""); + shape2 = convert_shape_to_string(PyArray_NDIM(b), PyArray_DIMS(b), ""); + + i_obj = PyLong_FromLong(i); + j_obj = PyLong_FromLong(j); + + shape1_i = PyLong_FromSsize_t(PyArray_DIM(a, i)); + shape2_j = PyLong_FromSsize_t(PyArray_DIM(b, j)); + + if (!format || !shape1 || !shape2 || !i_obj || !j_obj || + !shape1_i || !shape2_j) { + goto end; + } + + fmt_args = PyTuple_Pack(6, shape1, shape2, + shape1_i, i_obj, shape2_j, j_obj); + if (fmt_args == NULL) { + goto end; + } + + errmsg = PyUString_Format(format, fmt_args); + if (errmsg != NULL) { + PyErr_SetObject(PyExc_ValueError, errmsg); + } + else { + PyErr_SetString(PyExc_ValueError, "shapes are not aligned"); + } + +end: + Py_XDECREF(errmsg); + Py_XDECREF(fmt_args); + Py_XDECREF(format); + Py_XDECREF(i_obj); + Py_XDECREF(j_obj); + Py_XDECREF(shape1); + Py_XDECREF(shape2); + Py_XDECREF(shape1_i); + Py_XDECREF(shape2_j); +} diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index 6b49d6b4cf5c..9cf2e27bfb50 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -3,6 +3,7 @@ #include #include #include +#include #define error_converting(x) (((x) == -1) && PyErr_Occurred()) @@ -72,6 +73,13 @@ offset_bounds_from_strides(const int itemsize, const int nd, NPY_NO_EXPORT PyObject * convert_shape_to_string(npy_intp n, npy_intp *vals, char *ending); +/* + * Sets ValueError with "matrices not aligned" message for np.dot and friends + * when a.shape[i] should match b.shape[j], but doesn't. + */ +NPY_NO_EXPORT void +dot_alignment_error(PyArrayObject *a, int i, PyArrayObject *b, int j); + /* * Returns -1 and sets an exception if *index is an invalid index for @@ -208,6 +216,35 @@ _is_basic_python_type(PyObject * obj) return 0; } +/* + * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done + * (BLAS won't handle negative or zero strides the way we want). + */ +static NPY_INLINE int +blas_stride(npy_intp stride, unsigned itemsize) +{ + /* + * Should probably check pointer alignment also, but this may cause + * problems if we require complex to be 16 byte aligned. + */ + if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) { + stride /= itemsize; + if (stride <= INT_MAX) { + return stride; + } + } + return 0; +} + +/* + * Define a chunksize for CBLAS. CBLAS counts in integers. + */ +#if NPY_MAX_INTP > INT_MAX +# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1) +#else +# define NPY_CBLAS_CHUNK NPY_MAX_INTP +#endif + #include "ucsnarrow.h" #endif diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index bfd3bc3c1593..17b95b654242 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -9,9 +9,8 @@ #include "numpy/arrayscalars.h" #include "npy_config.h" - #include "npy_pycompat.h" - +#include "ufunc_override.h" #include "common.h" #include "ctors.h" #include "calculation.h" @@ -2011,31 +2010,52 @@ array_cumprod(PyArrayObject *self, PyObject *args, PyObject *kwds) static PyObject * array_dot(PyArrayObject *self, PyObject *args, PyObject *kwds) { - PyObject *fname, *ret, *b, *out = NULL; - static PyObject *numpycore = NULL; - char * kwords[] = {"b", "out", NULL }; + static PyUFuncObject *cached_npy_dot = NULL; + int errval; + PyObject *override = NULL; + PyObject *a = (PyObject *)self, *b, *o = Py_None; + PyObject *newargs; + PyArrayObject *ret; + char* kwlist[] = {"b", "out", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O", kwords, &b, &out)) { + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O", kwlist, &b, &o)) { return NULL; } - /* Since blas-dot is exposed only on the Python side, we need to grab it - * from there */ - if (numpycore == NULL) { - numpycore = PyImport_ImportModule("numpy.core"); - if (numpycore == NULL) { - return NULL; - } + if (cached_npy_dot == NULL) { + PyObject *module = PyImport_ImportModule("numpy.core.multiarray"); + cached_npy_dot = (PyUFuncObject*)PyDict_GetItemString( + PyModule_GetDict(module), "dot"); + + Py_INCREF(cached_npy_dot); + Py_DECREF(module); } - fname = PyUString_FromString("dot"); - if (out == NULL) { - ret = PyObject_CallMethodObjArgs(numpycore, fname, self, b, NULL); + + if ((newargs = PyTuple_Pack(3, a, b, o)) == NULL) { + return NULL; } - else { - ret = PyObject_CallMethodObjArgs(numpycore, fname, self, b, out, NULL); + errval = PyUFunc_CheckOverride(cached_npy_dot, "__call__", + newargs, NULL, &override, 2); + Py_DECREF(newargs); + + if (errval) { + return NULL; } - Py_DECREF(fname); - return ret; + else if (override) { + return override; + } + + if (o == Py_None) { + o = NULL; + } + if (o != NULL && !PyArray_Check(o)) { + PyErr_SetString(PyExc_TypeError, + "'out' must be an array"); + return NULL; + } + ret = (PyArrayObject *)PyArray_MatrixProduct2(a, b, (PyArrayObject *)o); + return PyArray_Return(ret); } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 8e9b656cf336..6d5bf887ea3f 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -57,6 +57,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "ufunc_override.h" #include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */ #include "multiarraymodule.h" +#include "cblasfuncs.h" +#include "vdot.h" /* Only here for API compatibility */ NPY_NO_EXPORT PyTypeObject PyBigArray_Type; @@ -835,8 +837,18 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0, NPY_ARRAY_ALIGNED, NULL); if (ap2 == NULL) { - goto fail; + Py_DECREF(ap1); + return NULL; } + +#if defined(HAVE_CBLAS) + if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 && + (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum || + NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) { + return cblas_innerproduct(typenum, ap1, ap2); + } +#endif + if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2); ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply( @@ -848,7 +860,8 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) l = PyArray_DIMS(ap1)[PyArray_NDIM(ap1) - 1]; if (PyArray_DIMS(ap2)[PyArray_NDIM(ap2) - 1] != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, + ap2, PyArray_NDIM(ap2) - 1); goto fail; } @@ -882,7 +895,8 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) } is1 = PyArray_STRIDES(ap1)[PyArray_NDIM(ap1) - 1]; is2 = PyArray_STRIDES(ap2)[PyArray_NDIM(ap2) - 1]; - op = PyArray_DATA(ret); os = PyArray_DESCR(ret)->elsize; + op = PyArray_DATA(ret); + os = PyArray_DESCR(ret)->elsize; axis = PyArray_NDIM(ap1) - 1; it1 = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)ap1, &axis); axis = PyArray_NDIM(ap2) - 1; @@ -915,7 +929,17 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) } /*NUMPY_API - * Numeric.matrixproduct(a,v,out) + * Numeric.matrixproduct(a,v) + * just like inner product but does the swapaxes stuff on the fly + */ +NPY_NO_EXPORT PyObject * +PyArray_MatrixProduct(PyObject *op1, PyObject *op2) +{ + return PyArray_MatrixProduct2(op1, op2, NULL); +} + +/*NUMPY_API + * Numeric.matrixproduct2(a,v,out) * just like inner product but does the swapaxes stuff on the fly */ NPY_NO_EXPORT PyObject * @@ -950,8 +974,18 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0, NPY_ARRAY_ALIGNED, NULL); if (ap2 == NULL) { - goto fail; + Py_DECREF(ap1); + return NULL; + } + +#if defined(HAVE_CBLAS) + if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 && + (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum || + NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) { + return cblas_matrixproduct(typenum, ap1, ap2, out); } +#endif + if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2); ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply( @@ -968,7 +1002,7 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) matchDim = 0; } if (PyArray_DIMS(ap2)[matchDim] != l) { - PyErr_SetString(PyExc_ValueError, "objects are not aligned"); + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, matchDim); goto fail; } nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; @@ -986,14 +1020,9 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) if(PyArray_NDIM(ap2) > 1) { dimensions[j++] = PyArray_DIMS(ap2)[PyArray_NDIM(ap2)-1]; } - /* - fprintf(stderr, "nd=%d dimensions=", nd); - for(i=0; ielsize; + ip1 = PyArray_DATA(ap1); + ip2 = PyArray_DATA(ap2); + op = PyArray_DATA(ret); + + switch (typenum) { + case NPY_CFLOAT: + vdot = (PyArray_DotFunc *)CFLOAT_vdot; + break; + case NPY_CDOUBLE: + vdot = (PyArray_DotFunc *)CDOUBLE_vdot; + break; + case NPY_CLONGDOUBLE: + vdot = (PyArray_DotFunc *)CLONGDOUBLE_vdot; + break; + case NPY_OBJECT: + vdot = (PyArray_DotFunc *)OBJECT_vdot; + break; + default: + vdot = type->f->dotfunc; + if (vdot == NULL) { + PyErr_SetString(PyExc_ValueError, + "function not available for this data type"); + goto fail; + } + } + + if (n < 500) { + vdot(ip1, stride, ip2, stride, op, n, NULL); + } + else { + NPY_BEGIN_THREADS_DESCR(type); + vdot(ip1, stride, ip2, stride, op, n, NULL); + NPY_END_THREADS_DESCR(type); + } + + Py_XDECREF(ap1); + Py_XDECREF(ap2); + return PyArray_Return(ret); +fail: + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(ret); + return NULL; } + static int einsum_sub_op_from_str(PyObject *args, PyObject **str_obj, char **subscripts, PyArrayObject **op) @@ -3805,6 +3937,9 @@ static struct PyMethodDef array_module_methods[] = { {"dot", (PyCFunction)array_matrixproduct, METH_VARARGS | METH_KEYWORDS, NULL}, + {"vdot", + (PyCFunction)array_vdot, + METH_VARARGS | METH_KEYWORDS, NULL}, {"einsum", (PyCFunction)array_einsum, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/multiarraymodule_onefile.c b/numpy/core/src/multiarray/multiarraymodule_onefile.c index 2d05c20ef007..04fef61ce0f3 100644 --- a/numpy/core/src/multiarray/multiarraymodule_onefile.c +++ b/numpy/core/src/multiarray/multiarraymodule_onefile.c @@ -2,7 +2,7 @@ * This file includes all the .c files needed for a complete multiarray module. * This is used in the case where separate compilation is not enabled * - * Note that the order of the includs matters + * Note that the order of the includes matters */ #include "common.c" @@ -15,6 +15,7 @@ #include "datetime_busday.c" #include "datetime_busdaycal.c" #include "arraytypes.c" +#include "vdot.c" #include "hashdescr.c" #include "numpyos.c" @@ -50,9 +51,10 @@ #include "array_assign_scalar.c" #include "array_assign_array.c" #include "ucsnarrow.c" - #include "arrayobject.c" - #include "numpymemoryview.c" - #include "multiarraymodule.c" + +#if defined(HAVE_CBLAS) +#include "cblasfuncs.c" +#endif diff --git a/numpy/core/src/multiarray/vdot.c b/numpy/core/src/multiarray/vdot.c new file mode 100644 index 000000000000..0b80b6ccaa7c --- /dev/null +++ b/numpy/core/src/multiarray/vdot.c @@ -0,0 +1,183 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include +#include "common.h" +#include "vdot.h" + +#if defined(HAVE_CBLAS) +#include +#endif + + +/* + * All data is assumed aligned. + */ +NPY_NO_EXPORT void +CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(npy_cfloat)); + int is2b = blas_stride(is2, sizeof(npy_cfloat)); + + if (is1b && is2b) { + double sum[2] = {0., 0.}; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK; + float tmp[2]; + + cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp); + sum[0] += (double)tmp[0]; + sum[1] += (double)tmp[1]; + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + ((float *)op)[0] = (float)sum[0]; + ((float *)op)[1] = (float)sum[1]; + } + else +#endif + { + float sumr = (float)0.0; + float sumi = (float)0.0; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const float ip1r = ((float *)ip1)[0]; + const float ip1i = ((float *)ip1)[1]; + const float ip2r = ((float *)ip2)[0]; + const float ip2i = ((float *)ip2)[1]; + + sumr += ip1r * ip2r + ip1i * ip2i; + sumi += ip1r * ip2i - ip1i * ip2r; + } + ((float *)op)[0] = sumr; + ((float *)op)[1] = sumi; + } +} + + +/* + * All data is assumed aligned. + */ +NPY_NO_EXPORT void +CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ +#if defined(HAVE_CBLAS) + int is1b = blas_stride(is1, sizeof(npy_cdouble)); + int is2b = blas_stride(is2, sizeof(npy_cdouble)); + + if (is1b && is2b) { + double sum[2] = {0., 0.}; /* double for stability */ + + while (n > 0) { + int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK; + double tmp[2]; + + cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp); + sum[0] += (double)tmp[0]; + sum[1] += (double)tmp[1]; + /* use char strides here */ + ip1 += chunk * is1; + ip2 += chunk * is2; + n -= chunk; + } + ((double *)op)[0] = (double)sum[0]; + ((double *)op)[1] = (double)sum[1]; + } + else +#endif + { + double sumr = (double)0.0; + double sumi = (double)0.0; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const double ip1r = ((double *)ip1)[0]; + const double ip1i = ((double *)ip1)[1]; + const double ip2r = ((double *)ip2)[0]; + const double ip2i = ((double *)ip2)[1]; + + sumr += ip1r * ip2r + ip1i * ip2i; + sumi += ip1r * ip2i - ip1i * ip2r; + } + ((double *)op)[0] = sumr; + ((double *)op)[1] = sumi; + } +} + + +/* + * All data is assumed aligned. + */ +NPY_NO_EXPORT void +CLONGDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, + char *op, npy_intp n, void *NPY_UNUSED(ignore)) +{ + npy_longdouble tmpr = 0.0L; + npy_longdouble tmpi = 0.0L; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + const npy_longdouble ip1r = ((npy_longdouble *)ip1)[0]; + const npy_longdouble ip1i = ((npy_longdouble *)ip1)[1]; + const npy_longdouble ip2r = ((npy_longdouble *)ip2)[0]; + const npy_longdouble ip2i = ((npy_longdouble *)ip2)[1]; + + tmpr += ip1r * ip2r + ip1i * ip2i; + tmpi += ip1r * ip2i - ip1i * ip2r; + } + ((npy_longdouble *)op)[0] = tmpr; + ((npy_longdouble *)op)[1] = tmpi; +} + +/* + * All data is assumed aligned. + */ +NPY_NO_EXPORT void +OBJECT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, + void *NPY_UNUSED(ignore)) +{ + npy_intp i; + PyObject *tmp0, *tmp1, *tmp2, *tmp = NULL; + PyObject **tmp3; + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + if ((*((PyObject **)ip1) == NULL) || (*((PyObject **)ip2) == NULL)) { + tmp1 = Py_False; + Py_INCREF(Py_False); + } + else { + tmp0 = PyObject_CallMethod(*((PyObject **)ip1), "conjugate", NULL); + if (tmp0 == NULL) { + Py_XDECREF(tmp); + return; + } + tmp1 = PyNumber_Multiply(tmp0, *((PyObject **)ip2)); + Py_DECREF(tmp0); + if (tmp1 == NULL) { + Py_XDECREF(tmp); + return; + } + } + if (i == 0) { + tmp = tmp1; + } + else { + tmp2 = PyNumber_Add(tmp, tmp1); + Py_XDECREF(tmp); + Py_XDECREF(tmp1); + if (tmp2 == NULL) { + return; + } + tmp = tmp2; + } + } + tmp3 = (PyObject**) op; + tmp2 = *tmp3; + *((PyObject **)op) = tmp; + Py_XDECREF(tmp2); +} diff --git a/numpy/core/src/multiarray/vdot.h b/numpy/core/src/multiarray/vdot.h new file mode 100644 index 000000000000..0f60ca6d19a5 --- /dev/null +++ b/numpy/core/src/multiarray/vdot.h @@ -0,0 +1,18 @@ +#ifndef _NPY_VDOT_H_ +#define _NPY_VDOT_H_ + +#include "common.h" + +NPY_NO_EXPORT void +CFLOAT_vdot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +CDOUBLE_vdot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +CLONGDOUBLE_vdot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +NPY_NO_EXPORT void +OBJECT_vdot(char *, npy_intp, char *, npy_intp, char *, npy_intp, void *); + +#endif diff --git a/numpy/core/tests/test_blasdot.py b/numpy/core/tests/test_blasdot.py deleted file mode 100644 index caa576abcf7e..000000000000 --- a/numpy/core/tests/test_blasdot.py +++ /dev/null @@ -1,171 +0,0 @@ -from __future__ import division, absolute_import, print_function - -import numpy as np -import sys -from numpy.core import zeros, float64 -from numpy.testing import dec, TestCase, assert_almost_equal, assert_, \ - assert_raises, assert_array_equal, assert_allclose, assert_equal -from numpy.core.multiarray import inner as inner_ - -DECPREC = 14 - -class TestInner(TestCase): - def test_vecself(self): - """Ticket 844.""" - # Inner product of a vector with itself segfaults or give meaningless - # result - a = zeros(shape = (1, 80), dtype = float64) - p = inner_(a, a) - assert_almost_equal(p, 0, decimal = DECPREC) - -try: - import numpy.core._dotblas as _dotblas -except ImportError: - _dotblas = None - -@dec.skipif(_dotblas is None, "Numpy is not compiled with _dotblas") -def test_blasdot_used(): - from numpy.core import dot, vdot, inner, alterdot, restoredot - assert_(dot is _dotblas.dot) - assert_(vdot is _dotblas.vdot) - assert_(inner is _dotblas.inner) - assert_(alterdot is _dotblas.alterdot) - assert_(restoredot is _dotblas.restoredot) - - -def test_dot_2args(): - from numpy.core import dot - - a = np.array([[1, 2], [3, 4]], dtype=float) - b = np.array([[1, 0], [1, 1]], dtype=float) - c = np.array([[3, 2], [7, 4]], dtype=float) - - d = dot(a, b) - assert_allclose(c, d) - -def test_dot_3args(): - np.random.seed(22) - f = np.random.random_sample((1024, 16)) - v = np.random.random_sample((16, 32)) - - r = np.empty((1024, 32)) - for i in range(12): - np.dot(f, v, r) - assert_equal(sys.getrefcount(r), 2) - r2 = np.dot(f, v, out=None) - assert_array_equal(r2, r) - assert_(r is np.dot(f, v, out=r)) - - v = v[:, 0].copy() # v.shape == (16,) - r = r[:, 0].copy() # r.shape == (1024,) - r2 = np.dot(f, v) - assert_(r is np.dot(f, v, r)) - assert_array_equal(r2, r) - -def test_dot_3args_errors(): - np.random.seed(22) - f = np.random.random_sample((1024, 16)) - v = np.random.random_sample((16, 32)) - - r = np.empty((1024, 31)) - assert_raises(ValueError, np.dot, f, v, r) - - r = np.empty((1024,)) - assert_raises(ValueError, np.dot, f, v, r) - - r = np.empty((32,)) - assert_raises(ValueError, np.dot, f, v, r) - - r = np.empty((32, 1024)) - assert_raises(ValueError, np.dot, f, v, r) - assert_raises(ValueError, np.dot, f, v, r.T) - - r = np.empty((1024, 64)) - assert_raises(ValueError, np.dot, f, v, r[:, ::2]) - assert_raises(ValueError, np.dot, f, v, r[:, :32]) - - r = np.empty((1024, 32), dtype=np.float32) - assert_raises(ValueError, np.dot, f, v, r) - - r = np.empty((1024, 32), dtype=int) - assert_raises(ValueError, np.dot, f, v, r) - -def test_dot_array_order(): - """ Test numpy dot with different order C, F - - Comparing results with multiarray dot. - Double and single precisions array are compared using relative - precision of 7 and 5 decimals respectively. - Use 30 decimal when comparing exact operations like: - (a.b)' = b'.a' - """ - _dot = np.core.multiarray.dot - a_dim, b_dim, c_dim = 10, 4, 7 - orders = ["C", "F"] - dtypes_prec = {np.float64: 7, np.float32: 5} - np.random.seed(7) - - for arr_type, prec in dtypes_prec.items(): - for a_order in orders: - a = np.asarray(np.random.randn(a_dim, a_dim), - dtype=arr_type, order=a_order) - assert_array_equal(np.dot(a, a), a.dot(a)) - # (a.a)' = a'.a', note that mse~=1e-31 needs almost_equal - assert_almost_equal(a.dot(a), a.T.dot(a.T).T, decimal=prec) - - # - # Check with making explicit copy - # - a_T = a.T.copy(order=a_order) - assert_almost_equal(a_T.dot(a_T), a.T.dot(a.T), decimal=prec) - assert_almost_equal(a.dot(a_T), a.dot(a.T), decimal=prec) - assert_almost_equal(a_T.dot(a), a.T.dot(a), decimal=prec) - - # - # Compare with multiarray dot - # - assert_almost_equal(a.dot(a), _dot(a, a), decimal=prec) - assert_almost_equal(a.T.dot(a), _dot(a.T, a), decimal=prec) - assert_almost_equal(a.dot(a.T), _dot(a, a.T), decimal=prec) - assert_almost_equal(a.T.dot(a.T), _dot(a.T, a.T), decimal=prec) - for res in a.dot(a), a.T.dot(a), a.dot(a.T), a.T.dot(a.T): - assert res.flags.c_contiguous - - for b_order in orders: - b = np.asarray(np.random.randn(a_dim, b_dim), - dtype=arr_type, order=b_order) - b_T = b.T.copy(order=b_order) - assert_almost_equal(a_T.dot(b), a.T.dot(b), decimal=prec) - assert_almost_equal(b_T.dot(a), b.T.dot(a), decimal=prec) - # (b'.a)' = a'.b - assert_almost_equal(b.T.dot(a), a.T.dot(b).T, decimal=prec) - assert_almost_equal(a.dot(b), _dot(a, b), decimal=prec) - assert_almost_equal(b.T.dot(a), _dot(b.T, a), decimal=prec) - - - for c_order in orders: - c = np.asarray(np.random.randn(b_dim, c_dim), - dtype=arr_type, order=c_order) - c_T = c.T.copy(order=c_order) - assert_almost_equal(c.T.dot(b.T), c_T.dot(b_T), decimal=prec) - assert_almost_equal(c.T.dot(b.T).T, b.dot(c), decimal=prec) - assert_almost_equal(b.dot(c), _dot(b, c), decimal=prec) - assert_almost_equal(c.T.dot(b.T), _dot(c.T, b.T), decimal=prec) - -def test_dot_override(): - class A(object): - def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs): - return "A" - - class B(object): - def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs): - return NotImplemented - - a = A() - b = B() - c = np.array([[1]]) - - assert_equal(np.dot(a, b), "A") - assert_equal(c.dot(a), "A") - assert_raises(TypeError, np.dot, b, c) - assert_raises(TypeError, c.dot, b) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index ef56766f5f41..9e2248205431 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -5,13 +5,11 @@ """ from __future__ import division, absolute_import, print_function -import sys import operator import warnings -from nose.plugins.skip import SkipTest import numpy as np -from numpy.testing import (dec, run_module_suite, assert_raises, +from numpy.testing import (run_module_suite, assert_raises, assert_warns, assert_array_equal, assert_) @@ -34,11 +32,9 @@ def setUp(self): warnings.filterwarnings("always", message=self.message, category=DeprecationWarning) - def tearDown(self): self.warn_ctx.__exit__() - def assert_deprecated(self, function, num=1, ignore_others=False, function_fails=False, exceptions=(DeprecationWarning,), args=(), kwargs={}): @@ -102,7 +98,6 @@ def assert_deprecated(self, function, num=1, ignore_others=False, if exceptions == tuple(): raise AssertionError("Error raised during function call") - def assert_not_deprecated(self, function, args=(), kwargs={}): """Test if DeprecationWarnings are given and raised. @@ -143,6 +138,7 @@ class TestFloatNonIntegerArgumentDeprecation(_DeprecationTestCase): def test_indexing(self): a = np.array([[[5]]]) + def assert_deprecated(*args, **kwargs): self.assert_deprecated(*args, exceptions=(IndexError,), **kwargs) @@ -172,7 +168,6 @@ def assert_deprecated(*args, **kwargs): assert_deprecated(lambda: a[0.0:, 0.0], num=2) assert_deprecated(lambda: a[0.0:, 0.0,:], num=2) - def test_valid_indexing(self): a = np.array([[[5]]]) assert_not_deprecated = self.assert_not_deprecated @@ -183,9 +178,9 @@ def test_valid_indexing(self): assert_not_deprecated(lambda: a[:, 0,:]) assert_not_deprecated(lambda: a[:,:,:]) - def test_slicing(self): a = np.array([[5]]) + def assert_deprecated(*args, **kwargs): self.assert_deprecated(*args, exceptions=(IndexError,), **kwargs) @@ -217,7 +212,6 @@ def assert_deprecated(*args, **kwargs): # should still get the DeprecationWarning if step = 0. assert_deprecated(lambda: a[::0.0], function_fails=True) - def test_valid_slicing(self): a = np.array([[[5]]]) assert_not_deprecated = self.assert_not_deprecated @@ -231,7 +225,6 @@ def test_valid_slicing(self): assert_not_deprecated(lambda: a[:2:2]) assert_not_deprecated(lambda: a[1:2:2]) - def test_non_integer_argument_deprecations(self): a = np.array([[5]]) @@ -240,7 +233,6 @@ def test_non_integer_argument_deprecations(self): self.assert_deprecated(np.take, args=(a, [0], 1.)) self.assert_deprecated(np.take, args=(a, [0], np.float64(1.))) - def test_non_integer_sequence_multiplication(self): # Numpy scalar sequence multiply should not work with non-integers def mult(a, b): @@ -248,7 +240,6 @@ def mult(a, b): self.assert_deprecated(mult, args=([1], np.float_(3))) self.assert_not_deprecated(mult, args=([1], np.int_(3))) - def test_reduce_axis_float_index(self): d = np.zeros((3,3,3)) self.assert_deprecated(np.min, args=(d, 0.5)) @@ -303,7 +294,6 @@ def test_array_to_index_deprecation(self): # Check slicing. Normal indexing checks arrays specifically. self.assert_deprecated(lambda: a[a:a:a], exceptions=(), num=3) - class TestNonIntegerArrayLike(_DeprecationTestCase): """Tests that array likes, i.e. lists give a deprecation warning when they cannot be safely cast to an integer. @@ -320,7 +310,6 @@ def test_basic(self): self.assert_not_deprecated(a.__getitem__, ([],)) - def test_boolean_futurewarning(self): a = np.arange(10) with warnings.catch_warnings(): @@ -378,12 +367,13 @@ class TestRankDeprecation(_DeprecationTestCase): """Test that np.rank is deprecated. The function should simply be removed. The VisibleDeprecationWarning may become unnecessary. """ + def test(self): a = np.arange(10) assert_warns(np.VisibleDeprecationWarning, np.rank, a) -class TestComparisonDepreactions(_DeprecationTestCase): +class TestComparisonDeprecations(_DeprecationTestCase): """This tests the deprecation, for non-elementwise comparison logic. This used to mean that when an error occured during element-wise comparison (i.e. broadcasting) NotImplemented was returned, but also in the comparison @@ -408,7 +398,6 @@ def test_normal_types(self): b = np.array([1, np.array([1,2,3])], dtype=object) self.assert_deprecated(op, args=(a, b), num=None) - def test_string(self): # For two string arrays, strings always raised the broadcasting error: a = np.array(['a', 'b']) @@ -420,7 +409,6 @@ def test_string(self): # following works (and returns False) due to dtype mismatch: a == [] - def test_none_comparison(self): # Test comparison of None, which should result in elementwise # comparison in the future. [1, 2] == None should be [False, False]. @@ -455,14 +443,14 @@ def test_scalar_none_comparison(self): assert_(np.equal(np.datetime64('NaT'), None)) -class TestIdentityComparisonDepreactions(_DeprecationTestCase): +class TestIdentityComparisonDeprecations(_DeprecationTestCase): """This tests the equal and not_equal object ufuncs identity check deprecation. This was due to the usage of PyObject_RichCompareBool. This tests that for example for `a = np.array([np.nan], dtype=object)` `a == a` it is warned that False and not `np.nan is np.nan` is returned. - Should be kept in sync with TestComparisonDepreactions and new tests + Should be kept in sync with TestComparisonDeprecations and new tests added when the deprecation is over. Requires only removing of @identity@ (and blocks) from the ufunc loops.c.src of the OBJECT comparisons. """ @@ -488,11 +476,11 @@ def test_identity_equality_mismatch(self): np.less_equal(a, a) np.greater_equal(a, a) - def test_comparison_error(self): class FunkyType(object): def __eq__(self, other): raise TypeError("I won't compare") + def __ne__(self, other): raise TypeError("I won't compare") @@ -500,7 +488,6 @@ def __ne__(self, other): self.assert_deprecated(np.equal, args=(a, a)) self.assert_deprecated(np.not_equal, args=(a, a)) - def test_bool_error(self): # The comparison result cannot be interpreted as a bool a = np.array([np.array([1, 2, 3]), None], dtype=object) @@ -508,5 +495,18 @@ def test_bool_error(self): self.assert_deprecated(np.not_equal, args=(a, a)) +class TestAlterdotRestoredotDeprecations(_DeprecationTestCase): + """The alterdot/restoredot functions are deprecated. + + These functions no longer do anything in numpy 1.10, so should not be + used. + + """ + + def test_alterdot_restoredot_deprecation(self): + self.assert_deprecated(np.alterdot) + self.assert_deprecated(np.restoredot) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 96db8cde3cb5..059842c51d91 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -3355,6 +3355,44 @@ def __array_finalize__(self, obj): res = dat.var(1) assert_(res.info == dat.info) +class TestVdot(TestCase): + def test_basic(self): + dt_numeric = np.typecodes['AllFloat'] + np.typecodes['AllInteger'] + dt_complex = np.typecodes['Complex'] + + # test real + a = np.eye(3) + for dt in dt_numeric + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test complex + a = np.eye(3) * 1j + for dt in dt_complex + 'O': + b = a.astype(dt) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), 3) + + # test boolean + b = np.eye(3, dtype=np.bool) + res = np.vdot(b, b) + assert_(np.isscalar(res)) + assert_equal(np.vdot(b, b), True) + + def test_vdot_array_order(self): + a = array([[1, 2], [3, 4]], order='C') + b = array([[1, 2], [3, 4]], order='F') + res = np.vdot(a, a) + + # integer arrays are exact + assert_equal(np.vdot(a, b), res) + assert_equal(np.vdot(b, a), res) + assert_equal(np.vdot(b, b), res) + + class TestDot(TestCase): def test_dot_2args(self): from numpy.core.multiarray import dot @@ -3417,6 +3455,16 @@ def test_dot_3args_errors(self): r = np.empty((1024, 32), dtype=int) assert_raises(ValueError, dot, f, v, r) + def test_dot_array_order(self): + a = array([[1, 2], [3, 4]], order='C') + b = array([[1, 2], [3, 4]], order='F') + res = np.dot(a, a) + + # integer arrays are exact + assert_equal(np.dot(a, b), res) + assert_equal(np.dot(b, a), res) + assert_equal(np.dot(b, b), res) + def test_dot_scalar_and_matrix_of_objects(self): # Ticket #2469 arr = np.matrix([1, 2], dtype=object) @@ -3424,6 +3472,24 @@ def test_dot_scalar_and_matrix_of_objects(self): assert_equal(np.dot(arr, 3), desired) assert_equal(np.dot(3, arr), desired) + def test_dot_override(self): + class A(object): + def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs): + return "A" + + class B(object): + def __numpy_ufunc__(self, ufunc, method, pos, inputs, **kwargs): + return NotImplemented + + a = A() + b = B() + c = np.array([[1]]) + + assert_equal(np.dot(a, b), "A") + assert_equal(c.dot(a), "A") + assert_raises(TypeError, np.dot, b, c) + assert_raises(TypeError, c.dot, b) + class TestInner(TestCase): @@ -3434,6 +3500,14 @@ def test_inner_scalar_and_matrix_of_objects(self): assert_equal(np.inner(arr, 3), desired) assert_equal(np.inner(3, arr), desired) + def test_vecself(self): + # Ticket 844. + # Inner product of a vector with itself segfaults or give + # meaningless result + a = zeros(shape = (1, 80), dtype = float64) + p = inner(a, a) + assert_almost_equal(p, 0, decimal=14) + class TestSummarization(TestCase): def test_1d(self): @@ -3454,7 +3528,6 @@ def test_2d(self): ' [ 501, 502, 503, ..., 999, 1000, 1001]])' assert_(repr(A) == reprA) - class TestChoose(TestCase): def setUp(self): self.x = 2*ones((3,), dtype=int) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 3030c68abb08..e32519316eb6 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -333,11 +333,10 @@ def test_log1p(self): assert_almost_equal(ncu.log1p(1e-6), ncu.log(1+1e-6)) def test_special(self): - assert_equal(ncu.log1p(np.nan), np.nan) - assert_equal(ncu.log1p(np.inf), np.inf) - with np.errstate(divide="ignore"): + with np.errstate(invalid="ignore", divide="ignore"): + assert_equal(ncu.log1p(np.nan), np.nan) + assert_equal(ncu.log1p(np.inf), np.inf) assert_equal(ncu.log1p(-1.), -np.inf) - with np.errstate(invalid="ignore"): assert_equal(ncu.log1p(-2.), np.nan) assert_equal(ncu.log1p(-np.inf), np.nan) diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py index 48c92c548224..ddb1513c4258 100644 --- a/numpy/distutils/system_info.py +++ b/numpy/distutils/system_info.py @@ -434,7 +434,7 @@ class UmfpackNotFoundError(NotFoundError): the UMFPACK environment variable.""" -class system_info: +class system_info(object): """ get_info() is the only public method. Don't use others. """ @@ -962,7 +962,8 @@ def calc_info(self): if info is None: return dict_append(info, - define_macros=[('SCIPY_MKL_H', None)], + define_macros=[('SCIPY_MKL_H', None), + ('HAVE_CBLAS', None)], include_dirs=incl_dirs) if sys.platform == 'win32': pass # win32 has no pthread library @@ -1120,6 +1121,7 @@ def calc_info(self): h = os.path.dirname(h) dict_append(info, include_dirs=[h]) info['language'] = 'c' + info['define_macros'] = [('HAVE_CBLAS', None)] atlas_version, atlas_extra_info = get_atlas_version(**atlas) dict_append(atlas, **atlas_extra_info) @@ -1414,7 +1416,8 @@ def calc_info(self): if args: self.set_info(extra_compile_args=args, extra_link_args=link_args, - define_macros=[('NO_ATLAS_INFO', 3)]) + define_macros=[('NO_ATLAS_INFO', 3), + ('HAVE_CBLAS', None)]) return #atlas_info = {} ## uncomment for testing @@ -1515,7 +1518,8 @@ def calc_info(self): if args: self.set_info(extra_compile_args=args, extra_link_args=link_args, - define_macros=[('NO_ATLAS_INFO', 3)]) + define_macros=[('NO_ATLAS_INFO', 3), + ('HAVE_CBLAS', None)]) return need_blas = 0 @@ -1556,9 +1560,33 @@ def calc_info(self): info = self.check_libs(lib_dirs, blas_libs, []) if info is None: return - info['language'] = 'f77' # XXX: is it generally true? + if self.has_cblas(): + info['language'] = 'c' + info['define_macros'] = [('HAVE_CBLAS', None)] + else: + info['language'] = 'f77' # XXX: is it generally true? self.set_info(**info) + def has_cblas(self): + # primitive cblas check by looking for the header + res = False + c = distutils.ccompiler.new_compiler() + tmpdir = tempfile.mkdtemp() + s = """#include """ + src = os.path.join(tmpdir, 'source.c') + try: + with open(src, 'wt') as f: + f.write(s) + try: + c.compile([src], output_dir=tmpdir, + include_dirs=self.get_include_dirs()) + res = True + except distutils.ccompiler.CompileError: + res = False + finally: + shutil.rmtree(tmpdir) + return res + class openblas_info(blas_info): section = 'openblas' @@ -1580,9 +1608,10 @@ def calc_info(self): return if not self.check_embedded_lapack(info): - return None + return - info['language'] = 'f77' # XXX: is it generally true? + info['language'] = 'c' + info['define_macros'] = [('HAVE_CBLAS', None)] self.set_info(**info) diff --git a/numpy/linalg/bscript b/numpy/linalg/bscript index deed4fd72fec..70fdd9de3b5d 100644 --- a/numpy/linalg/bscript +++ b/numpy/linalg/bscript @@ -20,7 +20,7 @@ def pbuild(context): return context.default_builder(extension, includes=includes, **kw) - + context.register_builder("lapack_lite", build_lapack_lite) context.register_builder("_umath_linalg", build_lapack_lite) diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index aa0a2669fd7d..5189f7e633f4 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -449,6 +449,15 @@ def test_min_int(self): # Should not raise: assert_allclose(a, a) + def test_report_fail_percentage(self): + a = np.array([1, 1, 1, 1]) + b = np.array([1, 1, 1, 2]) + try: + assert_allclose(a, b) + msg = '' + except AssertionError as exc: + msg = exc.args[0] + self.assertTrue("mismatch 25.0%" in msg) class TestArrayAlmostEqualNulp(unittest.TestCase): @dec.knownfailureif(True, "Github issue #347") diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index bd184d922041..71c7145f9c51 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -1289,7 +1289,7 @@ def assert_allclose(actual, desired, rtol=1e-7, atol=0, """ import numpy as np def compare(x, y): - return np.allclose(x, y, rtol=rtol, atol=atol) + return np.core.numeric._allclose_points(x, y, rtol=rtol, atol=atol) actual, desired = np.asanyarray(actual), np.asanyarray(desired) header = 'Not equal to tolerance rtol=%g, atol=%g' % (rtol, atol) diff --git a/tools/travis-test.sh b/tools/travis-test.sh index d970daab3d3e..f342aa7b13b1 100755 --- a/tools/travis-test.sh +++ b/tools/travis-test.sh @@ -59,9 +59,9 @@ setup_bento() cd .. # Waf - wget http://waf.googlecode.com/files/waf-1.7.13.tar.bz2 - tar xjvf waf-1.7.13.tar.bz2 - cd waf-1.7.13 + wget http://ftp.waf.io/pub/release/waf-1.7.16.tar.bz2 + tar xjvf waf-1.7.16.tar.bz2 + cd waf-1.7.16 python waf-light export WAFDIR=$PWD cd ..