scikit-learn
diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py
Lines changed: 8 additions & 3 deletions
diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
Lines changed: 125 additions & 4 deletions
diff --git a/sklearn/src/cblas/cblas_sdot.c b/sklearn/src/cblas/cblas_sdot.c
Lines changed: 123 additions & 45 deletions
@@ -642,7 +642,9 @@ def _init_centroids(X, k, init, random_state=None, x_squared_norms=None,
         seeds = random_state.permutation(n_samples)[:k]
         centers = X[seeds]
     elif hasattr(init, '__array__'):
-        centers = init
+        # ensure that the centers have the same dtype as X;
+        # this is required by Cython fused types
+        centers = np.array(init, dtype=X.dtype)
     elif callable(init):
         centers = init(X, k, random_state=random_state)
     else:
@@ -1038,7 +1040,9 @@ def _mini_batch_step(X, x_squared_norms, centers, counts,
             counts[center_idx] += count
 
             # inplace rescale to compute mean of all points (old and new)
-            centers[center_idx] /= counts[center_idx]
+            # Note: numpy >= 1.10 does not support '/=' for the following
+            # expression for a mixture of int and float (see numpy issue #6464)
+            centers[center_idx] = centers[center_idx]/counts[center_idx]
 
             # update the squared diff if necessary
             if compute_squared_diff:
@@ -1238,7 +1242,8 @@ def fit(self, X, y=None):
         random_state = check_random_state(self.random_state)
         # to handle sparse data which only works as float64 at the moment
         if sp.issparse(X):
-            X = check_array(X, accept_sparse="csr", order='C', dtype=np.float64)
+            X = check_array(X, accept_sparse="csr", order='C',
+                            dtype=np.float64)
         else:
             X = check_array(X, accept_sparse="csr", order='C')
         n_samples, n_features = X.shape
 
@@ -16,7 +16,6 @@
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import if_safe_multiprocessing_with_blas
-from sklearn.utils.testing import if_not_mac_os
 from sklearn.utils.testing import assert_raise_message
 
 
@@ -272,14 +271,18 @@ def test_k_means_explicit_init_shape():
         msg = "does not match the number of features of the data"
         assert_raises_regex(ValueError, msg, km.fit, X)
         # for callable init
-        km = Class(n_init=1, init=lambda X_, k, random_state: X_[:, :2], n_clusters=len(X))
+        km = Class(n_init=1,
+                   init=lambda X_, k, random_state: X_[:, :2],
+                   n_clusters=len(X))
         assert_raises_regex(ValueError, msg, km.fit, X)
         # mismatch of number of clusters
         msg = "does not match the number of clusters"
         km = Class(n_init=1, init=X[:2, :], n_clusters=3)
         assert_raises_regex(ValueError, msg, km.fit, X)
         # for callable init
-        km = Class(n_init=1, init=lambda X_, k, random_state: X_[:2, :], n_clusters=3)
+        km = Class(n_init=1,
+                   init=lambda X_, k, random_state: X_[:2, :],
+                   n_clusters=3)
         assert_raises_regex(ValueError, msg, km.fit, X)
 
 
@@ -730,4 +733,122 @@ def test_x_squared_norms_init_centroids():
 def test_max_iter_error():
 
     km = KMeans(max_iter=-1)
-    assert_raise_message(ValueError, 'Number of iterations should be', km.fit, X)
+    assert_raise_message(ValueError,
+                         'Number of iterations should be', km.fit, X)
+
+
+def test_kmeans_float32_64():
+    km = KMeans(n_init=1, random_state=11)
+
+    # float64 data
+    km.fit(X)
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia64 = km.inertia_
+    X_new64 = km.transform(km.cluster_centers_)
+    pred64 = km.predict(X[0])
+
+    # float32 data
+    km.fit(np.float32(X))
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float32)
+    inertia32 = km.inertia_
+    X_new32 = km.transform(km.cluster_centers_)
+    pred32 = km.predict(X[0])
+
+    # compare arrays with low precision since float32 and float64 results
+    # can differ up to the 4th decimal place
+    assert_array_almost_equal(inertia32, inertia64, decimal=4)
+    assert_array_almost_equal(X_new32, X_new64, decimal=4)
+    # both predictions have to be the same and correspond to the correct label
+    assert_equal(pred32, pred64)
+    assert_equal(pred32, km.labels_[0])
+    assert_equal(pred64, km.labels_[0])
+
+    # float64 sparse data
+    km.fit(X_csr)
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia64 = km.inertia_
+    X_new64 = km.transform(km.cluster_centers_)
+    pred64 = km.predict(X_csr[0])
+
+    # float32 sparse data
+    # Note: at the moment sparse data is always processed as float64 internally
+    km.fit(sp.csr_matrix(X_csr, dtype=np.float32))
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia32 = km.inertia_
+    X_new32 = km.transform(km.cluster_centers_)
+    pred32 = km.predict(X_csr[0])
+
+    assert_array_almost_equal(inertia32, inertia64)
+    assert_array_almost_equal(X_new32, X_new64)
+    # both predictions have to be the same and correspond to the correct label
+    assert_equal(pred32, pred64)
+    assert_equal(pred32, km.labels_[0])
+    assert_equal(pred64, km.labels_[0])
+
+
+def test_mb_k_means_float32_64():
+    km = MiniBatchKMeans(n_init=1, random_state=30)
+
+    # float64 data
+    km.fit(X)
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia64 = km.inertia_
+    X_new64 = km.transform(km.cluster_centers_)
+    pred64 = km.predict(X[0])
+    km.partial_fit(X[0:3])
+    # dtype of cluster centers has to stay the same after partial_fit
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+
+    # float32 data
+    km.fit(np.float32(X))
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float32)
+    inertia32 = km.inertia_
+    X_new32 = km.transform(km.cluster_centers_)
+    pred32 = km.predict(X[0])
+    km.partial_fit(X[0:3])
+    # dtype of cluster centers has to stay the same after partial_fit
+    assert_equal(km.cluster_centers_.dtype, np.float32)
+
+    # compare arrays with low precision since float32 and float64 results
+    # can differ up to the 4th decimal place
+    assert_array_almost_equal(inertia32, inertia64, decimal=4)
+    assert_array_almost_equal(X_new32, X_new64, decimal=4)
+    # both predictions have to be the same and correspond to the correct label
+    assert_equal(pred32, pred64)
+    assert_equal(pred64, km.labels_[0])
+
+    # float64 sparse data
+    km.fit(X_csr)
+    # dtype of cluster centers has to be the dtype of the input data
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia64 = km.inertia_
+    X_new64 = km.transform(km.cluster_centers_)
+    pred64 = km.predict(X_csr[0])
+    km.partial_fit(X_csr[0:3])
+    # dtype of cluster centers has to stay the same after partial_fit
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+
+    # float32 sparse data
+    # Note: at the moment sparse data is always processed as float64 internally
+    km.fit(sp.csr_matrix(X_csr, dtype=np.float32))
+    # dtype of cluster centers always has to be float64 (see Note above)
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+    inertia32 = km.inertia_
+    X_new32 = km.transform(km.cluster_centers_)
+    pred32 = km.predict(X_csr[0])
+    km.partial_fit(X_csr[0:3])
+    # dtype of cluster centers has to stay the same after partial_fit
+    assert_equal(km.cluster_centers_.dtype, np.float64)
+
+    assert_array_almost_equal(inertia32, inertia64)
+    assert_array_almost_equal(X_new32, X_new64)
+    # both predictions have to be the same and correspond to the correct label
+    assert_equal(pred32, pred64)
+    assert_equal(pred32, km.labels_[0])
+    assert_equal(pred64, km.labels_[0])
@@ -1,54 +1,132 @@
-/*
- *             Automatically Tuned Linear Algebra Software v3.2
- *                    (C) Copyright 1999 R. Clint Whaley                     
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *   1. Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *   2. Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions, and the following disclaimer in the
- *      documentation and/or other materials provided with the distribution.
- *   3. The name of the University of Tennessee, the ATLAS group,
- *      or the names of its contributers may not be used to endorse
- *      or promote products derived from this software without specific
- *      written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE. 
+/* ---------------------------------------------------------------------
+ *
+ * -- Automatically Tuned Linear Algebra Software (ATLAS)
+ *    (C) Copyright 2000 All Rights Reserved
+ *
+ * -- ATLAS routine -- Version 3.2 -- December 25, 2000
+ *
+ * Author         : Antoine P. Petitet
+ * Originally developed at the University of Tennessee,
+ * Innovative Computing Laboratory, Knoxville TN, 37996-1301, USA.
+ *
+ * ---------------------------------------------------------------------
+ *
+ * -- Copyright notice and Licensing terms:
+ *
+ *  Redistribution  and  use in  source and binary forms, with or without
+ *  modification, are  permitted provided  that the following  conditions
+ *  are met:
+ *
+ * 1. Redistributions  of  source  code  must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce  the above copyright
+ *    notice,  this list of conditions, and the  following disclaimer in
+ *    the documentation and/or other materials provided with the distri-
+ *    bution.
+ * 3. The name of the University,  the ATLAS group,  or the names of its
+ *    contributors  may not be used to endorse or promote products deri-
+ *    ved from this software without specific written permission.
  *
+ * -- Disclaimer:
+ *
+ * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,  INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO,  PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEO-
+ * RY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  (IN-
+ * CLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---------------------------------------------------------------------
  */
+/*
+ * Include files
+ */
+#include "atlas_refmisc.h"
 
-#define SREAL
-#include "atlas_misc.h"
-#ifdef ATL_USEPTHREADS
-   #include "atlas_ptalias1.h"
-#endif
-#include "atlas_level1.h"
-#include "cblas.h"
-
-float cblas_sdot(const int N, const float *X, const int incX, 
-                     const float *Y, const int incY)
+float cblas_sdot
+(
+   const int                  N,
+   const float                * X,
+   const int                  INCX,
+   const float                * Y,
+   const int                  INCY
+)
 {
-   if (N > 0)
+/*
+ * Purpose
+ * =======
+ *
+ * cblas_sdot returns the dot product x^T * y of two n-vectors x and y.
+ *
+ * Arguments
+ * =========
+ *
+ * N       (input)                       const int
+ *         On entry, N specifies the length of the vectors x and y.  N
+ *         must be at least zero. Unchanged on exit.
+ *
+ * X       (input)                       const float *
+ *         On entry,  X  points to the  first entry to be accessed of an
+ *         incremented array of size equal to or greater than
+ *            ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof(   float   ),
+ *         that contains the vector x. Unchanged on exit.
+ *
+ * INCX    (input)                       const int
+ *         On entry, INCX specifies the increment for the elements of X.
+ *         INCX must not be zero. Unchanged on exit.
+ *
+ * Y       (input)                       const float *
+ *         On entry,  Y  points to the  first entry to be accessed of an
+ *         incremented array of size equal to or greater than
+ *            ( 1 + ( n - 1 ) * abs( INCY ) ) * sizeof(   float   ),
+ *         that contains the vector y. Unchanged on exit.
+ *
+ * INCY    (input)                       const int
+ *         On entry, INCY specifies the increment for the elements of Y.
+ *         INCY must not be zero. Unchanged on exit.
+ *
+ * ---------------------------------------------------------------------
+ */
+/*
+ * .. Local Variables ..
+ */
+   register float             dot = ATL_sZERO, x0, x1, x2, x3,
+                              y0, y1, y2, y3;
+   float                      * StX;
+   register int               i;
+   int                        nu;
+   const int                  incX2 = 2 * INCX, incY2 = 2 * INCY,
+                              incX3 = 3 * INCX, incY3 = 3 * INCY,
+                              incX4 = 4 * INCX, incY4 = 4 * INCY;
+/* ..
+ * .. Executable Statements ..
+ *
+ */
+   if( N > 0 )
    {
-      if (incX < 0)
+      if( ( nu = ( N >> 2 ) << 2 ) != 0 )
       {
-         if (incY < 0) return(ATL_sdot(N, X, -incX, Y, -incY));
-         else return(ATL_sdot(N, X+(1-N)*incX, incX, Y, incY));
+         StX = (float *)X + nu * INCX;
+
+         do
+         {
+            x0 = (*X);     y0 = (*Y);     x1 = X[INCX ]; y1 = Y[INCY ];
+            x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3];
+            dot += x0 * y0; dot += x1 * y1; dot += x2 * y2; dot += x3 * y3;
+            X  += incX4; Y  += incY4;
+         } while( X != StX );
       }
-      else if (incY < 0) return(ATL_sdot(N, X+(N-1)*incX, -incX, Y, -incY));
-      else return(ATL_sdot(N, X, incX, Y, incY));
+
+      for( i = N - nu; i != 0; i-- )
+      {  x0 = (*X); y0 = (*Y); dot += x0 * y0; X += INCX; Y += INCY; }
    }
-   else return(0.0f);
+   return( dot );
+/*
+ * End of cblas_sdot
+ */
 }