xhluca
diff --git a/doc/whats_new/v0.20.rst
Lines changed: 0 additions & 8 deletions b/doc/whats_new/v0.20.rst
Lines changed: 0 additions & 8 deletions
diff --git a/sklearn/feature_extraction/tests/test_text.py
Lines changed: 1 addition & 31 deletions b/sklearn/feature_extraction/tests/test_text.py
Lines changed: 1 addition & 31 deletions
diff --git a/sklearn/feature_extraction/text.py
Lines changed: 7 additions & 6 deletions b/sklearn/feature_extraction/text.py
Lines changed: 7 additions & 6 deletions
@@ -51,14 +51,6 @@ Changelog
   combination with ``handle_unknown='ignore'``.
   :issue:`12881` by `Joris Van den Bossche`_.
 
-:mod:`sklearn.feature_extraction.text`
-......................................
-
-- |Fix| Fixed a bug in :class:`feature_extraction.text.CountVectorizer` which 
-  would result in the sparse feature matrix having conflicting `indptr` and
-  `indices` precisions under very large vocabularies. :issue:`11295` by
-  :user:`Gabriel Vacaliuc <gvacaliuc>`.
-
 .. _changes_0_20_2:
 
 Version 0.20.2
 
@@ -36,8 +36,7 @@
                                    assert_warns_message, assert_raise_message,
                                    clean_warning_registry, ignore_warnings,
                                    SkipTest, assert_raises, assert_no_warnings,
-                                   fails_if_pypy, assert_allclose_dense_sparse,
-                                   skip_if_32bit)
+                                   fails_if_pypy, assert_allclose_dense_sparse)
 from collections import defaultdict
 from functools import partial
 import pickle
@@ -1145,35 +1144,6 @@ def test_vectorizer_stop_words_inconsistent():
                          ['hello world'])
 
 
-@skip_if_32bit
-def test_countvectorizer_sort_features_64bit_sparse_indices():
-    """
-    Check that CountVectorizer._sort_features preserves the dtype of its sparse
-    feature matrix.
-
-    This test is skipped on 32bit platforms, see:
-        https://github.com/scikit-learn/scikit-learn/pull/11295
-    for more details.
-    """
-
-    X = sparse.csr_matrix((5, 5), dtype=np.int64)
-
-    # force indices and indptr to int64.
-    INDICES_DTYPE = np.int64
-    X.indices = X.indices.astype(INDICES_DTYPE)
-    X.indptr = X.indptr.astype(INDICES_DTYPE)
-
-    vocabulary = {
-            "scikit-learn": 0,
-            "is": 1,
-            "great!": 2
-            }
-
-    Xs = CountVectorizer()._sort_features(X, vocabulary)
-
-    assert INDICES_DTYPE == Xs.indices.dtype
-
-
 @fails_if_pypy
 @pytest.mark.parametrize('Estimator',
                          [CountVectorizer, TfidfVectorizer, HashingVectorizer])
 
@@ -31,7 +31,6 @@
 from .stop_words import ENGLISH_STOP_WORDS
 from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES
 from ..utils.fixes import sp_version
-from ..utils import _IS_32BIT
 
 
 __all__ = ['HashingVectorizer',
@@ -872,7 +871,7 @@ def _sort_features(self, X, vocabulary):
         Returns a reordered matrix and modifies the vocabulary in place
         """
         sorted_features = sorted(vocabulary.items())
-        map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)
+        map_index = np.empty(len(sorted_features), dtype=np.int32)
         for new_val, (term, old_val) in enumerate(sorted_features):
             vocabulary[term] = new_val
             map_index[old_val] = new_val
@@ -962,12 +961,14 @@ def _count_vocab(self, raw_documents, fixed_vocab):
                                  " contain stop words")
 
         if indptr[-1] > 2147483648:  # = 2**31 (max int32 is 2**31 - 1)
-            if _IS_32BIT:
+            if sp_version >= (0, 14):
+                indices_dtype = np.int64
+            else:
                 raise ValueError(('sparse CSR array has {} non-zero '
                                   'elements and requires 64 bit indexing, '
-                                  'which is unsupported with 32 bit Python.')
-                                 .format(indptr[-1]))
-            indices_dtype = np.int64
+                                  ' which is unsupported with scipy {}. '
+                                  'Please upgrade to scipy >=0.14')
+                                 .format(indptr[-1], '.'.join(sp_version)))
 
         else:
             indices_dtype = np.int32