Merge branch 'master' into ctranslist · scikit-learn/scikit-learn@1fc61a5

Commit 1fc61a5
Merge branch 'master' into ctranslist
2 parents 5cca32e + 0c0a9e8

17 files changed: +168 -89 lines changed

.circleci/config.yml

Lines changed: 30 additions & 0 deletions
@@ -65,6 +65,21 @@ jobs:
           path: ~/log.txt
           destination: log.txt
 
+  pypy3:
+    docker:
+      - image: pypy:3-6.0.0
+    steps:
+      - restore_cache:
+          keys:
+            - pypy3-ccache-{{ .Branch }}
+            - pypy3-ccache
+      - checkout
+      - run: ./build_tools/circle/build_test_pypy.sh
+      - save_cache:
+          key: pypy3-ccache-{{ .Branch }}-{{ .BuildNum }}
+          paths:
+            - ~/.ccache
+            - ~/.cache/pip
 
   deploy:
     docker:
@@ -89,6 +104,21 @@ workflows:
     jobs:
       - python3
      - python2
+      - pypy3:
+          filters:
+            branches:
+              only:
+                - 0.20.X
       - deploy:
           requires:
             - python3
+  pypy:
+    triggers:
+      - schedule:
+          cron: "0 0 * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - pypy3

build_tools/circle/build_test_pypy.sh

Lines changed: 6 additions & 3 deletions
@@ -18,13 +18,16 @@ source pypy-env/bin/activate
 python --version
 which python
 
-pip install --extra-index https://antocuni.github.io/pypy-wheels/ubuntu numpy==1.14.4 Cython pytest
+pip install --extra-index https://antocuni.github.io/pypy-wheels/ubuntu numpy Cython pytest
 pip install "scipy>=1.1.0" sphinx numpydoc docutils
 
 ccache -M 512M
 export CCACHE_COMPRESS=1
 export PATH=/usr/lib/ccache:$PATH
+export LOKY_MAX_CPU_COUNT="2"
 
-pip install -e .
+pip install -vv -e .
 
-make test
+python -m pytest sklearn/
+python -m pytest doc/sphinxext/
+python -m pytest $(find doc -name '*.rst' | sort)
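For context, LOKY_MAX_CPU_COUNT caps the CPU count that loky (joblib's process backend) reports, so parallel scikit-learn code spawns at most that many workers on the CI box. A minimal sketch, assuming joblib is installed and the variable is set before loky first inspects the machine:

    import os
    os.environ["LOKY_MAX_CPU_COUNT"] = "2"  # must be set before loky reads the CPU count

    from joblib import Parallel, delayed

    # With the cap in place, n_jobs=-1 should resolve to at most 2 worker processes.
    print(Parallel(n_jobs=-1)(delayed(pow)(i, 2) for i in range(8)))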

conftest.py

Lines changed: 3 additions & 1 deletion
@@ -32,7 +32,9 @@ def pytest_collection_modifyitems(config, items):
     skip_marker = pytest.mark.skip(
         reason='FeatureHasher is not compatible with PyPy')
     for item in items:
-        if item.name == 'sklearn.feature_extraction.hashing.FeatureHasher':
+        if item.name in (
+                'sklearn.feature_extraction.hashing.FeatureHasher',
+                'sklearn.feature_extraction.text.HashingVectorizer'):
             item.add_marker(skip_marker)
 
     # Skip tests which require internet if the flag is provided
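The hunk above extends an existing pytest collection hook. As a minimal, self-contained sketch of the pattern (the PyPy guard is an assumption here, not shown in the hunk):

    # conftest.py -- sketch; skips the named doctest items only under PyPy
    import platform

    import pytest

    def pytest_collection_modifyitems(config, items):
        if platform.python_implementation() != 'PyPy':
            return
        skip_marker = pytest.mark.skip(
            reason='FeatureHasher is not compatible with PyPy')
        for item in items:
            if item.name in (
                    'sklearn.feature_extraction.hashing.FeatureHasher',
                    'sklearn.feature_extraction.text.HashingVectorizer'):
                item.add_marker(skip_marker)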

doc/developers/advanced_installation.rst

Lines changed: 10 additions & 26 deletions
@@ -34,7 +34,7 @@ Building from source
 
 Scikit-learn requires:
 
-- Python (>= 2.7 or >= 3.4),
+- Python (>= 3.5),
 - NumPy (>= 1.8.2),
 - SciPy (>= 0.13.3).
 
@@ -110,18 +110,11 @@ Linux
 
 Installing from source requires you to have installed the scikit-learn runtime
 dependencies, Python development headers and a working C/C++ compiler.
-Under Debian-based operating systems, which include Ubuntu, if you have
-Python 2 you can install all these requirements by issuing::
-
-    sudo apt-get install build-essential python-dev python-setuptools \
-                     python-numpy python-scipy \
-                     libatlas-dev libatlas3-base
-
-If you have Python 3::
-
+Under Debian-based operating systems, which include Ubuntu::
+
     sudo apt-get install build-essential python3-dev python3-setuptools \
-                     python3-numpy python3-scipy \
-                     libatlas-dev libatlas3-base
+                python3-numpy python3-scipy \
+                libatlas-dev libatlas3-base
 
 On recent Debian and Ubuntu (e.g. Ubuntu 14.04 or later) make sure that ATLAS
 is used to provide the implementation of the BLAS and LAPACK linear algebra
@@ -190,9 +183,7 @@ PATH environment variable.
 32-bit Python
 -------------
 
-For 32-bit python it is possible use the standalone installers for
-`microsoft visual c++ express 2008 <http://download.microsoft.com/download/A/5/4/A54BADB6-9C3F-478D-8657-93B3FC9FE62D/vcsetup.exe>`_
-for Python 2 or Microsoft Visual C++ Express 2010 for Python 3.
+For 32-bit Python use Microsoft Visual C++ Express 2010.
 
 Once installed you should be able to build scikit-learn without any
 particular configuration by running the following command in the scikit-learn
@@ -211,34 +202,27 @@ The Windows SDKs include the MSVC compilers both for 32 and 64-bit
 architectures. They come as a ``GRMSDKX_EN_DVD.iso`` file that can be mounted
 as a new drive with a ``setup.exe`` installer in it.
 
-- For Python 2 you need SDK **v7.0**: `MS Windows SDK for Windows 7 and .NET
-  Framework 3.5 SP1
-  <https://www.microsoft.com/en-us/download/details.aspx?id=18950>`_
-
-- For Python 3 you need SDK **v7.1**: `MS Windows SDK for Windows 7 and .NET
+- For Python you need SDK **v7.1**: `MS Windows SDK for Windows 7 and .NET
   Framework 4
   <https://www.microsoft.com/en-us/download/details.aspx?id=8442>`_
 
 Both SDKs can be installed in parallel on the same host. To use the Windows
 SDKs, you need to setup the environment of a ``cmd`` console launched with the
-following flags (at least for SDK v7.0)::
+following flags ::
 
     cmd /E:ON /V:ON /K
 
 Then configure the build environment with::
 
     SET DISTUTILS_USE_SDK=1
     SET MSSdk=1
-    "C:\Program Files\Microsoft SDKs\Windows\v7.0\Setup\WindowsSdkVer.exe" -q -version:v7.0
-    "C:\Program Files\Microsoft SDKs\Windows\v7.0\Bin\SetEnv.cmd" /x64 /release
+    "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1
+    "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release
 
 Finally you can build scikit-learn in the same ``cmd`` console::
 
     python setup.py install
 
-Replace ``v7.0`` by the ``v7.1`` in the above commands to do the same for
-Python 3 instead of Python 2.
-
 Replace ``/x64`` by ``/x86`` to build for 32-bit Python instead of 64-bit
 Python.
 
doc/developers/utilities.rst

Lines changed: 1 addition & 2 deletions
@@ -175,8 +175,7 @@ Graph Routines
 Benchmarking
 ------------
 
-- :func:`bench.total_seconds` (back-ported from ``timedelta.total_seconds``
-  in Python 2.7). Used in ``benchmarks/bench_glm.py``.
+- :func:`bench.total_seconds`: Used in ``benchmarks/bench_glm.py``.
 
 
 Testing Functions
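The back-port note is obsolete because ``total_seconds`` has been a native ``timedelta`` method since Python 2.7/3.2:

    from datetime import timedelta

    delta = timedelta(minutes=1, seconds=30)
    print(delta.total_seconds())  # 90.0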

doc/modules/feature_extraction.rst

Lines changed: 2 additions & 2 deletions
@@ -735,9 +735,9 @@ decide better::
     array([[1, 1, 1, 0, 1, 1, 1, 0],
            [1, 1, 0, 1, 1, 1, 0, 1]])
 
-In the above example, ``'char_wb`` analyzer is used, which creates n-grams
+In the above example, ``char_wb`` analyzer is used, which creates n-grams
 only from characters inside word boundaries (padded with space on each
-side). The ``'char'`` analyzer, alternatively, creates n-grams that
+side). The ``char`` analyzer, alternatively, creates n-grams that
 span across words::
 
     >>> ngram_vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(5, 5))
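A quick way to see the difference the corrected text describes (a small sketch; the exact vocabularies depend on the input text):

    from sklearn.feature_extraction.text import CountVectorizer

    text = ['jumpy fox']
    wb = CountVectorizer(analyzer='char_wb', ngram_range=(5, 5)).fit(text)
    ch = CountVectorizer(analyzer='char', ngram_range=(5, 5)).fit(text)
    print(sorted(wb.vocabulary_))  # 5-grams stay inside space-padded words
    print(sorted(ch.vocabulary_))  # 5-grams such as 'py fo' cross the space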

doc/other_distributions.rst

Lines changed: 1 addition & 8 deletions
@@ -36,20 +36,13 @@ Arch Linux
 
 Arch Linux's package is provided through the `official repositories
 <https://www.archlinux.org/packages/?q=scikit-learn>`_ as
-``python-scikit-learn`` for Python 3 and ``python2-scikit-learn`` for Python 2.
+``python-scikit-learn`` for Python.
 It can be installed by typing the following command:
 
 .. code-block:: none
 
     # pacman -S python-scikit-learn
 
-or:
-
-.. code-block:: none
-
-    # pacman -S python2-scikit-learn
-
-depending on the version of Python you use.
 
 
 NetBSD

examples/compose/plot_column_transformer.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def transform(self, posts):
     # Extract the subject & body
     ('subjectbody', SubjectBodyExtractor()),
 
-    # Use C toolumnTransformer to combine the features from subject and body
+    # Use ColumnTransformer to combine the features from subject and body
     ('union', ColumnTransformer(
         [
             # Pulling features from the post's subject line (first column)
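For readers new to ColumnTransformer, a stripped-down sketch of the same idea, with hypothetical two-column text data (the names and data here are illustrative, not from the example):

    import numpy as np
    from sklearn.compose import ColumnTransformer
    from sklearn.feature_extraction.text import CountVectorizer

    # Column 0 holds subjects, column 1 holds bodies; each gets its own
    # vectorizer and the resulting feature blocks are concatenated.
    union = ColumnTransformer([
        ('subject', CountVectorizer(), 0),
        ('body', CountVectorizer(), 1),
    ])
    X = np.array([['free money', 'click the link below'],
                  ['meeting notes', 'agenda attached']])
    Xt = union.fit_transform(X)
    print(Xt.shape)  # (2, n_subject_features + n_body_features)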

sklearn/cluster/optics_.py

Lines changed: 38 additions & 5 deletions
@@ -14,6 +14,7 @@
 import numpy as np
 
 from ..utils import check_array
+from ..utils import gen_batches, get_chunk_n_rows
 from ..utils.validation import check_is_fitted
 from ..neighbors import NearestNeighbors
 from ..base import BaseEstimator, ClusterMixin
@@ -395,8 +396,6 @@ def fit(self, X, y=None):
         # Start all points as 'unprocessed' ##
         self.reachability_ = np.empty(n_samples)
         self.reachability_.fill(np.inf)
-        self.core_distances_ = np.empty(n_samples)
-        self.core_distances_.fill(np.nan)
         # Start all points as noise ##
         self.labels_ = np.full(n_samples, -1, dtype=int)
 
@@ -407,9 +406,7 @@ def fit(self, X, y=None):
                                 n_jobs=self.n_jobs)
 
         nbrs.fit(X)
-        self.core_distances_[:] = nbrs.kneighbors(X,
-                                                  self.min_samples)[0][:, -1]
-
+        self.core_distances_ = self._compute_core_distances_(X, nbrs)
         self.ordering_ = self._calculate_optics_order(X, nbrs)
 
         indices_, self.labels_ = _extract_optics(self.ordering_,
@@ -425,6 +422,42 @@ def fit(self, X, y=None):
 
     # OPTICS helper functions
 
+    def _compute_core_distances_(self, X, neighbors, working_memory=None):
+        """Compute the k-th nearest neighbor of each sample
+
+        Equivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]
+        but with more memory efficiency.
+
+        Parameters
+        ----------
+        X : array, shape (n_samples, n_features)
+            The data.
+        neighbors : NearestNeighbors instance
+            The fitted nearest neighbors estimator.
+        working_memory : int, optional
+            The sought maximum memory for temporary distance matrix chunks.
+            When None (default), the value of
+            ``sklearn.get_config()['working_memory']`` is used.
+
+        Returns
+        -------
+        core_distances : array, shape (n_samples,)
+            Distance at which each sample becomes a core point.
+            Points which will never be core have a distance of inf.
+        """
+        n_samples = len(X)
+        core_distances = np.empty(n_samples)
+        core_distances.fill(np.nan)
+
+        chunk_n_rows = get_chunk_n_rows(row_bytes=16 * self.min_samples,
+                                        max_n_rows=n_samples,
+                                        working_memory=working_memory)
+        slices = gen_batches(n_samples, chunk_n_rows)
+        for sl in slices:
+            core_distances[sl] = neighbors.kneighbors(
+                X[sl], self.min_samples)[0][:, -1]
+        return core_distances
+
     def _calculate_optics_order(self, X, nbrs):
         # Main OPTICS loop. Not parallelizable. The order that entries are
         # written to the 'ordering_' list is important!
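The new helper trades one large kneighbors call for several chunked ones, so the temporary distance matrix stays within the working-memory budget. A self-contained sketch of that pattern (a fixed chunk size stands in for get_chunk_n_rows, whose public location varies across scikit-learn versions):

    import numpy as np
    from sklearn.neighbors import NearestNeighbors
    from sklearn.utils import gen_batches

    rng = np.random.RandomState(0)
    X = rng.rand(1000, 3)
    min_samples = 5

    nbrs = NearestNeighbors(n_neighbors=min_samples).fit(X)

    chunk_n_rows = 256  # stand-in for the working-memory computation above
    core_distances = np.full(len(X), np.nan)
    for sl in gen_batches(len(X), chunk_n_rows):  # sl is a slice object
        core_distances[sl] = nbrs.kneighbors(X[sl], min_samples)[0][:, -1]

    # Same values as the unchunked call, with bounded peak memory.
    assert np.allclose(core_distances,
                       nbrs.kneighbors(X, min_samples)[0][:, -1])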
