Merge pull request #6426 from yenchenlin1994/remove-redundant-typo · scikit-learn/scikit-learn@323bcea · GitHub


Commit 323bcea

Merge pull request #6426 from yenchenlin1994/remove-redundant-typo
[MRG] DOC Remove redundant words in sklearn
2 parents 613e8be + 46fc1be commit 323bcea

32 files changed: +36 -36 lines changed

doc/faq.rst

Lines changed: 1 addition & 1 deletion
@@ -235,7 +235,7 @@ anymore. The version of joblib shipped with scikit-learn automatically uses
 that setting by default (under Python 3.4 and later).
 
 If you have custom code that uses ``multiprocessing`` directly instead of using
-it via joblib you can enable the the 'forkserver' mode globally for your
+it via joblib you can enable the 'forkserver' mode globally for your
 program: Insert the following instructions in your main script::
 
     import multiprocessing
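
The snippet this FAQ entry goes on to show is truncated by the hunk's context window; the standard Python 3.4+ pattern is along these lines (a minimal sketch, not the verbatim FAQ text)::

    import multiprocessing

    # other imports, custom code, data loading, model definition...

    if __name__ == '__main__':
        multiprocessing.set_start_method('forkserver')
        # calls into scikit-learn with n_jobs > 1 go here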

doc/modules/biclustering.rst

Lines changed: 2 additions & 2 deletions
@@ -140,7 +140,7 @@ are used to form the matrix :math:`Z`:
         C^{-1/2} V
     \end{bmatrix}
 
-where the the columns of :math:`U` are :math:`u_2, \dots, u_{\ell +
+where the columns of :math:`U` are :math:`u_2, \dots, u_{\ell +
 1}`, and similarly for :math:`V`.
 
 Then the rows of :math:`Z` are clustered using :ref:`k-means
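
For orientation: only the tail of the definition of :math:`Z` appears in the context lines above; the full stacked matrix is presumably the following (a reconstruction from the surrounding section, not part of this diff).

.. math::

    Z = \begin{bmatrix} R^{-1/2} U \\ C^{-1/2} V \end{bmatrix}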
@@ -174,7 +174,7 @@ The :class:`SpectralBiclustering` algorithm assumes that the input
 data matrix has a hidden checkerboard structure. The rows and columns
 of a matrix with this structure may be partitioned so that the entries
 of any bicluster in the Cartesian product of row clusters and column
-clusters is are approximately constant. For instance, if there are two
+clusters are approximately constant. For instance, if there are two
 row partitions and three column partitions, each row will belong to
 three biclusters, and each column will belong to two biclusters.

doc/modules/covariance.rst

Lines changed: 1 addition & 1 deletion
@@ -280,7 +280,7 @@ empirical covariance matrix is then rescaled to compensate the
 performed selection of observations ("consistency step"). Having
 computed the Minimum Covariance Determinant estimator, one can give
 weights to observations according to their Mahalanobis distance,
-leading the a reweighted estimate of the covariance matrix of the data
+leading to a reweighted estimate of the covariance matrix of the data
 set ("reweighting step").
 
 Rousseeuw and Van Driessen [4] developed the FastMCD algorithm in order
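
A minimal usage sketch of the estimator under discussion (the reweighting step happens inside ``fit``; the data below is made up for illustration)::

    import numpy as np
    from sklearn.covariance import MinCovDet

    rng = np.random.RandomState(0)
    X = rng.multivariate_normal([0, 0], [[1, .3], [.3, 1]], size=500)
    mcd = MinCovDet(random_state=0).fit(X)
    robust_cov = mcd.covariance_   # reweighted covariance estimate
    dist = mcd.mahalanobis(X)      # squared Mahalanobis distances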

doc/modules/cross_validation.rst

Lines changed: 1 addition & 1 deletion
@@ -465,7 +465,7 @@ Here is a usage example::
 
 :class:`ShuffleSplit` is thus a good alternative to :class:`KFold` cross
 validation that allows a finer control on the number of iterations and
-the proportion of samples in on each side of the train / test split.
+the proportion of samples on each side of the train / test split.
 
 
 Label-Shuffle-Split
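
The usage example the hunk header refers to is along these lines (a sketch against the 0.17-era ``sklearn.cross_validation`` API)::

    from sklearn.cross_validation import ShuffleSplit

    ss = ShuffleSplit(5, n_iter=3, test_size=0.25, random_state=0)
    for train_index, test_index in ss:
        print("TRAIN:", train_index, "TEST:", test_index)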

doc/modules/ensemble.rst

Lines changed: 1 addition & 1 deletion
@@ -679,7 +679,7 @@ the contribution of each weak learner by a factor :math:`\nu`:
 
     F_m(x) = F_{m-1}(x) + \nu \gamma_m h_m(x)
 
 The parameter :math:`\nu` is also called the **learning rate** because
-it scales the step length the the gradient descent procedure; it can
+it scales the step length the gradient descent procedure; it can
 be set via the ``learning_rate`` parameter.
 
 The parameter ``learning_rate`` strongly interacts with the parameter
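
As a concrete illustration of the shrinkage described above (a sketch, not part of the diff; smaller ``learning_rate`` values usually call for a larger ``n_estimators``)::

    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                     max_depth=1, random_state=0).fit(X, y)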

doc/modules/pipeline.rst

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ create complex models.
 
 (A :class:`FeatureUnion` has no way of checking whether two transformers
 might produce identical features. It only produces a union when the
-feature sets are disjoint, and making sure they are is the caller's
+feature sets are disjoint, and making sure they are the caller's
 responsibility.)
 
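A short sketch of the :class:`FeatureUnion` being discussed (the transformers are chosen here for illustration)::

    from sklearn.pipeline import FeatureUnion
    from sklearn.decomposition import PCA, KernelPCA

    # the caller must ensure the two transformers emit disjoint features
    combined = FeatureUnion([("linear_pca", PCA(n_components=2)),
                             ("kernel_pca", KernelPCA(n_components=2))])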

doc/modules/svm.rst

Lines changed: 1 addition & 1 deletion
@@ -181,7 +181,7 @@ for these classifiers.
 
 This might be made more clear by an example:
 
-Consider a three class problem with with class 0 having three support vectors
+Consider a three class problem with class 0 having three support vectors
 :math:`v^{0}_0, v^{1}_0, v^{2}_0` and class 1 and 2 having two support vectors
 :math:`v^{0}_1, v^{1}_1` and :math:`v^{0}_2, v^{1}_2` respectively. For each
 support vector :math:`v^{j}_i`, there are two dual coefficients. Let's call

doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
     dataset.data, dataset.target, test_size=0.5)
 
 
-# TASK: Build a an vectorizer that splits strings into sequence of 1 to 3
+# TASK: Build a vectorizer that splits strings into sequence of 1 to 3
 # characters instead of word tokens
 
 # TASK: Build a vectorizer / classifier pipeline using the previous analyzer

doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
     dataset.data, dataset.target, test_size=0.5)
 
 
-# TASK: Build a an vectorizer that splits strings into sequence of 1 to 3
+# TASK: Build a vectorizer that splits strings into sequence of 1 to 3
 # characters instead of word tokens
 vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer='char',
                              use_idf=False)

doc/whats_new.rst

Lines changed: 2 additions & 2 deletions
@@ -2812,7 +2812,7 @@ Other changes
 
 - :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for
   consistency with ``decision_function``; for ``kernel==linear``,
-  ``coef_`` was fixed in the the one-vs-one case, by `Andreas Müller`_.
+  ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_.
 
 - Performance improvements to efficient leave-one-out cross-validated
   Ridge regression, esp. for the ``n_samples > n_features`` case, in
@@ -2993,7 +2993,7 @@ Changelog
 
 - Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).
 
-- Fixed a memory leak in in :ref:`svm` module by `Brian Holt`_ (issue #367).
+- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).
 
 - Faster tests by `Fabian Pedregosa`_ and others.

examples/bicluster/plot_spectral_coclustering.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 ==============================================
 
 This example demonstrates how to generate a dataset and bicluster it
-using the the Spectral Co-Clustering algorithm.
+using the Spectral Co-Clustering algorithm.
 
 The dataset is generated using the ``make_biclusters`` function, which
 creates a matrix of small values and implants bicluster with large

examples/cluster/plot_cluster_comparison.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 clusters for the methods that needs this parameter
 specified. Note that affinity propagation has a tendency to
 create many clusters. Thus in this example its two parameters
-(damping and per-point preference) were set to to mitigate this
+(damping and per-point preference) were set to mitigate this
 behavior.
 """
 print(__doc__)

examples/plot_johnson_lindenstrauss_bound.py

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@
 Empirical validation
 ====================
 
-We validate the above bounds on the the digits dataset or on the 20 newsgroups
+We validate the above bounds on the digits dataset or on the 20 newsgroups
 text document (TF-IDF word frequencies) dataset:
 
 - for the digits dataset, some 8x8 gray level pixels data for 500
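
The bound being validated can also be queried directly; a minimal sketch (the printed value matches the function's own docstring example, assumed correct here)::

    from sklearn.random_projection import johnson_lindenstrauss_min_dim

    # minimum number of components needed for an eps-embedding
    print(johnson_lindenstrauss_min_dim(n_samples=1e6, eps=0.5))  # 663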

sklearn/ensemble/bagging.py

Lines changed: 1 addition & 1 deletion
@@ -610,7 +610,7 @@ def predict_proba(self, X):
         the mean predicted class probabilities of the base estimators in the
         ensemble. If base estimators do not implement a ``predict_proba``
         method, then it resorts to voting and the predicted class probabilities
-        of a an input sample represents the proportion of estimators predicting
+        of an input sample represents the proportion of estimators predicting
         each class.
 
         Parameters
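
A sketch of the fallback behavior described: with a base estimator that lacks ``predict_proba``, the ensemble's probabilities are voting proportions (the example is chosen here, not from the diff)::

    from sklearn.datasets import make_classification
    from sklearn.ensemble import BaggingClassifier
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)
    clf = BaggingClassifier(LinearSVC(), random_state=0).fit(X, y)
    proba = clf.predict_proba(X)  # proportion of estimators voting per class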

sklearn/ensemble/gradient_boosting.py

Lines changed: 2 additions & 2 deletions
@@ -1240,7 +1240,7 @@ def apply(self, X):
         -------
         X_leaves : array_like, shape = [n_samples, n_estimators, n_classes]
             For each datapoint x in X and for each tree in the ensemble,
-            return the index of the leaf x ends up in in each estimator.
+            return the index of the leaf x ends up in each estimator.
             In the case of binary classification n_classes is 1.
         """
 
@@ -1840,7 +1840,7 @@ def apply(self, X):
         -------
         X_leaves : array_like, shape = [n_samples, n_estimators]
             For each datapoint x in X and for each tree in the ensemble,
-            return the index of the leaf x ends up in in each estimator.
+            return the index of the leaf x ends up in each estimator.
         """
 
         leaves = super(GradientBoostingRegressor, self).apply(X)
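
A usage sketch of the ``apply`` method whose docstring is touched here::

    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(random_state=0)
    est = GradientBoostingRegressor(n_estimators=5, random_state=0).fit(X, y)
    leaves = est.apply(X)  # shape (n_samples, 5): one leaf index per tree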

sklearn/exceptions.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ class DataConversionWarning(UserWarning):
     This warning occurs when some input data needs to be converted or
     interpreted in a way that may not match the user's expectations.
 
-    For example, this warning may occur when the the user
+    For example, this warning may occur when the user
     - passes an integer array to a function which expects float input and
       will convert the input
     - requests a non-copying operation, but a copy is required to meet the

sklearn/externals/joblib/pool.py

Lines changed: 1 addition & 1 deletion
@@ -534,7 +534,7 @@ def __init__(self, processes=None, temp_folder=None, max_nbytes=1e6,
                     os.makedirs(pool_folder)
                     use_shared_mem = True
                 except IOError:
-                    # Missing rights in the the /dev/shm partition,
+                    # Missing rights in the /dev/shm partition,
                     # fallback to regular temp folder.
                     temp_folder = None
         if temp_folder is None:

sklearn/grid_search.py

Lines changed: 1 addition & 1 deletion
@@ -771,7 +771,7 @@ class GridSearchCV(BaseSearchCV):
     See Also
     ---------
     :class:`ParameterGrid`:
-        generates all the combinations of a an hyperparameter grid.
+        generates all the combinations of a hyperparameter grid.
 
     :func:`sklearn.cross_validation.train_test_split`:
         utility function to split the data into a development set usable
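
A quick sketch of the :class:`ParameterGrid` named in the See Also entry::

    from sklearn.grid_search import ParameterGrid

    grid = ParameterGrid({'kernel': ['linear', 'rbf'], 'C': [1, 10]})
    list(grid)  # all four parameter combinations, as dicts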

sklearn/kernel_ridge.py

Lines changed: 1 addition & 1 deletion
@@ -166,7 +166,7 @@ def fit(self, X, y=None, sample_weight=None):
         return self
 
     def predict(self, X):
-        """Predict using the the kernel ridge model
+        """Predict using the kernel ridge model
 
         Parameters
         ----------
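
A fit-and-predict sketch for the method touched here (random data for illustration)::

    import numpy as np
    from sklearn.kernel_ridge import KernelRidge

    rng = np.random.RandomState(0)
    X, y = rng.randn(10, 5), rng.randn(10)
    y_pred = KernelRidge(alpha=1.0).fit(X, y).predict(X)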

sklearn/linear_model/omp.py

Lines changed: 1 addition & 1 deletion
@@ -145,7 +145,7 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
               copy_Gram=True, copy_Xy=True, return_path=False):
     """Orthogonal Matching Pursuit step on a precomputed Gram matrix.
 
-    This function uses the the Cholesky decomposition method.
+    This function uses the Cholesky decomposition method.
 
     Parameters
     ----------

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 1 addition & 1 deletion
@@ -205,7 +205,7 @@ def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
     alphas = [5., 1., .5]
 
     # Use lars_path and lasso_path(new output) with 1D linear interpolation
-    # to compute the the same path
+    # to compute the same path
     alphas_lars, _, coef_path_lars = lars_path(X, y, method='lasso')
     coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1],
                                                coef_path_lars[:, ::-1])

sklearn/linear_model/tests/test_sgd.py

Lines changed: 1 addition & 1 deletion
@@ -445,7 +445,7 @@ def test_sgd_multiclass_njobs(self):
         assert_array_equal(pred, true_result2)
 
     def test_set_coef_multiclass(self):
-        # Checks coef_init and intercept_init shape for for multi-class
+        # Checks coef_init and intercept_init shape for multi-class
         # problems
         # Provided coef_ does not match dataset
         clf = self.factory()

sklearn/manifold/_barnes_hut_tsne.pyx

Lines changed: 2 additions & 2 deletions
@@ -195,7 +195,7 @@ cdef inline void index2offset(int* offset, int index, int n_dimensions) nogil:
     # Quite likely there's a fancy bitshift way of doing this
     # since the offset is equivalent to the binary representation
     # of the integer index
-    # We read the the offset array left-to-right
+    # We read the offset array left-to-right
     # such that the least significat bit is on the right
     cdef int rem, k, shift
     for k in range(n_dimensions):
@@ -212,7 +212,7 @@ cdef inline void index2offset(int* offset, int index, int n_dimensions) nogil:
 
 cdef inline int offset2index(int* offset, int n_dimensions) nogil:
     # Calculate the 1:1 index for a given offset array
-    # We read the the offset array right-to-left
+    # We read the offset array right-to-left
     # such that the least significat bit is on the right
     cdef int dim
    cdef int index = 0
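
The two helpers touched here convert between a flat cell index and its per-dimension offsets; a hypothetical pure-Python rendering of the relationship the comments describe (the offset array is the binary representation of the index, least significant bit rightmost)::

    def index2offset(index, n_dimensions):
        # most significant bit first, least significant bit on the right
        return [(index >> (n_dimensions - 1 - k)) & 1
                for k in range(n_dimensions)]

    def offset2index(offset):
        index = 0
        for bit in offset:
            index = (index << 1) | bit
        return index

    assert offset2index(index2offset(5, 3)) == 5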

sklearn/manifold/spectral_embedding_.py

Lines changed: 1 addition & 1 deletion
@@ -479,7 +479,7 @@ def fit(self, X, y=None):
                              "'precomputed', 'rbf', 'nearest_neighbors' "
                              "or a callable.") % self.affinity)
         elif not callable(self.affinity):
-            raise ValueError(("'affinity' is expected to be an an affinity "
+            raise ValueError(("'affinity' is expected to be an affinity "
                              "name or a callable. Got: %s") % self.affinity)
 
         affinity_matrix = self._get_affinity_matrix(X)

sklearn/manifold/tests/test_t_sne.py

Lines changed: 1 addition & 1 deletion
@@ -228,7 +228,7 @@ def test_preserve_trustworthiness_approximately():
     # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     # The Barnes-Hut approximation uses a different method to estimate
-    # P_ij using only a a number of nearest neighbors instead of all
+    # P_ij using only a number of nearest neighbors instead of all
     # points (so that k = 3 * perplexity). As a result we set the
     # perplexity=5, so that the number of neighbors is 5%.
     n_components = 2

sklearn/metrics/tests/test_pairwise.py

Lines changed: 1 addition & 1 deletion
@@ -270,7 +270,7 @@ def test_paired_distances():
         S3 = func(csr_matrix(X), csr_matrix(Y))
         assert_array_almost_equal(S, S3)
     if metric in PAIRWISE_DISTANCE_FUNCTIONS:
-        # Check the the pairwise_distances implementation
+        # Check the pairwise_distances implementation
         # gives the same value
        distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
        distances = np.diag(distances)

sklearn/mixture/dpgmm.py

Lines changed: 1 addition & 1 deletion
@@ -492,7 +492,7 @@ def _fit(self, X, y=None):
 
         A initialization step is performed before entering the em
         algorithm. If you want to avoid this step, set the keyword
-        argument init_params to the empty string '' when when creating
+        argument init_params to the empty string '' when creating
         the object. Likewise, if you would like just to do an
         initialization, set n_iter=0.
 
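Translating the docstring's two options into constructor calls (a sketch; other arguments left at their defaults)::

    from sklearn.mixture import DPGMM

    DPGMM(n_components=3, init_params='')  # skip the initialization step
    DPGMM(n_components=3, n_iter=0)        # initialization only, no EM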

sklearn/model_selection/_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ class GridSearchCV(BaseSearchCV):
763763
See Also
764764
---------
765765
:class:`ParameterGrid`:
766-
generates all the combinations of a an hyperparameter grid.
766+
generates all the combinations of a hyperparameter grid.
767767
768768
:func:`sklearn.model_selection.train_test_split`:
769769
utility function to split the data into a development set usable

sklearn/tree/_splitter.pxd

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ cdef class Splitter:
     # The 1-d `constant_features` array of size n_features holds in
     # `constant_features[:n_constant_features]` the feature ids with
     # constant values for all the samples that reached a specific node.
-    # The value `n_constant_features` is given by the the parent node to its
+    # The value `n_constant_features` is given by the parent node to its
     # child nodes. The content of the range `[n_constant_features:]` is left
     # undefined, but preallocated for performance reasons
     # This allows optimization with depth-based tree building.

sklearn/tree/_tree.pyx

Lines changed: 1 addition & 1 deletion
@@ -537,7 +537,7 @@ cdef class Tree:
     """
     # Wrap for outside world.
     # WARNING: these reference the current `nodes` and `value` buffers, which
-    # must not be be freed by a subsequent memory allocation.
+    # must not be freed by a subsequent memory allocation.
     # (i.e. through `_resize` or `__setstate__`)
     property n_classes:
         def __get__(self):

sklearn/utils/seq_dataset.pyx

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ cdef class SequentialDataset:
 
     cdef int random(self, double **x_data_ptr, int **x_ind_ptr,
                     int *nnz, double *y, double *sample_weight) nogil:
-        """Get the a random example ``x`` from the dataset.
+        """Get a random example ``x`` from the dataset.
 
         Parameters
         ----------

sklearn/utils/weight_vector.pyx

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ cdef class WeightVector(object):
                 val = x_data_ptr[j]
                 aw_data_ptr[idx] += (self.average_a * val * (-c / wscale))
 
-            # Once the the sample has been processed
+            # Once the sample has been processed
             # update the average_a and average_b
             if num_iter > 1:
                 self.average_b /= (1.0 - mu)

0 commit comments