From 6df4eac1a0eb71ebbe2d665aa96a82f19aff52fe Mon Sep 17 00:00:00 2001 From: Christof Angermueller Date: Mon, 16 Feb 2015 21:48:35 +0000 Subject: [PATCH 1/4] Update documentation of predict_proba in tree module --- doc/modules/tree.rst | 16 ++++++++++++++-- sklearn/tree/tree.py | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index 5fdd13a38ec5e..7df76f39d0b07 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -101,11 +101,17 @@ holding the class labels for the training samples:: >>> clf = tree.DecisionTreeClassifier() >>> clf = clf.fit(X, Y) -After being fitted, the model can then be used to predict new values:: +After being fitted, the model can then be used to predict the class of samples:: >>> clf.predict([[2., 2.]]) array([1]) +Alternatively, the probability of each class can be predicted, which is the +fraction of training samples of the same class in a leaf:: + + >>> clf.predict_proba([[2., 2.]]) + array([[ 0., 1.]]) + :class:`DecisionTreeClassifier` is capable of both binary (where the labels are [-1, 1]) classification and multiclass (where the labels are [0, ..., K-1]) classification. @@ -155,11 +161,17 @@ a PDF file (or any other supported file type) directly in Python:: .. figure:: ../images/iris.pdf :align: center -After being fitted, the model can then be used to predict new values:: +After being fitted, the model can then be used to predict the class of samples:: >>> clf.predict(iris.data[:1, :]) array([0]) +Alternatively, the probability of each class can be predicted, which is the +fraction of training samples of the same class in a leaf:: + + >>> clf.predict_proba(iris.data[:1, :]) + array([[ 1., 0., 0.]]) + .. 
figure:: ../auto_examples/tree/images/plot_iris_001.png :target: ../auto_examples/tree/plot_iris.html :align: center diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 8ee0ae55f83dd..8d4a900d31324 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -545,6 +545,9 @@ def __init__(self, def predict_proba(self, X): """Predict class probabilities of the input samples X. + The predicted class probability is the fraction of samples of the same + class in a leaf. + Parameters ---------- X : array-like or sparse matrix of shape = [n_samples, n_features] From 6035c988fa75f7aac2767eef761c37cc4be9ddba Mon Sep 17 00:00:00 2001 From: Christof Angermueller Date: Fri, 10 Apr 2015 19:12:44 +0100 Subject: [PATCH 2/4] Add conventions section to user guide --- doc/tutorial/basic/tutorial.rst | 66 +++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 5ac2f61c19563..13c1f18b0f111 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -250,3 +250,69 @@ Note that pickle has some security and maintainability issues. Please refer to section :ref:`model_persistence` for more detailed information about model persistence with scikit-learn. + +Conventions +----------- + +scikit-learn estimators follow certain rules to make their behavior more +predictive. + + +Type casting +~~~~~~~~~~~~ + +Unless otherwise specified, input will be cast to ``float64``, and regression +targets will be ``float64``. Consider the following example:: + + >>> import numpy as np + >>> from sklearn import random_projection + + >>> X = np.random.rand(10, 2000) + >>> X = np.array(X, dtype='float32') + >>> X.dtype + dtype('float32') + + >>> transformer = random_projection.GaussianRandomProjection() + >>> X_new = transformer.fit_transform(X) + >>> X_new.dtype + dtype('float64') + +The input data ``X`` is ``float32``, which is cast to ``float64`` by +``fit_transform(X)``. 
+ + +Refitting and updating parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hyper-parameters of an estimator can be updated after it has be constructed by +changing the corresponding member variables. Calling ``fit()`` more than once +will overwrite what was learned by previous ``fit()``:: + + >>> import numpy as np + >>> from sklearn.svm import SVC + + >>> np.random.seed(0) + >>> X = np.random.rand(100, 10) + >>> y = np.random.binomial(1, 0.5, 100) + >>> XX = np.random.rand(5, 10) + + >>> clf = SVC() + >>> clf.kernel = 'linear' + >>> clf.fit(X, y) + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='linear', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) + >>> clf.predict(XX) + array([1, 0, 1, 1, 0]) + + >>> clf.kernel = 'rbf' + >>> clf.fit(X, y) + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='rbf', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) + >>> clf.predict(XX) + array([0, 0, 0, 1, 0]) + +Here, the default kernel ``rbf`` is first changed to ``linear`` after the +estimator has been constructed via ``SVC()``, and changed back to ``rbf`` to +refit the estimator and to make a second prediction. From 58315248f43af9db21c74fa8b8a7e849cbf48a97 Mon Sep 17 00:00:00 2001 From: Christof Angermueller Date: Fri, 10 Apr 2015 22:01:47 +0100 Subject: [PATCH 3/4] Update conventions section in userguide --- doc/tutorial/basic/tutorial.rst | 36 ++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 13c1f18b0f111..171340f566e73 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -261,8 +261,7 @@ predictive. Type casting ~~~~~~~~~~~~ -Unless otherwise specified, input will be cast to ``float64``, and regression -targets will be ``float64``. 
Consider the following example:: +Unless otherwise specified, input will be cast to ``float64``:: >>> import numpy as np >>> from sklearn import random_projection @@ -277,16 +276,43 @@ targets will be ``float64``. Consider the following example:: >>> X_new.dtype dtype('float64') -The input data ``X`` is ``float32``, which is cast to ``float64`` by +In this example, ``X`` is ``float32``, which is cast to ``float64`` by ``fit_transform(X)``. +Regression targets are cast to ``float64``, classification targets are +maintained:: + >>> from sklearn import datasets + >>> from sklearn.svm import SVC + + >>> iris = datasets.load_iris() + >>> clf = SVC() + >>> clf.fit(iris.data, iris.target) + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='rbf', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) + + >>> clf.predict(iris.data[:3]) + array([0, 0, 0]) + + >>> clf.fit(iris.data, iris.target_names[iris.target]) + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='rbf', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) + + >>> clf.predict(iris.data[:3]) # doctest: +NORMALIZE_WHITESPACE + array(['setosa', 'setosa', 'setosa'], dtype='<U10') + +Here, the first ``predict()`` returns an integer array, since ``iris.target`` +(an integer array) was used in fitting. The second ``predict()`` returns a string +array, since ``iris.target_names`` was for fitting. Refitting and updating parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Hyper-parameters of an estimator can be updated after it has be constructed by +Hyper-parameters of an estimator can be updated after it has been constructed by changing the corresponding member variables. Calling ``fit()`` more than once -will overwrite what was learned by previous ``fit()``:: +will overwrite what was learned by any previous ``fit()``:: >>> import numpy as np >>> from sklearn.svm import SVC From 234be23d3d086443caefc96c3b2b221a811c019b Mon Sep 17 00:00:00 2001 From: Christof Angermueller Date: Sun, 12 Apr 2015 18:15:16 +0100 Subject: [PATCH 4/4] Use RandomState() in tutorial and rename variables --- doc/tutorial/basic/tutorial.rst | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 171340f566e73..615e13fe8e1f0 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -266,7 +266,8 @@ Unless otherwise specified, input will be cast to ``float64``:: >>> import numpy as np >>> from
sklearn import random_projection - >>> X = np.random.rand(10, 2000) + >>> rng = np.random.RandomState(0) + >>> X = rng.rand(10, 2000) >>> X = np.array(X, dtype='float32') >>> X.dtype dtype('float32') @@ -310,33 +311,39 @@ array, since ``iris.target_names`` was for fitting. Refitting and updating parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Hyper-parameters of an estimator can be updated after it has been constructed by -changing the corresponding member variables. Calling ``fit()`` more than once -will overwrite what was learned by any previous ``fit()``:: +Hyper-parameters of an estimator can be updated after it has been constructed +via the :func:`sklearn.pipeline.Pipeline.set_params` method. Calling ``fit()`` +more than once will overwrite what was learned by any previous ``fit()``:: >>> import numpy as np >>> from sklearn.svm import SVC - >>> np.random.seed(0) - >>> X = np.random.rand(100, 10) - >>> y = np.random.binomial(1, 0.5, 100) - >>> XX = np.random.rand(5, 10) + >>> rng = np.random.RandomState(0) + >>> X = rng.rand(100, 10) + >>> y = rng.binomial(1, 0.5, 100) + >>> X_test = rng.rand(5, 10) >>> clf = SVC() - >>> clf.kernel = 'linear' + >>> clf.set_params(kernel='linear') + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='linear', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) >>> clf.fit(X, y) SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, kernel='linear', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) - >>> clf.predict(XX) + >>> clf.predict(X_test) array([1, 0, 1, 1, 0]) - >>> clf.kernel = 'rbf' + >>> clf.set_params(kernel='rbf') + SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, + kernel='rbf', max_iter=-1, probability=False, random_state=None, + shrinking=True, tol=0.001, verbose=False) >>> clf.fit(X, y) SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, 
degree=3, gamma=0.0, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) - >>> clf.predict(XX) + >>> clf.predict(X_test) array([0, 0, 0, 1, 0]) Here, the default kernel ``rbf`` is first changed to ``linear`` after the