new commit · scikit-learn/scikit-learn@1c9f918

Commit 1c9f918
new commit
1 parent b495849 · commit 1c9f918
File tree: 4 files changed, +12 -136 lines

doc/modules/svm.rst

Lines changed: 1 addition & 1 deletion

@@ -529,7 +529,7 @@ test vectors must be provided.
     >>> from sklearn import svm
     >>> X = np.array([[0, 0], [1, 1]])
     >>> y = [0, 1]
-    >>> clf = svm.SVC(kernel='precomputed')
+    >>> clf = svm.SVC(gamma='auto', kernel='precomputed')
     >>> # linear kernel computation
     >>> gram = np.dot(X, X.T)
     >>> clf.fit(gram, y)  # doctest: +NORMALIZE_WHITESPACE
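The hunk adds an explicit gamma to the precomputed-kernel doctest. gamma itself is unused by a precomputed kernel; spelling it out presumably just keeps scikit-learn 0.20's FutureWarning about the changing default out of the doctest output. For context, a minimal runnable version of the snippet, extended with a hypothetical X_test (not part of the diff) to show the prediction side of the API:

import numpy as np
from sklearn import svm

X = np.array([[0, 0], [1, 1]])
y = [0, 1]
clf = svm.SVC(gamma='auto', kernel='precomputed')

# With kernel='precomputed', fit() expects the Gram matrix of the
# training data instead of the raw features.
gram = np.dot(X, X.T)
clf.fit(gram, y)

# At prediction time the kernel between test and training vectors is
# needed; X_test is a made-up matrix for illustration.
X_test = np.array([[2, 2], [0, 1]])
print(clf.predict(np.dot(X_test, X.T)))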

doc/tutorial/basic/tutorial.rst

Lines changed: 11 additions & 10 deletions

@@ -214,12 +214,12 @@ persistence model, namely `pickle <https://docs.python.org/2/library/pickle.html
 
     >>> from sklearn import svm
     >>> from sklearn import datasets
-    >>> clf = svm.SVC(gamma='auto')
+    >>> clf = svm.SVC(gamma='scale')
     >>> iris = datasets.load_iris()
     >>> X, y = iris.data, iris.target
     >>> clf.fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
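Every hunk in this file makes the same mechanical change: pass gamma explicitly so the doctest output stays stable while the default migrates from 'auto' to 'scale'. For reference, a small sketch of what the two settings evaluate to; this uses the variance-based definition of 'scale' from scikit-learn 0.22 onwards (the first 0.20 releases briefly used the standard deviation instead):

from sklearn import datasets

X = datasets.load_iris().data
n_features = X.shape[1]

# gamma='auto' is the old default: 1 / n_features
gamma_auto = 1.0 / n_features
# gamma='scale' is the new default: 1 / (n_features * X.var())
gamma_scale = 1.0 / (n_features * X.var())
print(gamma_auto, gamma_scale)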

@@ -290,10 +290,10 @@ maintained::
     >>> from sklearn import datasets
     >>> from sklearn.svm import SVC
     >>> iris = datasets.load_iris()
-    >>> clf = SVC()
+    >>> clf = SVC(gamma='scale')
     >>> clf.fit(iris.data, iris.target)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)

@@ -302,7 +302,7 @@ maintained::
 
     >>> clf.fit(iris.data, iris.target_names[iris.target])  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
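The two hunks above touch the tutorial passage on type casting: the type of y is preserved by fit/predict, so training on integer targets yields integer predictions while training on the string names yields string predictions. A hedged sketch of the behaviour being documented:

from sklearn import datasets
from sklearn.svm import SVC

iris = datasets.load_iris()
clf = SVC(gamma='scale')

# Integer targets in, integer predictions out.
print(clf.fit(iris.data, iris.target).predict(iris.data[:3]))

# String targets in, string predictions out.
print(clf.fit(iris.data, iris.target_names[iris.target]).predict(iris.data[:3]))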

@@ -328,25 +328,25 @@ more than once will overwrite what was learned by any previous ``fit()``::
     >>> y = rng.binomial(1, 0.5, 100)
     >>> X_test = rng.rand(5, 10)
 
-    >>> clf = SVC()
+    >>> clf = SVC(gamma='scale')
     >>> clf.set_params(kernel='linear').fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
+        decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
     >>> clf.predict(X_test)
     array([1, 0, 1, 1, 0])
 
     >>> clf.set_params(kernel='rbf').fit(X, y)  # doctest: +NORMALIZE_WHITESPACE
     SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
-        decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
+        decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
         max_iter=-1, probability=False, random_state=None, shrinking=True,
         tol=0.001, verbose=False)
     >>> clf.predict(X_test)
     array([0, 0, 0, 1, 0])
 
 Here, the default kernel ``rbf`` is first changed to ``linear`` after the
-estimator has been constructed via ``SVC()``, and changed back to ``rbf`` to
+estimator has been constructed via ``SVC(gamma='scale')``, and changed back to ``rbf`` to
 refit the estimator and to make a second prediction.
 
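This hunk leans on set_params() overriding a constructor argument after the estimator has been built; get_params() is the read side of the same interface. A quick sketch showing that the explicit gamma survives later set_params() calls:

from sklearn.svm import SVC

clf = SVC(gamma='scale')
clf.set_params(kernel='linear')
params = clf.get_params()
print(params['kernel'])  # 'linear', overridden after construction
print(params['gamma'])   # 'scale', untouched by set_params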
Multiclass vs. multilabel fitting
@@ -363,7 +363,8 @@ the target data fit upon::
     >>> X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]
     >>> y = [0, 0, 1, 1, 2]
 
-    >>> classif = OneVsRestClassifier(estimator=SVC(random_state=0))
+    >>> classif = OneVsRestClassifier(estimator=SVC(gamma='scale',
+    ...                                             random_state=0))
     >>> classif.fit(X, y).predict(X)
     array([0, 0, 1, 1, 2])
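The tutorial section this hunk belongs to contrasts multiclass with multilabel fitting: with a 1d y, OneVsRestClassifier predicts one class per sample, while a 2d binary label matrix switches it to multilabel output. A sketch of the multilabel half, assuming the same toy X and scikit-learn's MultiLabelBinarizer:

from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import SVC

X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]
y = [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]

# Turn the label sets into a 2d binary indicator matrix.
Y = MultiLabelBinarizer().fit_transform(y)
classif = OneVsRestClassifier(estimator=SVC(gamma='scale', random_state=0))
print(classif.fit(X, Y).predict(X))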

Lines changed: 0 additions & 62 deletions (file deleted)

@@ -1,62 +0,0 @@
-"""Build a language detector model
-
-The goal of this exercise is to train a linear classifier on text features
-that represent sequences of up to 3 consecutive characters so as to
-recognize natural languages by using the frequencies of short character
-sequences as 'fingerprints'.
-
-"""
-# Author: Olivier Grisel <olivier.grisel@ensta.org>
-# License: Simplified BSD
-
-import sys
-
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import Perceptron
-from sklearn.pipeline import Pipeline
-from sklearn.datasets import load_files
-from sklearn.model_selection import train_test_split
-from sklearn import metrics
-
-
-# The training data folder must be passed as first argument
-languages_data_folder = sys.argv[1]
-dataset = load_files(languages_data_folder)
-
-# Split the dataset into training and test sets:
-docs_train, docs_test, y_train, y_test = train_test_split(
-    dataset.data, dataset.target, test_size=0.5)
-
-
-# TASK: Build a vectorizer that splits strings into sequences of 1 to 3
-# characters instead of word tokens
-
-# TASK: Build a vectorizer / classifier pipeline using the previous analyzer;
-# the pipeline instance should be stored in a variable named clf
-
-# TASK: Fit the pipeline on the training set
-
-# TASK: Predict the outcome on the testing set in a variable named y_predicted
-
-# Print the classification report
-print(metrics.classification_report(y_test, y_predicted,
-                                    target_names=dataset.target_names))
-
-# Plot the confusion matrix
-cm = metrics.confusion_matrix(y_test, y_predicted)
-print(cm)
-
-# import matplotlib.pyplot as plt
-# plt.matshow(cm, cmap=plt.cm.jet)
-# plt.show()
-
-# Predict the result on some short new sentences:
-sentences = [
-    u'This is a language detection test.',
-    u'Ceci est un test de d\xe9tection de la langue.',
-    u'Dies ist ein Test, um die Sprache zu erkennen.',
-]
-predicted = clf.predict(sentences)
-
-for s, p in zip(sentences, predicted):
-    print(u'The language of "%s" is "%s"' % (s, dataset.target_names[p]))
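The deleted skeleton intentionally leaves clf and y_predicted undefined; filling them in is the exercise. One possible completion, sketched on a tiny made-up corpus rather than the load_files dataset the exercise expects, and not the repository's official solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Perceptron
from sklearn.pipeline import Pipeline

# Stand-in corpus; the real exercise loads one folder per language.
docs_train = ['this is english text', 'ceci est du texte en francais',
              'dies ist deutscher text', 'another english sentence']
y_train = [0, 1, 2, 0]

# TASK: character n-grams of length 1 to 3 instead of word tokens.
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))

# TASK: vectorizer / classifier pipeline stored in a variable named clf.
clf = Pipeline([('vec', vectorizer), ('clf', Perceptron())])

# TASK: fit, then predict.
clf.fit(docs_train, y_train)
y_predicted = clf.predict(docs_train)
print(y_predicted)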
Lines changed: 0 additions & 63 deletions (file deleted)

@@ -1,63 +0,0 @@
-"""Build a sentiment analysis / polarity model
-
-Sentiment analysis can be cast as a binary text classification problem,
-that is, fitting a linear classifier on features extracted from the text
-of the user messages so as to guess whether the opinion of the author is
-positive or negative.
-
-In this example we will use a movie review dataset.
-
-"""
-# Author: Olivier Grisel <olivier.grisel@ensta.org>
-# License: Simplified BSD
-
-import sys
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.svm import LinearSVC
-from sklearn.pipeline import Pipeline
-from sklearn.model_selection import GridSearchCV
-from sklearn.datasets import load_files
-from sklearn.model_selection import train_test_split
-from sklearn import metrics
-
-
-if __name__ == "__main__":
-    # NOTE: we put the following in a 'if __name__ == "__main__"' protected
-    # block to be able to use a multi-core grid search that also works under
-    # Windows, see: http://docs.python.org/library/multiprocessing.html#windows
-    # The multiprocessing module is used as the backend of joblib.Parallel
-    # that is used when n_jobs != 1 in GridSearchCV
-
-    # the training data folder must be passed as first argument
-    movie_reviews_data_folder = sys.argv[1]
-    dataset = load_files(movie_reviews_data_folder, shuffle=False)
-    print("n_samples: %d" % len(dataset.data))
-
-    # split the dataset into training and test sets:
-    docs_train, docs_test, y_train, y_test = train_test_split(
-        dataset.data, dataset.target, test_size=0.25, random_state=None)
-
-    # TASK: Build a vectorizer / classifier pipeline that filters out tokens
-    # that are too rare or too frequent
-
-    # TASK: Build a grid search to find out whether unigrams or bigrams are
-    # more useful.
-    # Fit the pipeline on the training set using grid search for the parameters
-
-    # TASK: print the cross-validated scores for each parameter set
-    # explored by the grid search
-
-    # TASK: Predict the outcome on the testing set and store it in a variable
-    # named y_predicted
-
-    # Print the classification report
-    print(metrics.classification_report(y_test, y_predicted,
-                                        target_names=dataset.target_names))
-
-    # Print and plot the confusion matrix
-    cm = metrics.confusion_matrix(y_test, y_predicted)
-    print(cm)
-
-    # import matplotlib.pyplot as plt
-    # plt.matshow(cm)
-    # plt.show()
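As with the previous skeleton, the TASK comments are the exercise itself. One possible completion, again on a toy corpus; the min_df/max_df and ngram_range values are illustrative guesses, not the official solution:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Toy corpus standing in for the movie review dataset.
docs_train = ['great wonderful film', 'terrible boring movie',
              'loved every minute', 'hated every minute',
              'what a great movie', 'awful plot and acting']
y_train = [1, 0, 1, 0, 1, 0]

# TASK: pipeline that filters out tokens that are too rare or too frequent.
pipeline = Pipeline([
    ('vect', TfidfVectorizer(min_df=1, max_df=0.95)),
    ('clf', LinearSVC()),
])

# TASK: grid search comparing unigrams against unigrams + bigrams.
parameters = {'vect__ngram_range': [(1, 1), (1, 2)]}
grid_search = GridSearchCV(pipeline, parameters, cv=2)
grid_search.fit(docs_train, y_train)

# TASK: cross-validated score for each parameter set explored.
for params, score in zip(grid_search.cv_results_['params'],
                         grid_search.cv_results_['mean_test_score']):
    print(params, score)

y_predicted = grid_search.predict(docs_train)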

0 commit comments