"""
========================================================================
Benchmark for explicit feature map approximation of polynomial kernels
========================================================================

An example illustrating the approximation of the feature map
of a homogeneous polynomial kernel.

.. currentmodule:: sklearn.kernel_approximation

It shows how to use :class:`PolynomialCountSketch` and :class:`Nystroem` to
approximate the feature map of a polynomial kernel for
classification with an SVM on the digits dataset. Results using a linear
SVM in the original space, a linear SVM using the approximate mappings
and a kernelized SVM are compared.

The first plot shows the classification accuracy of Nystroem and of
PolynomialCountSketch [1] (which builds on the CountSketch of [2]) as
the output dimension (n_components) grows. It also shows the accuracy
of a linear SVM and of a polynomial kernel SVM on the same data.

The second plot explores the scalability of PolynomialCountSketch
and Nystroem. For a sufficiently large output dimension,
PolynomialCountSketch should be faster, as it runs in O(n(d + k log k))
time while Nystroem takes O(n(dk + k^2)). In addition, Nystroem requires
a time-consuming training phase, while training is almost immediate
for PolynomialCountSketch, whose training phase boils down to
initializing some random variables (because it is data-independent).

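As a rough back-of-the-envelope comparison (constants ignored, nothing
measured here): with d = 100 features and k = 1,000 components, the
per-sample cost is on the order of d + k log2(k) ~ 10,000 operations for
PolynomialCountSketch versus dk + k^2 ~ 1,100,000 for Nystroem.
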
[1] Pham, N., & Pagh, R. (2013, August). Fast and scalable polynomial
kernels via explicit feature maps. In Proceedings of the 19th ACM SIGKDD
international conference on Knowledge discovery and data mining (pp. 239-247).
(http://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf)

[2] Charikar, M., Chen, K., & Farach-Colton, M. (2002, July). Finding frequent
items in data streams. In International Colloquium on Automata, Languages, and
Programming (pp. 693-703). Springer, Berlin, Heidelberg.
(http://www.vldb.org/pvldb/1/1454225.pdf)

| 40 | +""" |
| 41 | +# Author: Daniel Lopez-Sanchez <lope@usal.es> |
| 42 | +# License: BSD 3 clause |
| 43 | + |
# Will use this for timing results
from time import time

# Some common libraries
import matplotlib.pyplot as plt
import numpy as np

# Load data manipulation functions
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Import SVM classifiers and feature map approximation algorithms
from sklearn.svm import LinearSVC, SVC
from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch
from sklearn.pipeline import Pipeline

# Split data into train and test sets
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

# Set the range of n_components for our experiments
out_dims = range(20, 400, 20)

# Evaluate a linear SVM on the original features
lsvm = LinearSVC().fit(X_train, y_train)
lsvm_score = 100 * lsvm.score(X_test, y_test)

# Evaluate a kernelized SVM with the exact degree-2 polynomial kernel
ksvm = SVC(kernel="poly", degree=2, gamma=1.).fit(X_train, y_train)
ksvm_score = 100 * ksvm.score(X_test, y_test)

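# A quick sanity check (an illustrative sketch, not part of the original
# benchmark): with gamma=1 and coef0=0 (the SVC default), the kernel above
# is the homogeneous polynomial kernel k(x, y) = (x . y) ** 2, and the
# explicit map Z produced by PolynomialCountSketch is built so that
# Z @ Z.T approximates the exact kernel matrix.
from sklearn.metrics.pairwise import polynomial_kernel

X_small = X_train[:100]
Z = PolynomialCountSketch(degree=2, n_components=400).fit_transform(X_small)
exact = polynomial_kernel(X_small, gamma=1., degree=2, coef0=0)
rel_err = np.abs(exact - Z @ Z.T).mean() / np.abs(exact).mean()
print(f"Mean relative kernel approximation error: {rel_err:.4f}")
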
# Evaluate PolynomialCountSketch + LinearSVC
ps_svm_scores = []
n_runs = 5

# To compensate for the stochasticity of the method, we average over
# n_runs independent runs
for k in out_dims:
    score_avg = 0
    for _ in range(n_runs):
        ps_svm = Pipeline([("PS", PolynomialCountSketch(degree=2,
                                                        n_components=k)),
                           ("SVM", LinearSVC())])
        score_avg += ps_svm.fit(X_train, y_train).score(X_test, y_test)
    ps_svm_scores.append(100 * score_avg / n_runs)

# Evaluate Nystroem + LinearSVC, averaging over the same number of runs
ny_svm_scores = []

for k in out_dims:
    score_avg = 0
    for _ in range(n_runs):
        ny_svm = Pipeline([("NY", Nystroem(kernel="poly", gamma=1., degree=2,
                                           coef0=0, n_components=k)),
                           ("SVM", LinearSVC())])
        score_avg += ny_svm.fit(X_train, y_train).score(X_test, y_test)
    ny_svm_scores.append(100 * score_avg / n_runs)

# Show results
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title("Accuracy results")
ax.plot(out_dims, ps_svm_scores, label="PolynomialCountSketch + linear SVM",
        c="orange")
ax.plot(out_dims, ny_svm_scores, label="Nystroem + linear SVM",
        c="blue")
ax.plot([out_dims[0], out_dims[-1]], [lsvm_score, lsvm_score],
        label="Linear SVM", c="black", dashes=[2, 2])
ax.plot([out_dims[0], out_dims[-1]], [ksvm_score, ksvm_score],
        label="Poly-kernel SVM", c="red", dashes=[2, 2])
ax.legend()
ax.set_xlabel("N_components for PolynomialCountSketch and Nystroem")
ax.set_ylabel("Accuracy (%)")
ax.set_xlim([out_dims[0], out_dims[-1]])
fig.tight_layout()

# Now let's evaluate the scalability of PolynomialCountSketch vs Nystroem.
# First we generate some synthetic data with a large number of samples
# (no labels are needed, since both transformers fit without them)

fake_data = np.random.randn(10000, 100)

out_dims = range(500, 6000, 500)

# Evaluate scalability of PolynomialCountSketch as n_components grows
ps_times = []
for k in out_dims:
    ps = PolynomialCountSketch(degree=2, n_components=k)

    start = time()
    ps.fit_transform(fake_data, None)
    ps_times.append(time() - start)

# Evaluate scalability of Nystroem as n_components grows
# This can take a while due to its time-consuming training phase
ny_times = []
for k in out_dims:
    ny = Nystroem(kernel="poly", gamma=1., degree=2, coef0=0, n_components=k)

    start = time()
    ny.fit_transform(fake_data, None)
    ny_times.append(time() - start)

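# To see where Nystroem spends its time, we can also time fit() alone
# (an illustrative sketch, not part of the original benchmark): Nystroem's
# fit has to factorize a k x k kernel matrix over the sampled basis points,
# while PolynomialCountSketch's fit only draws random hash indices and
# signs, so fitting alone should be near-instant.
for name, transformer in [
        ("PolynomialCountSketch", PolynomialCountSketch(degree=2,
                                                        n_components=1000)),
        ("Nystroem", Nystroem(kernel="poly", gamma=1., degree=2, coef0=0,
                              n_components=1000))]:
    start = time()
    transformer.fit(fake_data)
    print(f"{name} fit time: {time() - start:.3f} s")
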
# Show results
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title("Scalability results")
ax.plot(out_dims, ps_times, label="PolynomialCountSketch", c="orange")
ax.plot(out_dims, ny_times, label="Nystroem", c="blue")
ax.legend()
ax.set_xlabel("N_components for PolynomialCountSketch and Nystroem")
ax.set_ylabel("fit_transform time\n(s / 10,000 samples)")
ax.set_xlim([out_dims[0], out_dims[-1]])
fig.tight_layout()
plt.show()