
Commit 8d9b58b

jakirkham authored and amueller committed
Backport NumPy 1.13.0 fixes to 0.18.X (#9137)
* Fix tests on numpy master (#7946): until now we were in an edge case on assert_array_equal.
* Fix tests on numpy master (#8355): numpy.apply_along_axis has changed behaviour when the function passed in returns a 2d array.
* [MRG] Updated plot_stock_market.py to use Google Finance (#9010)
  * DOC updated plot_stock_market.py to use Google Finance. The implementation is intentionally very basic so as not to distract users from the example; specifically, unlike ``quotes_historical_yahoo_ochl``, it does not cache downloaded data. I also had to remove some symbols because they have no data on Google for the specified date interval. These are WBA, LMT, KFT and MTU. Closes #8899.
  * DOC removed plot_stock_market.py from expected failing examples.
  * Addressed review comments.
  * Addressed another pass of review comments.
* [MRG] Remove DeprecationWarnings in examples due to using floats instead of ints (#8040)
1 parent 14031f6 · commit 8d9b58b

16 files changed: +112 / -72 lines
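Two of the backported test fixes track behaviour changes on NumPy master (what became 1.13). In particular, numpy.apply_along_axis now supports functions that return arrays of arbitrary shape rather than only scalars or 1-D arrays. A minimal sketch of the new behaviour, with illustrative values that are not taken from the patched tests:

import numpy as np

a = np.arange(12).reshape(3, 4)

# np.diag returns a 2-D array for each 1-D row it receives.
# NumPy >= 1.13 stitches the full 2-D results into the output,
# so `out` has shape (3, 4, 4); older releases could not handle
# a 2-D return value here, which is what broke the affected tests.
out = np.apply_along_axis(np.diag, 1, a)
print(out.shape)  # (3, 4, 4)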

examples/applications/plot_stock_market.py

Lines changed: 51 additions & 21 deletions
@@ -64,27 +64,59 @@
 # Author: Gael Varoquaux gael.varoquaux@normalesup.org
 # License: BSD 3 clause

-import datetime
+from datetime import datetime

 import numpy as np
-import matplotlib.pyplot as plt
-try:
-    from matplotlib.finance import quotes_historical_yahoo_ochl
-except ImportError:
-    # quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
-    from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
+from matplotlib import pyplot as plt
 from matplotlib.collections import LineCollection
+from six.moves.urllib.request import urlopen
+from six.moves.urllib.parse import urlencode

 from sklearn import cluster, covariance, manifold

 ###############################################################################
 # Retrieve the data from Internet

+def quotes_historical_google(symbol, date1, date2):
+    """Get the historical data from Google finance.
+
+    Parameters
+    ----------
+    symbol : str
+        Ticker symbol to query for, for example ``"DELL"``.
+    date1 : datetime.datetime
+        Start date.
+    date2 : datetime.datetime
+        End date.
+
+    Returns
+    -------
+    X : array
+        The columns are ``date`` -- datetime, ``open``, ``high``,
+        ``low``, ``close`` and ``volume`` of type float.
+    """
+    params = urlencode({
+        'q': symbol,
+        'startdate': date1.strftime('%b %d, %Y'),
+        'enddate': date2.strftime('%b %d, %Y'),
+        'output': 'csv'
+    })
+    url = 'http://www.google.com/finance/historical?' + params
+    with urlopen(url) as response:
+        dtype = {
+            'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
+            'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
+        }
+        converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
+        return np.genfromtxt(response, delimiter=',', skip_header=1,
+                             dtype=dtype, converters=converters,
+                             missing_values='-', filling_values=-1)
+
+
 # Choose a time period reasonably calm (not too long ago so that we get
 # high-tech firms, and before the 2008 crash)
-d1 = datetime.datetime(2003, 1, 1)
-d2 = datetime.datetime(2008, 1, 1)
+d1 = datetime(2003, 1, 1)
+d2 = datetime(2008, 1, 1)

-# kraft symbol has now changed from KFT to MDLZ in yahoo
 symbol_dict = {
     'TOT': 'Total',
     'XOM': 'Exxon',

@@ -102,7 +134,6 @@
     'AMZN': 'Amazon',
     'TM': 'Toyota',
     'CAJ': 'Canon',
-    'MTU': 'Mitsubishi',
     'SNE': 'Sony',
     'F': 'Ford',
     'HMC': 'Honda',

@@ -111,9 +142,8 @@
     'BA': 'Boeing',
     'KO': 'Coca Cola',
     'MMM': '3M',
-    'MCD': 'Mc Donalds',
+    'MCD': 'McDonald\'s',
     'PEP': 'Pepsi',
-    'MDLZ': 'Kraft Foods',
     'K': 'Kellogg',
     'UN': 'Unilever',
     'MAR': 'Marriott',

@@ -129,11 +159,9 @@
     'AAPL': 'Apple',
     'SAP': 'SAP',
     'CSCO': 'Cisco',
-    'TXN': 'Texas instruments',
+    'TXN': 'Texas Instruments',
     'XRX': 'Xerox',
-    'LMT': 'Lookheed Martin',
     'WMT': 'Wal-Mart',
-    'WBA': 'Walgreen',
     'HD': 'Home Depot',
     'GSK': 'GlaxoSmithKline',
     'PFE': 'Pfizer',

@@ -149,14 +177,16 @@

 symbols, names = np.array(list(symbol_dict.items())).T

-quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
-          for symbol in symbols]
+quotes = [
+    quotes_historical_google(symbol, d1, d2) for symbol in symbols
+]

-open = np.array([q.open for q in quotes]).astype(np.float)
-close = np.array([q.close for q in quotes]).astype(np.float)
+close_prices = np.stack([q['close'] for q in quotes])
+open_prices = np.stack([q['open'] for q in quotes])

 # The daily variations of the quotes are what carry most information
-variation = close - open
+variation = close_prices - open_prices
+

 ###############################################################################
 # Learn a graphical structure from the correlations
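Note that quotes_historical_google returns a NumPy structured array, which is why the last hunk switches from the attribute access of the old Yahoo helper (q.open) to field access (q['open']). A hedged usage sketch follows; the Google Finance CSV endpoint queried above has since been retired, so treat this as an illustration of the intended call pattern rather than a request that will still succeed:

from datetime import datetime

# Illustrative only: assumes the quotes_historical_google helper
# defined in the diff above and a still-reachable Google Finance
# CSV endpoint (retired since this commit was written).
d1 = datetime(2003, 1, 1)
d2 = datetime(2008, 1, 1)
quotes = quotes_historical_google('AAPL', d1, d2)

# Named fields replace the attribute access of the old Yahoo helper.
variation = quotes['close'] - quotes['open']
print(quotes['date'][0], variation[:5])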

examples/applications/plot_tomography_l1_reconstruction.py

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ def build_projection_operator(l_x, n_dir):
 def generate_synthetic_data():
     """ Synthetic binary data """
     rs = np.random.RandomState(0)
-    n_pts = 36.
+    n_pts = 36
     x, y = np.ogrid[0:l, 0:l]
     mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2
     mask = np.zeros((l, l))

examples/classification/plot_digits_classification.py

Lines changed: 4 additions & 4 deletions
@@ -46,17 +46,17 @@
 classifier = svm.SVC(gamma=0.001)

 # We learn the digits on the first half of the digits
-classifier.fit(data[:n_samples / 2], digits.target[:n_samples / 2])
+classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

 # Now predict the value of the digit on the second half:
-expected = digits.target[n_samples / 2:]
-predicted = classifier.predict(data[n_samples / 2:])
+expected = digits.target[n_samples // 2:]
+predicted = classifier.predict(data[n_samples // 2:])

 print("Classification report for classifier %s:\n%s\n"
       % (classifier, metrics.classification_report(expected, predicted)))
 print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

-images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))
+images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
 for index, (image, prediction) in enumerate(images_and_predictions[:4]):
     plt.subplot(2, 4, index + 5)
     plt.axis('off')
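The `//` substitutions above (and in the remaining examples) are needed because under Python 3 the `/` operator always performs true division, so n_samples / 2 is a float, and NumPy 1.12+ rejects float indices outright where it previously only issued a DeprecationWarning. A minimal sketch of the failure mode and the fix, with an illustrative array:

import numpy as np

data = np.arange(10)
n_samples = data.shape[0]

# n_samples / 2 == 5.0 on Python 3 -- a float slice bound,
# which recent NumPy refuses instead of merely warning about.
try:
    data[:n_samples / 2]
except (TypeError, IndexError):
    pass  # float indices are an error, not a warning, in NumPy >= 1.12

# Floor division keeps the bound an integer, so the slice is valid.
assert data[:n_samples // 2].shape == (5,)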

examples/covariance/plot_robust_vs_empirical_covariance.py

Lines changed: 7 additions & 7 deletions
@@ -67,7 +67,7 @@

 range_n_outliers = np.concatenate(
     (np.linspace(0, n_samples / 8, 5),
-     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1]))
+     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(np.int)

 # definition of arrays to store results
 err_loc_mcd = np.zeros((range_n_outliers.size, repeat))

@@ -135,13 +135,13 @@
 plt.errorbar(range_n_outliers, err_cov_mcd.mean(1),
              yerr=err_cov_mcd.std(1),
              label="Robust covariance (mcd)", color='m')
-plt.errorbar(range_n_outliers[:(x_size / 5 + 1)],
-             err_cov_emp_full.mean(1)[:(x_size / 5 + 1)],
-             yerr=err_cov_emp_full.std(1)[:(x_size / 5 + 1)],
+plt.errorbar(range_n_outliers[:(x_size // 5 + 1)],
+             err_cov_emp_full.mean(1)[:(x_size // 5 + 1)],
+             yerr=err_cov_emp_full.std(1)[:(x_size // 5 + 1)],
              label="Full data set empirical covariance", color='green')
-plt.plot(range_n_outliers[(x_size / 5):(x_size / 2 - 1)],
-         err_cov_emp_full.mean(1)[(x_size / 5):(x_size / 2 - 1)], color='green',
-         ls='--')
+plt.plot(range_n_outliers[(x_size // 5):(x_size // 2 - 1)],
+         err_cov_emp_full.mean(1)[(x_size // 5):(x_size // 2 - 1)],
+         color='green', ls='--')
 plt.errorbar(range_n_outliers, err_cov_emp_pure.mean(1),
              yerr=err_cov_emp_pure.std(1),
              label="Pure data set empirical covariance", color='black')

examples/cross_decomposition/plot_compare_cross_decomposition.py

Lines changed: 4 additions & 4 deletions
@@ -36,10 +36,10 @@
 X = latents + np.random.normal(size=4 * n).reshape((n, 4))
 Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

-X_train = X[:n / 2]
-Y_train = Y[:n / 2]
-X_test = X[n / 2:]
-Y_test = Y[n / 2:]
+X_train = X[:n // 2]
+Y_train = Y[:n // 2]
+X_test = X[n // 2:]
+Y_test = Y[n // 2:]

 print("Corr(X)")
 print(np.round(np.corrcoef(X.T), 2))

examples/decomposition/plot_sparse_coding.py

Lines changed: 2 additions & 2 deletions
@@ -44,13 +44,13 @@ def ricker_matrix(width, resolution, n_components):
 resolution = 1024
 subsampling = 3  # subsampling factor
 width = 100
-n_components = resolution / subsampling
+n_components = resolution // subsampling

 # Compute a wavelet dictionary
 D_fixed = ricker_matrix(width=width, resolution=resolution,
                         n_components=n_components)
 D_multi = np.r_[tuple(ricker_matrix(width=w, resolution=resolution,
-                                    n_components=np.floor(n_components / 5))
+                                    n_components=n_components // 5)
                       for w in (10, 50, 100, 500, 1000))]

 # Generate a signal

examples/exercises/plot_iris_exercise.py

Lines changed: 6 additions & 6 deletions
@@ -29,10 +29,10 @@
 X = X[order]
 y = y[order].astype(np.float)

-X_train = X[:.9 * n_sample]
-y_train = y[:.9 * n_sample]
-X_test = X[.9 * n_sample:]
-y_test = y[.9 * n_sample:]
+X_train = X[:int(.9 * n_sample)]
+y_train = y[:int(.9 * n_sample)]
+X_test = X[int(.9 * n_sample):]
+y_test = y[int(.9 * n_sample):]

 # fit the model
 for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):

@@ -58,8 +58,8 @@
 # Put the result into a color plot
 Z = Z.reshape(XX.shape)
 plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
-plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
-            levels=[-.5, 0, .5])
+plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
+            linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

 plt.title(kernel)
 plt.show()

examples/linear_model/plot_lasso_and_elasticnet.py

Lines changed: 2 additions & 2 deletions
@@ -32,8 +32,8 @@

 # Split data in train set and test set
 n_samples = X.shape[0]
-X_train, y_train = X[:n_samples / 2], y[:n_samples / 2]
-X_test, y_test = X[n_samples / 2:], y[n_samples / 2:]
+X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
+X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

 ###############################################################################
 # Lasso

examples/neighbors/plot_kde_1d.py

Lines changed: 4 additions & 4 deletions
@@ -38,8 +38,8 @@
 # Plot the progression of histograms to kernels
 np.random.seed(1)
 N = 20
-X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
-                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
+X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
+                    np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
 X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
 bins = np.linspace(-5, 10, 10)

@@ -116,8 +116,8 @@ def format_func(x, loc):
 # Plot a 1D density example
 N = 100
 np.random.seed(1)
-X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
-                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
+X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
+                    np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]

 X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

examples/plot_kernel_approximation.py

Lines changed: 5 additions & 3 deletions
@@ -68,12 +68,14 @@
 data -= data.mean(axis=0)

 # We learn the digits on the first half of the digits
-data_train, targets_train = data[:n_samples / 2], digits.target[:n_samples / 2]
+data_train, targets_train = (data[:n_samples // 2],
+                             digits.target[:n_samples // 2])


 # Now predict the value of the digit on the second half:
-data_test, targets_test = data[n_samples / 2:], digits.target[n_samples / 2:]
-#data_test = scaler.transform(data_test)
+data_test, targets_test = (data[n_samples // 2:],
+                           digits.target[n_samples // 2:])
+# data_test = scaler.transform(data_test)

 # Create a classifier: a support vector classifier
 kernel_svm = svm.SVC(gamma=.2)

0 commit comments