
Commit 8d9b58b

jakirkham authored and amueller committed
Backport NumPy 1.13.0 fixes to 0.18.X (#9137)
* Fix tests on numpy master (#7946): until now we were in an edge case on assert_array_equal.
* Fix tests on numpy master (#8355): numpy.apply_along_axis has changed behaviour when the function passed in returns a 2d array.
* [MRG] Updated plot_stock_market.py to use Google Finance (#9010)
  * DOC updated plot_stock_market.py to use Google Finance. The implementation is intentionally very basic so as not to distract users from the example; specifically, unlike ``quotes_historical_yahoo_ochl``, it does not cache downloaded data. I also had to remove some symbols because they have no data on Google for the specified date interval. These are WBA, LMT, KFT and MTU. Closes #8899.
  * DOC removed plot_stock_market.py from expected failing examples.
  * Addressed review comments.
  * Addressed another pass of review comments.
* [MRG] Remove DeprecationWarnings in examples due to using floats instead of ints (#8040)
1 parent 14031f6 · commit 8d9b58b

16 files changed: +112 / -72 lines
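Two of the backported test fixes track behaviour changes on NumPy master (what became 1.13). In particular, numpy.apply_along_axis now supports functions that return arrays of arbitrary shape rather than only scalars or 1-D arrays. A minimal sketch of the new behaviour, with illustrative values that are not taken from the patched tests:

import numpy as np

a = np.arange(12).reshape(3, 4)

# np.diag returns a 2-D array for each 1-D row it receives.
# NumPy >= 1.13 stitches the full 2-D results into the output,
# so `out` has shape (3, 4, 4); older releases could not handle
# a 2-D return value here, which is what broke the affected tests.
out = np.apply_along_axis(np.diag, 1, a)
print(out.shape)  # (3, 4, 4)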

examples/applications/plot_stock_market.py

Lines changed: 51 additions & 21 deletions
@@ -64,27 +64,59 @@
 # Author: Gael Varoquaux gael.varoquaux@normalesup.org
 # License: BSD 3 clause

-import datetime
+from datetime import datetime

 import numpy as np
-import matplotlib.pyplot as plt
-try:
-    from matplotlib.finance import quotes_historical_yahoo_ochl
-except ImportError:
-    # quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
-    from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
+from matplotlib import pyplot as plt
 from matplotlib.collections import LineCollection
+from six.moves.urllib.request import urlopen
+from six.moves.urllib.parse import urlencode

 from sklearn import cluster, covariance, manifold

 ###############################################################################
 # Retrieve the data from Internet

+def quotes_historical_google(symbol, date1, date2):
+    """Get the historical data from Google finance.
+
+    Parameters
+    ----------
+    symbol : str
+        Ticker symbol to query for, for example ``"DELL"``.
+    date1 : datetime.datetime
+        Start date.
+    date2 : datetime.datetime
+        End date.
+
+    Returns
+    -------
+    X : array
+        The columns are ``date`` -- datetime, ``open``, ``high``,
+        ``low``, ``close`` and ``volume`` of type float.
+    """
+    params = urlencode({
+        'q': symbol,
+        'startdate': date1.strftime('%b %d, %Y'),
+        'enddate': date2.strftime('%b %d, %Y'),
+        'output': 'csv'
+    })
+    url = 'http://www.google.com/finance/historical?' + params
+    with urlopen(url) as response:
+        dtype = {
+            'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
+            'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
+        }
+        converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
+        return np.genfromtxt(response, delimiter=',', skip_header=1,
+                             dtype=dtype, converters=converters,
+                             missing_values='-', filling_values=-1)
+
+
 # Choose a time period reasonably calm (not too long ago so that we get
 # high-tech firms, and before the 2008 crash)
-d1 = datetime.datetime(2003, 1, 1)
-d2 = datetime.datetime(2008, 1, 1)
+d1 = datetime(2003, 1, 1)
+d2 = datetime(2008, 1, 1)

-# kraft symbol has now changed from KFT to MDLZ in yahoo
 symbol_dict = {
     'TOT': 'Total',
     'XOM': 'Exxon',

@@ -102,7 +134,6 @@
     'AMZN': 'Amazon',
     'TM': 'Toyota',
     'CAJ': 'Canon',
-    'MTU': 'Mitsubishi',
     'SNE': 'Sony',
     'F': 'Ford',
     'HMC': 'Honda',

@@ -111,9 +142,8 @@
     'BA': 'Boeing',
     'KO': 'Coca Cola',
     'MMM': '3M',
-    'MCD': 'Mc Donalds',
+    'MCD': 'McDonald\'s',
     'PEP': 'Pepsi',
-    'MDLZ': 'Kraft Foods',
     'K': 'Kellogg',
     'UN': 'Unilever',
     'MAR': 'Marriott',

@@ -129,11 +159,9 @@
     'AAPL': 'Apple',
     'SAP': 'SAP',
     'CSCO': 'Cisco',
-    'TXN': 'Texas instruments',
+    'TXN': 'Texas Instruments',
     'XRX': 'Xerox',
-    'LMT': 'Lookheed Martin',
     'WMT': 'Wal-Mart',
-    'WBA': 'Walgreen',
     'HD': 'Home Depot',
     'GSK': 'GlaxoSmithKline',
     'PFE': 'Pfizer',

@@ -149,14 +177,16 @@

 symbols, names = np.array(list(symbol_dict.items())).T

-quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
-          for symbol in symbols]
+quotes = [
+    quotes_historical_google(symbol, d1, d2) for symbol in symbols
+]

-open = np.array([q.open for q in quotes]).astype(np.float)
-close = np.array([q.close for q in quotes]).astype(np.float)
+close_prices = np.stack([q['close'] for q in quotes])
+open_prices = np.stack([q['open'] for q in quotes])

 # The daily variations of the quotes are what carry most information
-variation = close - open
+variation = close_prices - open_prices
+

 ###############################################################################
 # Learn a graphical structure from the correlations
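Note that quotes_historical_google returns a NumPy structured array, which is why the last hunk switches from the attribute access of the old Yahoo helper (q.open) to field access (q['open']). A hedged usage sketch follows; the Google Finance CSV endpoint queried above has since been retired, so treat this as an illustration of the intended call pattern rather than a request that will still succeed:

from datetime import datetime

# Illustrative only: assumes the quotes_historical_google helper
# defined in the diff above and a still-reachable Google Finance
# CSV endpoint (retired since this commit was written).
d1 = datetime(2003, 1, 1)
d2 = datetime(2008, 1, 1)
quotes = quotes_historical_google('AAPL', d1, d2)

# Named fields replace the attribute access of the old Yahoo helper.
variation = quotes['close'] - quotes['open']
print(quotes['date'][0], variation[:5])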

examples/applications/plot_tomography_l1_reconstruction.py

Lines changed: 1 addition & 1 deletion
@@ -99,7 +99,7 @@ def build_projection_operator(l_x, n_dir):
 def generate_synthetic_data():
     """ Synthetic binary data """
     rs = np.random.RandomState(0)
-    n_pts = 36.
+    n_pts = 36
     x, y = np.ogrid[0:l, 0:l]
     mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2
     mask = np.zeros((l, l))

examples/classification/plot_digits_classification.py

Lines changed: 4 additions & 4 deletions
@@ -46,17 +46,17 @@
 classifier = svm.SVC(gamma=0.001)

 # We learn the digits on the first half of the digits
-classifier.fit(data[:n_samples / 2], digits.target[:n_samples / 2])
+classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

 # Now predict the value of the digit on the second half:
-expected = digits.target[n_samples / 2:]
-predicted = classifier.predict(data[n_samples / 2:])
+expected = digits.target[n_samples // 2:]
+predicted = classifier.predict(data[n_samples // 2:])

 print("Classification report for classifier %s:\n%s\n"
       % (classifier, metrics.classification_report(expected, predicted)))
 print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

-images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))
+images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
 for index, (image, prediction) in enumerate(images_and_predictions[:4]):
     plt.subplot(2, 4, index + 5)
     plt.axis('off')
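The `//` substitutions above (and in the remaining examples) are needed because under Python 3 the `/` operator always performs true division, so n_samples / 2 is a float, and NumPy 1.12+ rejects float indices outright where it previously only issued a DeprecationWarning. A minimal sketch of the failure mode and the fix, with an illustrative array:

import numpy as np

data = np.arange(10)
n_samples = data.shape[0]

# n_samples / 2 == 5.0 on Python 3 -- a float slice bound,
# which recent NumPy refuses instead of merely warning about.
try:
    data[:n_samples / 2]
except (TypeError, IndexError):
    pass  # float indices are an error, not a warning, in NumPy >= 1.12

# Floor division keeps the bound an integer, so the slice is valid.
assert data[:n_samples // 2].shape == (5,)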

examples/covariance/plot_robust_vs_empirical_covariance.py

Lines changed: 7 additions & 7 deletions
@@ -67,7 +67,7 @@

 range_n_outliers = np.concatenate(
     (np.linspace(0, n_samples / 8, 5),
-     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1]))
+     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(np.int)

 # definition of arrays to store results
 err_loc_mcd = np.zeros((range_n_outliers.size, repeat))

@@ -135,13 +135,13 @@
 plt.errorbar(range_n_outliers, err_cov_mcd.mean(1),
              yerr=err_cov_mcd.std(1),
              label="Robust covariance (mcd)", color='m')
-plt.errorbar(range_n_outliers[:(x_size / 5 + 1)],
-             err_cov_emp_full.mean(1)[:(x_size / 5 + 1)],
-             yerr=err_cov_emp_full.std(1)[:(x_size / 5 + 1)],
+plt.errorbar(range_n_outliers[:(x_size // 5 + 1)],
+             err_cov_emp_full.mean(1)[:(x_size // 5 + 1)],
+             yerr=err_cov_emp_full.std(1)[:(x_size // 5 + 1)],
              label="Full data set empirical covariance", color='green')
-plt.plot(range_n_outliers[(x_size / 5):(x_size / 2 - 1)],
-         err_cov_emp_full.mean(1)[(x_size / 5):(x_size / 2 - 1)], color='green',
-         ls='--')
+plt.plot(range_n_outliers[(x_size // 5):(x_size // 2 - 1)],
+         err_cov_emp_full.mean(1)[(x_size // 5):(x_size // 2 - 1)],
+         color='green', ls='--')
 plt.errorbar(range_n_outliers, err_cov_emp_pure.mean(1),
              yerr=err_cov_emp_pure.std(1),
              label="Pure data set empirical covariance", color='black')

examples/cross_decomposition/plot_compare_cross_decomposition.py

Lines changed: 4 additions & 4 deletions
@@ -36,10 +36,10 @@
 X = latents + np.random.normal(size=4 * n).reshape((n, 4))
 Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

-X_train = X[:n / 2]
-Y_train = Y[:n / 2]
-X_test = X[n / 2:]
-Y_test = Y[n / 2:]
+X_train = X[:n // 2]
+Y_train = Y[:n // 2]
+X_test = X[n // 2:]
+Y_test = Y[n // 2:]

 print("Corr(X)")
 print(np.round(np.corrcoef(X.T), 2))

examples/decomposition/plot_sparse_coding.py

Lines changed: 2 additions & 2 deletions
@@ -44,13 +44,13 @@ def ricker_matrix(width, resolution, n_components):
 resolution = 1024
 subsampling = 3  # subsampling factor
 width = 100
-n_components = resolution / subsampling
+n_components = resolution // subsampling

 # Compute a wavelet dictionary
 D_fixed = ricker_matrix(width=width, resolution=resolution,
                         n_components=n_components)
 D_multi = np.r_[tuple(ricker_matrix(width=w, resolution=resolution,
-                                    n_components=np.floor(n_components / 5))
+                                    n_components=n_components // 5)
                       for w in (10, 50, 100, 500, 1000))]

 # Generate a signal

examples/exercises/plot_iris_exercise.py

Lines changed: 6 additions & 6 deletions
@@ -29,10 +29,10 @@
 X = X[order]
 y = y[order].astype(np.float)

-X_train = X[:.9 * n_sample]
-y_train = y[:.9 * n_sample]
-X_test = X[.9 * n_sample:]
-y_test = y[.9 * n_sample:]
+X_train = X[:int(.9 * n_sample)]
+y_train = y[:int(.9 * n_sample)]
+X_test = X[int(.9 * n_sample):]
+y_test = y[int(.9 * n_sample):]

 # fit the model
 for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):

@@ -58,8 +58,8 @@
 # Put the result into a color plot
 Z = Z.reshape(XX.shape)
 plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
-plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
-            levels=[-.5, 0, .5])
+plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
+            linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

 plt.title(kernel)
 plt.show()

examples/linear_model/plot_lasso_and_elasticnet.py

Lines changed: 2 additions & 2 deletions
@@ -32,8 +32,8 @@

 # Split data in train set and test set
 n_samples = X.shape[0]
-X_train, y_train = X[:n_samples / 2], y[:n_samples / 2]
-X_test, y_test = X[n_samples / 2:], y[n_samples / 2:]
+X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
+X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

 ###############################################################################
 # Lasso

examples/neighbors/plot_kde_1d.py

Lines changed: 4 additions & 4 deletions
@@ -38,8 +38,8 @@
 # Plot the progression of histograms to kernels
 np.random.seed(1)
 N = 20
-X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
-                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
+X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
+                    np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
 X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
 bins = np.linspace(-5, 10, 10)

@@ -116,8 +116,8 @@ def format_func(x, loc):
 # Plot a 1D density example
 N = 100
 np.random.seed(1)
-X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
-                    np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
+X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
+                    np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]

 X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

examples/plot_kernel_approximation.py

Lines changed: 5 additions & 3 deletions
@@ -68,12 +68,14 @@
 data -= data.mean(axis=0)

 # We learn the digits on the first half of the digits
-data_train, targets_train = data[:n_samples / 2], digits.target[:n_samples / 2]
+data_train, targets_train = (data[:n_samples // 2],
+                             digits.target[:n_samples // 2])


 # Now predict the value of the digit on the second half:
-data_test, targets_test = data[n_samples / 2:], digits.target[n_samples / 2:]
-#data_test = scaler.transform(data_test)
+data_test, targets_test = (data[n_samples // 2:],
+                           digits.target[n_samples // 2:])
+# data_test = scaler.transform(data_test)

 # Create a classifier: a support vector classifier
 kernel_svm = svm.SVC(gamma=.2)

0 commit comments