Backport NumPy 1.13.0 fixes to 0.18.X by jakirkham · Pull Request #9137 · scikit-learn/scikit-learn · GitHub

Backport NumPy 1.13.0 fixes to 0.18.X #9137


Merged: 4 commits, merged Jun 19, 2017
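Most of the changes below replace plain / division, float-valued sizes and float slice bounds with integer expressions: recent NumPy releases reject floats wherever an index, slice bound or sample count is expected, after a deprecation period in earlier 1.x versions. A minimal illustration of the pattern, independent of any file in this diff:

import numpy as np

x = np.arange(10)
n = x.shape[0]

# x[:n / 2] used to work because old NumPy truncated the float bound;
# recent releases raise an error instead.
print(x[:n // 2])        # floor division keeps the bound an integer: [0 1 2 3 4]
print(x[:int(0.3 * n)])  # explicit cast when the factor is fractional: [0 1 2]
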
72 changes: 51 additions & 21 deletions examples/applications/plot_stock_market.py
@@ -64,27 +64,59 @@
# Author: Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD 3 clause

import datetime
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
try:
from matplotlib.finance import quotes_historical_yahoo_ochl
except ImportError:
# quotes_historical_yahoo_ochl was named quotes_historical_yahoo before matplotlib 1.4
from matplotlib.finance import quotes_historical_yahoo as quotes_historical_yahoo_ochl
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
from six.moves.urllib.request import urlopen
from six.moves.urllib.parse import urlencode
from sklearn import cluster, covariance, manifold

###############################################################################
# Retrieve the data from Internet

def quotes_historical_google(symbol, date1, date2):
"""Get the historical data from Google finance.

Parameters
----------
symbol : str
Ticker symbol to query for, for example ``"DELL"``.
date1 : datetime.datetime
Start date.
date2 : datetime.datetime
End date.

Returns
-------
X : array
The columns are ``date`` -- datetime, ``open``, ``high``,
``low``, ``close`` and ``volume`` of type float.
"""
params = urlencode({
'q': symbol,
'startdate': date1.strftime('%b %d, %Y'),
'enddate': date2.strftime('%b %d, %Y'),
'output': 'csv'
})
url = 'http://www.google.com/finance/historical?' + params
with urlopen(url) as response:
dtype = {
'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']
}
converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}
return np.genfromtxt(response, delimiter=',', skip_header=1,
dtype=dtype, converters=converters,
missing_values='-', filling_values=-1)


# Choose a time period reasonably calm (not too long ago so that we get
# high-tech firms, and before the 2008 crash)
d1 = datetime.datetime(2003, 1, 1)
d2 = datetime.datetime(2008, 1, 1)
d1 = datetime(2003, 1, 1)
d2 = datetime(2008, 1, 1)

# kraft symbol has now changed from KFT to MDLZ in yahoo
symbol_dict = {
'TOT': 'Total',
'XOM': 'Exxon',
@@ -102,7 +134,6 @@
'AMZN': 'Amazon',
'TM': 'Toyota',
'CAJ': 'Canon',
'MTU': 'Mitsubishi',
'SNE': 'Sony',
'F': 'Ford',
'HMC': 'Honda',
@@ -111,9 +142,8 @@
'BA': 'Boeing',
'KO': 'Coca Cola',
'MMM': '3M',
'MCD': 'Mc Donalds',
'MCD': 'McDonald\'s',
'PEP': 'Pepsi',
'MDLZ': 'Kraft Foods',
'K': 'Kellogg',
'UN': 'Unilever',
'MAR': 'Marriott',
@@ -129,11 +159,9 @@
'AAPL': 'Apple',
'SAP': 'SAP',
'CSCO': 'Cisco',
'TXN': 'Texas instruments',
'TXN': 'Texas Instruments',
'XRX': 'Xerox',
'LMT': 'Lookheed Martin',
'WMT': 'Wal-Mart',
'WBA': 'Walgreen',
'HD': 'Home Depot',
'GSK': 'GlaxoSmithKline',
'PFE': 'Pfizer',
@@ -149,14 +177,16 @@

symbols, names = np.array(list(symbol_dict.items())).T

quotes = [quotes_historical_yahoo_ochl(symbol, d1, d2, asobject=True)
for symbol in symbols]
quotes = [
quotes_historical_google(symbol, d1, d2) for symbol in symbols
]

open = np.array([q.open for q in quotes]).astype(np.float)
close = np.array([q.close for q in quotes]).astype(np.float)
close_prices = np.stack([q['close'] for q in quotes])
open_prices = np.stack([q['open'] for q in quotes])

# The daily variations of the quotes are what carry most information
variation = close - open
variation = close_prices - open_prices


###############################################################################
# Learn a graphical structure from the correlations
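For context, a self-contained sketch (not part of the diff) of the np.genfromtxt pattern that the new quotes_historical_google helper relies on, run against an in-memory CSV instead of the Google Finance endpoint; the two data rows are invented for illustration:

import io
from datetime import datetime

import numpy as np

csv = io.BytesIO(b"Date,Open,High,Low,Close,Volume\n"
                 b"2-Jan-03,10.0,10.5,9.8,10.2,1000\n"
                 b"3-Jan-03,10.2,10.6,10.0,10.4,-\n")

dtype = {'names': ['date', 'open', 'high', 'low', 'close', 'volume'],
         'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']}
converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}

q = np.genfromtxt(csv, delimiter=',', skip_header=1, dtype=dtype,
                  converters=converters, missing_values='-',
                  filling_values=-1)

print(q['date'][0])      # 2003-01-02 00:00:00, parsed by the converter
print(q['close'])        # [10.2 10.4], fields of the structured array by name
print(q['volume'])       # [1000.   -1.], the missing '-' filled with -1

# np.stack then assembles the per-symbol series into a 2-d matrix, as done
# for close_prices / open_prices above.
print(np.stack([q['close'], q['open']]).shape)   # (2, 2)
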
2 changes: 1 addition & 1 deletion examples/applications/plot_tomography_l1_reconstruction.py
@@ -99,7 +99,7 @@ def build_projection_operator(l_x, n_dir):
def generate_synthetic_data():
""" Synthetic binary data """
rs = np.random.RandomState(0)
n_pts = 36.
n_pts = 36
x, y = np.ogrid[0:l, 0:l]
mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2
mask = np.zeros((l, l))
8 changes: 4 additions & 4 deletions examples/classification/plot_digits_classification.py
@@ -46,17 +46,17 @@
classifier = svm.SVC(gamma=0.001)

# We learn the digits on the first half of the digits
classifier.fit(data[:n_samples / 2], digits.target[:n_samples / 2])
classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

# Now predict the value of the digit on the second half:
expected = digits.target[n_samples / 2:]
predicted = classifier.predict(data[n_samples / 2:])
expected = digits.target[n_samples // 2:]
predicted = classifier.predict(data[n_samples // 2:])

print("Classification report for classifier %s:\n%s\n"
% (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

images_and_predictions = list(zip(digits.images[n_samples / 2:], predicted))
images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
plt.subplot(2, 4, index + 5)
plt.axis('off')
14 changes: 7 additions & 7 deletions examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -67,7 +67,7 @@

range_n_outliers = np.concatenate(
(np.linspace(0, n_samples / 8, 5),
np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1]))
np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(np.int)

# definition of arrays to store results
err_loc_mcd = np.zeros((range_n_outliers.size, repeat))
@@ -135,13 +135,13 @@
plt.errorbar(range_n_outliers, err_cov_mcd.mean(1),
yerr=err_cov_mcd.std(1),
label="Robust covariance (mcd)", color='m')
plt.errorbar(range_n_outliers[:(x_size / 5 + 1)],
err_cov_emp_full.mean(1)[:(x_size / 5 + 1)],
yerr=err_cov_emp_full.std(1)[:(x_size / 5 + 1)],
plt.errorbar(range_n_outliers[:(x_size // 5 + 1)],
err_cov_emp_full.mean(1)[:(x_size // 5 + 1)],
yerr=err_cov_emp_full.std(1)[:(x_size // 5 + 1)],
label="Full data set empirical covariance", color='green')
plt.plot(range_n_outliers[(x_size / 5):(x_size / 2 - 1)],
err_cov_emp_full.mean(1)[(x_size / 5):(x_size / 2 - 1)], color='green',
ls='--')
plt.plot(range_n_outliers[(x_size // 5):(x_size // 2 - 1)],
err_cov_emp_full.mean(1)[(x_size // 5):(x_size // 2 - 1)],
color='green', ls='--')
plt.errorbar(range_n_outliers, err_cov_emp_pure.mean(1),
yerr=err_cov_emp_pure.std(1),
label="Pure data set empirical covariance", color='black')
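The astype call added above is needed because np.linspace always returns floats, and these outlier counts are later used as integer counts and slice bounds; a quick standalone check (n_samples = 80 chosen here only for illustration):

import numpy as np

n_samples = 80
range_n_outliers = np.concatenate(
    (np.linspace(0, n_samples / 8, 5),
     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1]))
print(range_n_outliers.dtype)        # float64, linspace never yields ints
print(range_n_outliers.astype(int))  # [ 0  2  5  7 10 17 25 32]
# np.int in the diff is simply an alias for the builtin int.
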
@@ -36,10 +36,10 @@
X = latents + np.random.normal(size=4 * n).reshape((n, 4))
Y = latents + np.random.normal(size=4 * n).reshape((n, 4))

X_train = X[:n / 2]
Y_train = Y[:n / 2]
X_test = X[n / 2:]
Y_test = Y[n / 2:]
X_train = X[:n // 2]
Y_train = Y[:n // 2]
X_test = X[n // 2:]
Y_test = Y[n // 2:]

print("Corr(X)")
print(np.round(np.corrcoef(X.T), 2))
4 changes: 2 additions & 2 deletions examples/decomposition/plot_sparse_coding.py
@@ -44,13 +44,13 @@ def ricker_matrix(width, resolution, n_components):
resolution = 1024
subsampling = 3 # subsampling factor
width = 100
n_components = resolution / subsampling
n_components = resolution // subsampling

# Compute a wavelet dictionary
D_fixed = ricker_matrix(width=width, resolution=resolution,
n_components=n_components)
D_multi = np.r_[tuple(ricker_matrix(width=w, resolution=resolution,
n_components=np.floor(n_components / 5))
n_components=n_components // 5)
for w in (10, 50, 100, 500, 1000))]

# Generate a signal
12 changes: 6 additions & 6 deletions examples/exercises/plot_iris_exercise.py
@@ -29,10 +29,10 @@
X = X[order]
y = y[order].astype(np.float)

X_train = X[:.9 * n_sample]
y_train = y[:.9 * n_sample]
X_test = X[.9 * n_sample:]
y_test = y[.9 * n_sample:]
X_train = X[:int(.9 * n_sample)]
y_train = y[:int(.9 * n_sample)]
X_test = X[int(.9 * n_sample):]
y_test = y[int(.9 * n_sample):]

# fit the model
for fig_num, kernel in enumerate(('linear', 'rbf', 'poly')):
@@ -58,8 +58,8 @@
# Put the result into a color plot
Z = Z.reshape(XX.shape)
plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
levels=[-.5, 0, .5])
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

plt.title(kernel)
plt.show()
4 changes: 2 additions & 2 deletions examples/linear_model/plot_lasso_and_elasticnet.py
@@ -32,8 +32,8 @@

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples / 2], y[:n_samples / 2]
X_test, y_test = X[n_samples / 2:], y[n_samples / 2:]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

###############################################################################
# Lasso
8 changes: 4 additions & 4 deletions examples/neighbors/plot_kde_1d.py
@@ -38,8 +38,8 @@
# Plot the progression of histograms to kernels
np.random.seed(1)
N = 20
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
bins = np.linspace(-5, 10, 10)

@@ -116,8 +116,8 @@ def format_func(x, loc):
# Plot a 1D density example
N = 100
np.random.seed(1)
X = np.concatenate((np.random.normal(0, 1, 0.3 * N),
np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis]
X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]

X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]

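The int() casts above address the same NumPy restriction for sample counts: 0.3 * N is a float, which recent NumPy rejects as the size argument of the random samplers. A standalone check:

import numpy as np

np.random.seed(1)
N = 20
X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
                    np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
print(X.shape)   # (20, 1): 6 samples from the first mode, 14 from the second
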
8 changes: 5 additions & 3 deletions examples/plot_kernel_approximation.py
@@ -68,12 +68,14 @@
data -= data.mean(axis=0)

# We learn the digits on the first half of the digits
data_train, targets_train = data[:n_samples / 2], digits.target[:n_samples / 2]
data_train, targets_train = (data[:n_samples // 2],
digits.target[:n_samples // 2])


# Now predict the value of the digit on the second half:
data_test, targets_test = data[n_samples / 2:], digits.target[n_samples / 2:]
#data_test = scaler.transform(data_test)
data_test, targets_test = (data[n_samples // 2:],
digits.target[n_samples // 2:])
# data_test = scaler.transform(data_test)

# Create a classifier: a support vector classifier
kernel_svm = svm.SVC(gamma=.2)
6 changes: 3 additions & 3 deletions examples/plot_kernel_ridge_regression.py
@@ -54,7 +54,7 @@
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0]/5))
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, None]

@@ -119,8 +119,8 @@
# Generate sample data
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(X.shape[0]/5))
sizes = np.logspace(1, 4, 7)
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
sizes = np.logspace(1, 4, 7, dtype=np.int)
for name, estimator in {"KRR": KernelRidge(kernel='rbf', alpha=0.1,
gamma=10),
"SVR": SVR(kernel='rbf', C=1e1, gamma=10)}.items():
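np.logspace, like np.linspace, returns floats, so the learning-curve sizes get dtype=int (np.int in the diff, an alias for the builtin int) to be usable directly as sample counts; for illustration:

import numpy as np

print(np.logspace(1, 4, 7).dtype)       # float64
print(np.logspace(1, 4, 7, dtype=int))  # [   10    31   100   316  1000  3162 10000]
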
11 changes: 6 additions & 5 deletions examples/plot_multioutput_face_completion.py
@@ -39,10 +39,12 @@
test = test[face_ids, :]

n_pixels = data.shape[1]
X_train = train[:, :np.ceil(0.5 * n_pixels)] # Upper half of the faces
y_train = train[:, np.floor(0.5 * n_pixels):] # Lower half of the faces
X_test = test[:, :np.ceil(0.5 * n_pixels)]
y_test = test[:, np.floor(0.5 * n_pixels):]
# Upper half of the faces
X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

# Fit estimators
ESTIMATORS = {
@@ -74,7 +76,6 @@
sub = plt.subplot(n_faces, n_cols, i * n_cols + 1,
title="true faces")


sub.axis("off")
sub.imshow(true_face.reshape(image_shape),
cmap=plt.cm.gray,
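The new integer expressions are drop-in replacements for the old np.ceil / np.floor bounds: for any positive integer n, (n + 1) // 2 equals ceil(n / 2) and n // 2 equals floor(n / 2), and both are plain ints, so they remain valid slice bounds. A quick check with an even and an odd pixel count:

import numpy as np

for n_pixels in (4096, 4097):
    assert (n_pixels + 1) // 2 == int(np.ceil(0.5 * n_pixels))
    assert n_pixels // 2 == int(np.floor(0.5 * n_pixels))
    print(n_pixels, (n_pixels + 1) // 2, n_pixels // 2)
# 4096 2048 2048
# 4097 2049 2048
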
4 changes: 2 additions & 2 deletions examples/svm/plot_svm_scale_c.py
@@ -106,8 +106,8 @@

# l2 data: non sparse, but less features
y_2 = np.sign(.5 - rnd.rand(n_samples))
X_2 = rnd.randn(n_samples, n_features / 5) + y_2[:, np.newaxis]
X_2 += 5 * rnd.randn(n_samples, n_features / 5)
X_2 = rnd.randn(n_samples, n_features // 5) + y_2[:, np.newaxis]
X_2 += 5 * rnd.randn(n_samples, n_features // 5)

clf_sets = [(LinearSVC(penalty='l1', loss='squared_hinge', dual=False,
tol=1e-3),
2 changes: 1 addition & 1 deletion examples/tree/plot_unveil_tree_structure.py
@@ -54,7 +54,7 @@

# The tree structure can be traversed to compute various properties such
# as the depth of each node and whether or not it is a leaf.
node_depth = np.zeros(shape=n_nodes)
node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
is_leaves = np.zeros(shape=n_nodes, dtype=bool)
stack = [(0, -1)] # seed is the root node id and its parent depth
while len(stack) > 0:
2 changes: 1 addition & 1 deletion sklearn/gaussian_process/kernels.py
@@ -1852,7 +1852,7 @@ def diag(self, X):
Diagonal of kernel k(X, X)
"""
# We have to fall back to slow way of computing diagonal
return np.apply_along_axis(self, 1, X)[:, 0]
return np.apply_along_axis(self, 1, X).ravel()

def is_stationary(self):
"""Returns whether the kernel is stationary. """
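Background for the ravel() change, as an illustrative sketch rather than part of the diff: NumPy 1.13 reworked np.apply_along_axis so that the shape of the applied function's return value is preserved instead of being collapsed. A PairwiseKernel called on a single sample returns a 1x1 kernel matrix, so the old [:, 0] indexing no longer produces a 1-d diagonal, while ravel() does on both old and new NumPy. With a stand-in callable:

import numpy as np

def k(row):
    # stand-in for a kernel __call__ on one sample: returns a (1, 1) matrix
    return np.array([[np.dot(row, row)]])

X = np.random.RandomState(0).rand(5, 3)
out = np.apply_along_axis(k, 1, X)
print(out.shape)          # (5, 1, 1) on NumPy >= 1.13; older releases collapsed this to (5, 1)
print(out.ravel().shape)  # (5,) in either case; out[:, 0] would be (5, 1) on NumPy >= 1.13
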
19 changes: 13 additions & 6 deletions sklearn/model_selection/tests/test_split.py
@@ -1028,16 +1028,23 @@ def test_cv_iterable_wrapper():
# Since the wrapped iterable is enlisted and stored,
# split can be called any number of times to produce
# consistent results.
assert_array_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_iter_wrapped.split(X, y)))
np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_iter_wrapped.split(X, y)))
# If the splits are randomized, successive calls to split yields different
# results
kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
assert_array_equal(list(kf_randomized_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))
assert_true(np.any(np.array(list(kf_iter_wrapped.split(X, y))) !=
np.array(list(kf_randomized_iter_wrapped.split(X, y)))))
np.testing.assert_equal(list(kf_randomized_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))

try:
np.testing.assert_equal(list(kf_iter_wrapped.split(X, y)),
list(kf_randomized_iter_wrapped.split(X, y)))
splits_are_equal = True
except AssertionError:
splits_are_equal = False
assert_false(splits_are_equal, "If the splits are randomized, "
"successive calls to split should yield different results")


def test_group_kfold():
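The move from assert_array_equal to np.testing.assert_equal matters because split() yields tuples of train/test index arrays of different lengths: assert_array_equal first coerces its arguments into single (here ragged, object-dtype) arrays, whose comparison is fragile under NumPy 1.13, whereas assert_equal recurses through lists and tuples and compares the index arrays pairwise. A standalone sketch of the pattern used above:

import numpy as np

splits_a = [(np.array([0, 1, 2]), np.array([3, 4])),
            (np.array([2, 3, 4]), np.array([0, 1]))]
splits_b = [(np.array([0, 1, 2]), np.array([3, 4])),
            (np.array([2, 3, 4]), np.array([0, 1]))]
splits_c = [(np.array([1, 2, 0]), np.array([4, 3])),
            (np.array([2, 3, 4]), np.array([0, 1]))]

# assert_equal recurses through the lists and tuples and compares the
# differently sized index arrays pairwise.
np.testing.assert_equal(splits_a, splits_b)

# Same try/except pattern as in the test above: any mismatch raises AssertionError.
try:
    np.testing.assert_equal(splits_a, splits_c)
    splits_are_equal = True
except AssertionError:
    splits_are_equal = False
print(splits_are_equal)   # False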