P3K use six to have a python 2 & 3 compatible code base · seckcoder/scikit-learn@1967a0b · GitHub
Commit 1967a0b

ogrisel authored and larsmans committed
P3K use six to have a python 2 & 3 compatible code base
1 parent 9f1c346 commit 1967a0b

59 files changed: +401 −361 lines (only the first files of this large diff are shown below).
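Reviewer's note: the commit replaces Python-2-only constructs (dict.iteritems, print statements, xrange, basestring, cPickle) with equivalents from six, vendored as sklearn.externals.six. A minimal sketch of the idioms involved (the six module itself is not shown in this excerpt; in modern scikit-learn releases the vendored copy has been removed, but a plain `import six` from PyPI behaves the same):

from sklearn.externals import six  # the vendored copy; `import six` works too

d = {'a': 1, 'b': 2}
for key, value in six.iteritems(d):       # d.iteritems() on Py2, d.items() on Py3
    print('%s=%r' % (key, value))

assert isinstance('text', six.string_types)   # basestring on Py2, str on Py3
assert list(six.moves.range(3)) == [0, 1, 2]  # xrange on Py2, range on Py3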

setup.py

Lines changed: 1 addition & 16 deletions

@@ -98,28 +98,13 @@ def configuration(parent_package='', top_path=None):
         _old_stdout = sys.stdout
         try:
             sys.stdout = StringIO()  # supress noisy output
-            res = lib2to3.main.main("lib2to3.fixes",
-                                    ['-x', 'import', '-w', local_path])
+            res = lib2to3.main.main("lib2to3.fixes", ['-x', 'import', '-w', local_path])
         finally:
             sys.stdout = _old_stdout
 
         if res != 0:
             raise Exception('2to3 failed, exiting ...')
 
-        # Ugly hack to make pip work with Python 3, see
-        # http://projects.scipy.org/numpy/ticket/1857.
-        # Explanation: pip messes with __file__ which interacts badly with the
-        # change in directory due to the 2to3 conversion. Therefore we restore
-        # __file__ to what it would have been otherwise.
-        global __file__
-        __file__ = os.path.join(os.curdir, os.path.basename(__file__))
-        if '--egg-base' in sys.argv:
-            # Change pip-egg-info entry to absolute path, so pip can find it
-            # after changing directory.
-            idx = sys.argv.index('--egg-base')
-            if sys.argv[idx + 1] == 'pip-egg-info':
-                sys.argv[idx + 1] = os.path.join(old_path, 'pip-egg-info')
-
     os.chdir(local_path)
     sys.path.insert(0, local_path)
 
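Reviewer's note: the call being reflowed drives 2to3 programmatically during a Python 3 build. A hedged sketch of the same pattern on a hypothetical build directory (`lib2to3.main.main(fixer_pkg, args)` returns a shell-style exit status; lib2to3 is deprecated since Python 3.9 and removed in 3.13):

import sys
import lib2to3.main
from io import StringIO

build_dir = 'build/py3'  # hypothetical path, not from the commit

_old_stdout = sys.stdout
try:
    sys.stdout = StringIO()  # silence lib2to3's per-file diff output
    # '-x import' excludes the import fixer; '-w' writes fixes back to disk
    res = lib2to3.main.main("lib2to3.fixes", ['-x', 'import', '-w', build_dir])
finally:
    sys.stdout = _old_stdout

if res != 0:
    raise Exception('2to3 failed, exiting ...')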

sklearn/base.py

Lines changed: 4 additions & 3 deletions

@@ -8,6 +8,7 @@
 
 import numpy as np
 from scipy import sparse
+from .externals import six
 
 
 ###############################################################################
@@ -42,7 +43,7 @@ def clone(estimator, safe=True):
                             % (repr(estimator), type(estimator)))
     klass = estimator.__class__
     new_object_params = estimator.get_params(deep=False)
-    for name, param in new_object_params.iteritems():
+    for name, param in six.iteritems(new_object_params):
         new_object_params[name] = clone(param, safe=False)
     new_object = klass(**new_object_params)
     params_set = new_object.get_params(deep=False)
@@ -120,7 +121,7 @@ def _pprint(params, offset=0, printer=repr):
     params_list = list()
     this_line_length = offset
     line_sep = ',\n' + (1 + offset // 2) * ' '
-    for i, (k, v) in enumerate(sorted(params.iteritems())):
+    for i, (k, v) in enumerate(sorted(six.iteritems(params))):
         if type(v) is float:
             # use str for representing floating point numbers
             # this way we get consistent representation across
@@ -225,7 +226,7 @@ def set_params(self, **params):
             # Simple optimisation to gain speed (inspect is slow)
             return self
         valid_params = self.get_params(deep=True)
-        for key, value in params.iteritems():
+        for key, value in six.iteritems(params):
             split = key.split('__', 1)
             if len(split) > 1:
                 # nested objects case
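Reviewer's note: these replacements are mechanical, since `d.iteritems()` no longer exists on Python 3. A small illustration of the helper (behavior as documented by six, not part of this diff). Note that `clone` assigns back into the dict it is iterating; that is safe because only values change, never the key set:

from sklearn.externals import six

params = {'alpha': 0.1, 'fit_intercept': True}
# six.iteritems(d) calls d.iteritems() on Python 2 and d.items() on Python 3,
# so no intermediate list is built on either version.
for name, value in six.iteritems(params):
    print('%s=%r' % (name, value))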

sklearn/cluster/affinity_propagation_.py

Lines changed: 2 additions & 2 deletions

@@ -139,11 +139,11 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200,
                            != n_samples)
         if (not unconverged and (K > 0)) or (it == max_iter):
             if verbose:
-                print "Converged after %d iterations." % it
+                print("Converged after %d iterations." % it)
             break
     else:
         if verbose:
-            print "Did not converge"
+            print("Did not converge")
 
     I = np.where(np.diag(A + R) > 0)[0]
     K = I.size  # Identify exemplars
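Reviewer's note: the parenthesized form parses on both major versions. On Python 2, `print("...")` is the print statement applied to a parenthesized expression, so single-argument calls behave identically without any import; multi-argument calls print a tuple on Python 2 unless the module opts in to the function form. A sketch (the commit does not add this import here):

from __future__ import print_function  # must precede all other statements

it = 10
print("Converged after %d iterations." % it)  # identical on Py2 and Py3 regardless
print("Converged after", it, "iterations.")   # needs the future import on Py2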

sklearn/cluster/hierarchical.py

Lines changed: 8 additions & 7 deletions

@@ -15,10 +15,11 @@
 from scipy.cluster import hierarchy
 
 from ..base import BaseEstimator, ClusterMixin
-from ..utils._csgraph import cs_graph_components
 from ..externals.joblib import Memory
+from ..externals import six
 from ..metrics import euclidean_distances
 from ..utils import array2d
+from ..utils._csgraph import cs_graph_components
 
 from . import _hierarchical
 from ._feature_agglomeration import AgglomerationTransform
@@ -144,9 +145,9 @@ def ward_tree(X, connectivity=None, n_components=None, copy=True,
     moments_2 = np.zeros((n_nodes, n_features))
     moments_2[:n_samples] = X
     inertia = np.empty(len(coord_row), dtype=np.float)
-    _hierarchical.compute_ward_dist(moments_1, moments_2, coord_row, coord_col,
-                                    inertia)
-    inertia = zip(inertia, coord_row, coord_col)
+    _hierarchical.compute_ward_dist(moments_1, moments_2,
+                                    coord_row, coord_col, inertia)
+    inertia = list(six.moves.zip(inertia, coord_row, coord_col))
     heapify(inertia)
 
     # prepare the main fields
@@ -158,7 +159,7 @@ def ward_tree(X, connectivity=None, n_components=None, copy=True,
     not_visited = np.empty(n_nodes, dtype=np.int8)
 
     # recursive merge loop
-    for k in xrange(n_samples, n_nodes):
+    for k in range(n_samples, n_nodes):
         # identify the merge
         while True:
             inert, i, j = heappop(inertia)
@@ -191,7 +192,7 @@ def ward_tree(X, connectivity=None, n_components=None, copy=True,
                                         coord_row, coord_col, ini)
         # List comprehension is faster than a for loop
         [heappush(inertia, (ini[idx], k, coord_col[idx]))
-            for idx in xrange(n_additions)]
+            for idx in range(n_additions)]
 
     # Separate leaves in children (empty lists up to now)
     n_leaves = n_samples
@@ -346,7 +347,7 @@ def fit(self, X):
         """
         memory = self.memory
         X = array2d(X)
-        if isinstance(memory, basestring):
+        if isinstance(memory, six.string_types):
             memory = Memory(cachedir=memory, verbose=0)
 
         if not self.connectivity is None:
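Reviewer's note: two details above. `six.moves.zip` is `itertools.izip` on Python 2 and the builtin `zip` on Python 3, lazy in both cases, hence the explicit `list(...)`: `heapify` needs a mutable sequence. And `six.string_types` stands in for the Python-2-only `basestring`. A sketch:

from heapq import heapify, heappop
from sklearn.externals import six

inertia = list(six.moves.zip([3.0, 1.0], [0, 1], [1, 2]))  # materialize the iterator
heapify(inertia)                   # a bare zip object would raise TypeError on Py3
assert heappop(inertia)[0] == 1.0  # smallest merge cost comes out first

assert isinstance('/tmp/cache', six.string_types)  # str on Py3, basestring on Py2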

sklearn/cluster/k_means_.py

Lines changed: 17 additions & 18 deletions

@@ -98,7 +98,7 @@ def _k_init(X, n_clusters, n_local_trials=None, random_state=None,
     current_pot = closest_dist_sq.sum()
 
     # Pick the remaining n_clusters-1 points
-    for c in xrange(1, n_clusters):
+    for c in range(1, n_clusters):
         # Choose center candidates by sampling with probability proportional
         # to the squared distance to the closest existing center
         rand_vals = random_state.random_sample(n_local_trials) * current_pot
@@ -112,7 +112,7 @@ def _k_init(X, n_clusters, n_local_trials=None, random_state=None,
         best_candidate = None
         best_pot = None
         best_dist_sq = None
-        for trial in xrange(n_local_trials):
+        for trial in range(n_local_trials):
             # Compute potential when including center candidate
             new_dist_sq = np.minimum(closest_dist_sq,
                                      distance_to_candidates[trial])
@@ -363,7 +363,7 @@ def _kmeans_single(X, n_clusters, max_iter=300, init='k-means++',
     centers = _init_centroids(X, n_clusters, init, random_state=random_state,
                               x_squared_norms=x_squared_norms)
     if verbose:
-        print 'Initialization complete'
+        print('Initialization complete')
 
     # Allocate memory to store the distances for each sample to its
     # closer center for reallocation in case of ties
@@ -386,7 +386,7 @@ def _kmeans_single(X, n_clusters, max_iter=300, init='k-means++',
         centers = _k_means._centers_dense(X, labels, n_clusters, distances)
 
         if verbose:
-            print 'Iteration %i, inertia %s' % (i, inertia)
+            print('Iteration %i, inertia %s' % (i, inertia))
 
         if best_inertia is None or inertia < best_inertia:
             best_labels = labels.copy()
@@ -395,7 +395,7 @@ def _kmeans_single(X, n_clusters, max_iter=300, init='k-means++',
 
         if np.sum((centers_old - centers) ** 2) < tol:
             if verbose:
-                print 'Converged to similar centers at iteration', i
+                print('Converged to similar centers at iteration', i)
             break
     return best_labels, best_inertia, best_centers
 
@@ -943,14 +943,14 @@ def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
             'mean batch inertia: %f, ewa inertia: %f ' % (
                 iteration_idx + 1, n_iter, batch_inertia,
                 ewa_inertia))
-        print progress_msg
+        print(progress_msg)
 
     # Early stopping based on absolute tolerance on squared change of
     # centers postion (using EWA smoothing)
     if tol > 0.0 and ewa_diff < tol:
         if verbose:
-            print 'Converged (small centers change) at iteration %d/%d' % (
-                iteration_idx + 1, n_iter)
+            print('Converged (small centers change) at iteration %d/%d'
+                  % (iteration_idx + 1, n_iter))
         return True
 
     # Early stopping heuristic due to lack of improvement on smoothed inertia
@@ -965,9 +965,9 @@ def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
     if (model.max_no_improvement is not None
             and no_improvement >= model.max_no_improvement):
         if verbose:
-            print ('Converged (lack of improvement in inertia)'
-                   ' at iteration %d/%d' % (
-                       iteration_idx + 1, n_iter))
+            print('Converged (lack of improvement in inertia)'
+                  ' at iteration %d/%d'
+                  % (iteration_idx + 1, n_iter))
         return True
 
     # update the convergence context to maintain state across sucessive calls:
@@ -1141,8 +1141,8 @@ def fit(self, X, y=None):
         best_inertia = None
         for init_idx in range(self.n_init):
             if self.verbose:
-                print "Init %d/%d with method: %s" % (
-                    init_idx + 1, self.n_init, self.init)
+                print("Init %d/%d with method: %s"
+                      % (init_idx + 1, self.n_init, self.init))
             counts = np.zeros(self.n_clusters, dtype=np.int32)
 
             # TODO: once the `k_means` function works with sparse input we
@@ -1167,8 +1167,8 @@ def fit(self, X, y=None):
             _, inertia = _labels_inertia(X_valid, x_squared_norms_valid,
                                          cluster_centers)
             if self.verbose:
-                print "Inertia for init %d/%d: %f" % (
-                    init_idx + 1, self.n_init, inertia)
+                print("Inertia for init %d/%d: %f"
+                      % (init_idx + 1, self.n_init, inertia))
             if best_inertia is None or inertia < best_inertia:
                 self.cluster_centers_ = cluster_centers
                 self.counts_ = counts
@@ -1179,8 +1179,7 @@ def fit(self, X, y=None):
 
         # Perform the iterative optimization until the final convergence
         # criterion
-        for iteration_idx in xrange(n_iter):
-
+        for iteration_idx in range(n_iter):
             # Sample a minibatch from the full dataset
             minibatch_indices = random_state.random_integers(
                 0, n_samples - 1, self.batch_size)
@@ -1210,7 +1209,7 @@ def fit(self, X, y=None):
 
         if self.compute_labels:
             if self.verbose:
-                print 'Computing label assignements and total inertia'
+                print('Computing label assignements and total inertia')
             self.labels_, self.inertia_ = _labels_inertia(
                 X, x_squared_norms, self.cluster_centers_)
 

sklearn/cluster/mean_shift_.py

Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@
 from collections import defaultdict
 import numpy as np
 
+from ..externals import six
 from ..utils import extmath, check_random_state
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
@@ -193,7 +194,7 @@ def get_bin_seeds(X, bin_size, min_bin_freq=1):
         bin_sizes[tuple(binned_point)] += 1
 
     # Select only those bins as seeds which have enough members
-    bin_seeds = np.array([point for point, freq in bin_sizes.iteritems() if
+    bin_seeds = np.array([point for point, freq in six.iteritems(bin_sizes) if
                           freq >= min_bin_freq], dtype=np.float32)
     bin_seeds = bin_seeds * bin_size
     return bin_seeds

sklearn/cluster/tests/test_spectral.py

Lines changed: 2 additions & 1 deletion

@@ -1,6 +1,7 @@
 """Testing for Spectral Clustering methods"""
 
-from cPickle import dumps, loads
+from sklearn.externals.six.moves import cPickle
+dumps, loads = cPickle.dumps, cPickle.loads
 
 import numpy as np
 from scipy import sparse
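Reviewer's note: `six.moves.cPickle` resolves to `cPickle` on Python 2 and to `pickle` on Python 3, where the C accelerator is used automatically. A sketch of the round trip the tests rely on:

from sklearn.externals.six.moves import cPickle

blob = cPickle.dumps({'gamma': 1.0})
assert cPickle.loads(blob) == {'gamma': 1.0}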

sklearn/covariance/graph_lasso_.py

Lines changed: 8 additions & 7 deletions

@@ -22,6 +22,7 @@
 from ..linear_model import cd_fast
 from ..cross_validation import check_cv, cross_val_score
 from ..externals.joblib import Parallel, delayed
+import collections
 
 
 ###############################################################################
@@ -154,8 +155,8 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
     else:
         errors = dict(invalid='raise')
     try:
-        for i in xrange(max_iter):
-            for idx in xrange(n_features):
+        for i in range(max_iter):
+            for idx in range(n_features):
                 sub_covariance = covariance_[indices != idx].T[indices != idx]
                 row = emp_cov[idx, indices != idx]
                 with np.errstate(**errors):
@@ -187,7 +188,7 @@ def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
             d_gap = _dual_gap(emp_cov, precision_, alpha)
             cost = _objective(emp_cov, precision_, alpha)
             if verbose:
-                print (
+                print(
                     '[graph_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e'
                     % (i, cost, d_gap))
             if return_costs:
@@ -431,7 +432,7 @@ def fit(self, X, y=None):
             n_alphas = self.alphas
         inner_verbose = max(0, self.verbose - 1)
 
-        if operator.isSequenceType(n_alphas):
+        if isinstance(n_alphas, collections.Sequence):
             alphas = self.alphas
             n_refinements = 1
         else:
@@ -510,10 +511,10 @@ def fit(self, X, y=None):
                                      n_alphas + 2)
                 alphas = alphas[1:-1]
             if self.verbose and n_refinements > 1:
-                print '[GraphLassoCV] Done refinement % 2i out of %i: % 3is'\
-                        % (i + 1, n_refinements, time.time() - t0)
+                print('[GraphLassoCV] Done refinement % 2i out of %i: % 3is'
+                      % (i + 1, n_refinements, time.time() - t0))
 
-        path = zip(*path)
+        path = list(zip(*path))
         cv_scores = list(path[1])
         alphas = list(path[0])
         # Finally, compute the score with alpha = 0
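Reviewer's note: `operator.isSequenceType` was removed in Python 3; the portable spelling is an `isinstance` check against the `Sequence` ABC, which here distinguishes an explicit grid of alphas (a list) from an integer count. A sketch — noting that on Python 3.3+ the ABC canonically lives in `collections.abc`, and the bare `collections.Sequence` alias used above was dropped in Python 3.10:

try:
    from collections.abc import Sequence  # Python 3.3+
except ImportError:
    from collections import Sequence      # Python 2 fallback

assert isinstance([0.01, 0.1, 1.0], Sequence)  # explicit list of alphas
assert not isinstance(4, Sequence)             # an integer means "4 refinements"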

sklearn/covariance/robust_covariance.py

Lines changed: 1 addition & 1 deletion

@@ -144,7 +144,7 @@ def c_step(X, n_support, remaining_iterations=30, initial_estimates=None,
     # Check early stopping
     if remaining_iterations == 0:
         if verbose:
-            print 'Maximum number of iterations reached'
+            print('Maximum number of iterations reached')
         det = fast_logdet(covariance)
         results = location, covariance, det, support, dist

sklearn/cross_validation.py

Lines changed: 4 additions & 3 deletions

@@ -23,6 +23,7 @@
 from .utils.fixes import unique
 from .externals.joblib import Parallel, delayed
 from .metrics import SCORERS, Scorer
+import numbers
 
 __all__ = ['Bootstrap',
            'KFold',
@@ -93,7 +94,7 @@ def __init__(self, n, indices=True):
 
     def __iter__(self):
         n = self.n
-        for i in xrange(n):
+        for i in range(n):
             test_index = np.zeros(n, dtype=np.bool)
             test_index[i] = True
             train_index = np.logical_not(test_index)
@@ -282,7 +283,7 @@ def __iter__(self):
         n_folds = self.n_folds
         fold_size = n // n_folds
 
-        for i in xrange(n_folds):
+        for i in range(n_folds):
             test_index = np.zeros(n, dtype=np.bool)
             if i < n_folds - 1:
                 test_index[self.idxs[i * fold_size:(i + 1) * fold_size]] = True
@@ -377,7 +378,7 @@ def __iter__(self):
         n = y.size
        idx = np.argsort(y)
 
-        for i in xrange(n_folds):
+        for i in range(n_folds):
             test_index = np.zeros(n, dtype=np.bool)
             test_index[idx[i::n_folds]] = True
             train_index = np.logical_not(test_index)
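Reviewer's note: the new `numbers` import is not used in the hunks shown here; presumably it backs integer checks elsewhere in this file. The idiom it enables looks like this (a sketch; the helper name is hypothetical):

import numbers
import numpy as np

def _as_int(n_folds):  # hypothetical helper, not from the commit
    # numbers.Integral matches int, long (Py2) and numpy integer scalars alike
    if not isinstance(n_folds, numbers.Integral):
        raise ValueError('n_folds must be an integer, got %r' % (n_folds,))
    return int(n_folds)

assert _as_int(np.int32(3)) == 3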

sklearn/datasets/base.py

Lines changed: 2 additions & 2 deletions

@@ -398,12 +398,12 @@ def load_boston():
     data_file = csv.reader(open(join(module_path, 'data',
                                      'boston_house_prices.csv')))
     fdescr = open(join(module_path, 'descr', 'boston_house_prices.rst'))
-    temp = data_file.next()
+    temp = next(data_file)
     n_samples = int(temp[0])
     n_features = int(temp[1])
     data = np.empty((n_samples, n_features))
     target = np.empty((n_samples,))
-    temp = data_file.next()  # names of features
+    temp = next(data_file)  # names of features
     feature_names = np.array(temp)
 
     for i, d in enumerate(data_file):
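Reviewer's note: the builtin `next()` has existed since Python 2.6 and works on any iterator, while the `.next()` method is Python 2 only (renamed `__next__` on Python 3), so this is the portable spelling. A sketch mirroring the CSV header handling above (assumes Python 3 for the unicode stream):

import csv
import io

reader = csv.reader(io.StringIO(u'506,13\nCRIM,ZN\n0.006,18.0\n'))
temp = next(reader)                     # first row: sample and feature counts
n_samples, n_features = int(temp[0]), int(temp[1])
feature_names = next(reader)            # second row: column names
assert (n_samples, n_features) == (506, 13)
assert feature_names == ['CRIM', 'ZN']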
