8000 Changed ctype to np.ndarray to avoid cython read only error · scikit-learn/scikit-learn@6094264 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6094264

Browse files
committed
Changed ctype to np.ndarray to avoid cython read only error
Modified cython type (double -> np.ndarray(double)) to avoid read-only cython error. Added TempMemMap context manager class to allow easy temp file memmapping within tests, with secured deletion. Refactored to make use of sklearn.utils.TempMemMap. Added non-regression tests for one-task and multi-task lasso on read-only memmapped input arrays. Added non-regression test for memmapped read-only input design matrix. Import joblib from sklearn.externals. Added __main__. Added C file. Added y dtype check (necessary as np.ndarray does not allow implicit cast, unlike typed memory views). Minor changes to address PR. Changes type in sparse_enet_coordinate_descent. PEP8. Remove __main__. self.mmap_mode. Added c file. Changed name. Added atexit. Refactor to use auxiliary delete_folder. Refactor TempMemmap. pep8 changes. Refactor TempMemmap.
1 parent bf33390 commit 6094264

File tree

7 files changed

+2722
-2556
lines changed

7 files changed

+2722
-2556
lines changed

sklearn/decomposition/tests/test_dict_learning.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import numpy as np
22

3+
34
from sklearn.utils.testing import assert_array_almost_equal
45
from sklearn.utils.testing import assert_array_equal
56
from sklearn.utils.testing import assert_equal
67
from sklearn.utils.testing import assert_true
78
from sklearn.utils.testing import assert_less
89
from sklearn.utils.testing import assert_raises
910
from sklearn.utils.testing import ignore_warnings
11+
from sklearn.utils.testing import TempMemmap
1012

1113
from sklearn.decomposition import DictionaryLearning
1214
from sklearn.decomposition import MiniBatchDictionaryLearning
@@ -59,6 +61,13 @@ def test_dict_learning_reconstruction_parallel():
5961
code = dico.transform(X)
6062
assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
6163

64+
def test_dict_learning_lassocd_readonly_data():
    """Check that the 'lasso_cd' transform works on memory-mapped input.

    The data set ``X`` (defined outside this function) is round-tripped
    through ``TempMemmap`` and the resulting array is used both for fitting
    and for transforming; the sparse code must still reconstruct the input.
    """
    n_components = 12
    with TempMemmap(X) as X_read_only:
        estimator = DictionaryLearning(
            n_components,
            transform_algorithm='lasso_cd',
            transform_alpha=0.001,
            random_state=0,
            n_jobs=-1)
        sparse_code = estimator.fit(X_read_only).transform(X_read_only)
        reconstruction = np.dot(sparse_code, estimator.components_)
        assert_array_almost_equal(reconstruction, X_read_only, decimal=2)
6271

6372
def test_dict_learning_nonzero_coefs():
6473
n_components = 4
@@ -214,4 +223,4 @@ def test_sparse_coder_estimator():
214223
code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars',
215224
transform_alpha=0.001).transform(X)
216225
assert_true(not np.all(code == 0))
217-
assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
226+
assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)

sklearn/linear_model/cd_fast.c

Lines changed: 2640 additions & 2531 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sklearn/linear_model/cd_fast.pyx

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,9 @@ def enet_coordinate_descent(np.ndarray[DOUBLE, ndim=1] w,
289289
@cython.cdivision(True)
290290
def sparse_enet_coordinate_descent(double[:] w,
291291
double alpha, double beta,
292-
double[:] X_data, int[:] X_indices,
293-
int[:] X_indptr, double[:] y,
292+
np.ndarray[double, ndim=1] X_data,
293+
np.ndarray[int, ndim=1] X_indices,
294+
np.ndarray[int, ndim=1] X_indptr, np.ndarray[double, ndim=1] y,
294295
double[:] X_mean, int max_iter,
295296
double tol, object rng, bint random=0,
296297
bint positive=0):
@@ -487,7 +488,9 @@ def sparse_enet_coordinate_descent(double[:] w,
487488
@cython.wraparound(False)
488489
@cython.cdivision(True)
489490
def enet_coordinate_descent_gram(double[:] w, double alpha, double beta,
490-
double[:, :] Q, double[:] q, double[:] y,
491+
np.ndarray[double, ndim=2] Q,
492+
np.ndarray[double, ndim=1] q,
493+
np.ndarray[double, ndim=1] y,
491494
int max_iter, double tol, object rng,
492495
bint random=0, bint positive=0):
493496
"""Cython version of the coordinate descent algorithm
@@ -628,8 +631,8 @@ def enet_coordinate_descent_gram(double[:] w, double alpha, double beta,
628631
@cython.wraparound(False)
629632
@cython.cdivision(True)
630633
def enet_coordinate_descent_multi_task(double[::1, :] W, double l1_reg,
631-
double l2_reg, double[::1, :] X,
632-
double[:, :] Y, int max_iter,
634+
double l2_reg, np.ndarray[double, ndim=2] X,
635+
np.ndarray[double, ndim=2] Y, int max_iter,
633636
double tol, object rng,
634637
bint random=0):
635638
"""Cython version of the coordinate descent algorithm

sklearn/linear_model/coordinate_descent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
355355
ElasticNetCV
356356
"""
357357
X = check_array(X, 'csc', dtype=np.float64, order='F', copy=copy_X)
358+
y = check_array(y, 'csc', dtype=np.float64, order='F', copy=False, ensure_2d=False)
358359
if Xy is not None:
359360
Xy = check_array(Xy, 'csc', dtype=np.float64, order='F', copy=False,
360361
ensure_2d=False)

sklearn/linear_model/tests/test_coordinate_descent.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from sklearn.utils.testing import assert_warns
2020
from sklearn.utils.testing import ignore_warnings
2121
from sklearn.utils.testing import assert_array_equal
22+
from sklearn.utils.testing import TempMemmap
2223

2324
from sklearn.linear_model.coordinate_descent import Lasso, \
2425
LassoCV, ElasticNet, ElasticNetCV, MultiTaskLasso, MultiTaskElasticNet, \
@@ -388,6 +389,31 @@ def test_multi_task_lasso_and_enet():
388389
assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
389390

390391

392+
def test_lasso_readonly_data():
    """Check that Lasso can be fitted on memory-mapped training data.

    The training arrays are round-tripped through ``TempMemmap`` before being
    handed to ``fit``; coefficients, predictions and the dual gap must match
    the expected values for this toy problem.
    """
    features = np.array([[-1], [0], [1]])
    targets = np.array([-1, 0, 1])  # points lying on a straight line
    unseen = np.array([[2], [3], [4]])  # samples to predict on

    with TempMemmap((features, targets)) as (features, targets):
        model = Lasso(alpha=0.5)
        model.fit(features, targets)
        predictions = model.predict(unseen)
        assert_array_almost_equal(model.coef_, [.25])
        assert_array_almost_equal(predictions, [0.5, 0.75, 1.])
        assert_almost_equal(model.dual_gap_, 0)
404+
405+
406+
def test_multi_task_lasso_readonly_data():
    """Check that MultiTaskLasso can be fitted on memory-mapped X and Y.

    Both the design matrix and the two-column target are round-tripped
    through ``TempMemmap`` and passed to ``fit`` as-is. Since the two target
    columns are identical, the two rows of ``coef_`` must match.
    """
    X, y, _, _ = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        # Use the memmapped Y yielded by the context manager. Rebuilding Y
        # with np.c_ here would substitute a fresh in-memory array and leave
        # the memmapped target path untested.
        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
        assert_true(0 < clf.dual_gap_ < 1e-5)
        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
415+
416+
391417
def test_enet_multitarget():
392418
n_targets = 3
393419
X, y, _, _ = build_dataset(n_samples=10, n_features=8,

sklearn/linear_model/tests/test_least_angle.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
import tempfile
2-
import shutil
3-
import os.path as op
4-
import warnings
51
from nose.tools import assert_equal
62

73
import numpy as np
@@ -16,6 +12,7 @@
1612
from sklearn.utils.testing import assert_raises
1713
from sklearn.utils.testing import ignore_warnings
1814
from sklearn.utils.testing import assert_no_warnings, assert_warns
15+
from sklearn.utils.testing import TempMemmap
1916
from sklearn.utils import ConvergenceWarning
2017
from sklearn import linear_model, datasets
2118
from sklearn.linear_model.least_angle import _lars_path_residues
@@ -441,23 +438,9 @@ def test_lars_path_readonly_data():
441438
# This is a non-regression test for:
442439
# https://github.com/scikit-learn/scikit-learn/issues/4597
443440
splitted_data = train_test_split(X, y, random_state=42)
444-
temp_folder = tempfile.mkdtemp()
445-
try:
446-
fpath = op.join(temp_folder, 'data.pkl')
447-
joblib.dump(splitted_data, fpath)
448-
X_train, X_test, y_train, y_test = joblib.load(fpath, mmap_mode='r')
449-
441+
with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
450442
# The following should not fail despite copy=False
451443
_lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
452-
finally:
453-
# try to release the mmap file handle in time to be able to delete
454-
# the temporary folder under windows
455-
del X_train, X_test, y_train, y_test
456-
try:
457-
shutil.rmtree(temp_folder)
458-
except shutil.WindowsError:
459-
warnings.warn("Could not delete temporary folder %s" % temp_folder)
460-
461444

462445
if __name__ == '__main__':
463446
import nose

sklearn/utils/testing.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,14 @@
2828
from urllib.request import urlopen
2929
from urllib.error import HTTPError
3030

31+
import tempfile
32+
import shutil
33+
import os.path as op
34+
import atexit
35+
3136
import sklearn
3237
from sklearn.base import BaseEstimator
38+
from sklearn.externals import joblib
3339

3440
# Conveniently import all assertions in one place.
3541
from nose.tools import assert_equal
@@ -697,5 +703,34 @@ def check_skip_travis():
697703
if os.environ.get('TRAVIS') == "true":
698704
raise SkipTest("This test needs to be skipped on Travis")
699705

706+
707+
def _delete_folder(folder_path, warn=False):
    """Best-effort removal of a temporary folder, if it still exists.

    Copied from joblib.pool (for independence).

    Parameters
    ----------
    folder_path : str
        Path of the folder to remove. Nothing happens if it does not exist.
    warn : bool, optional (default=False)
        If True, emit a warning when the folder cannot be removed; if False,
        the failure is ignored silently.
    """
    try:
        if os.path.exists(folder_path):
            shutil.rmtree(folder_path)
    except OSError:
        # Catch OSError rather than ``shutil.WindowsError``: that alias is
        # not present in ``shutil`` on every Python version, and looking it
        # up while handling the error would itself raise AttributeError.
        # WindowsError is an OSError (subclass or alias), so the Windows
        # "file still in use" failure is still covered.
        if warn:
            warnings.warn("Could not delete temporary folder %s" % folder_path)
716+
717+
718+
class TempMemmap(object):
    """Context manager that yields ``data`` reloaded through joblib memmapping.

    A temporary folder is created when the object is constructed. On entry,
    ``data`` is dumped into that folder with ``joblib.dump`` and loaded back
    with ``joblib.load(..., mmap_mode=mmap_mode)``; the loaded object is what
    the ``with`` statement binds. On exit the folder is removed (best effort).

    Parameters
    ----------
    data : object
        Object to dump and reload (e.g. an array or a tuple of arrays).
    mmap_mode : str, optional (default='r')
        Forwarded unchanged to ``joblib.load``.

    Attributes
    ----------
    temp_folder : str
        Path of the temporary folder holding the dumped data.
    """

    def __init__(self, data, mmap_mode='r'):
        self.temp_folder = tempfile.mkdtemp(prefix='sklearn_testing_')
        self.mmap_mode = mmap_mode
        self.data = data

    def __enter__(self):
        # Register the exit-time cleanup *before* touching the disk: if the
        # dump or load raises, __exit__ is never called and this hook is the
        # only thing that removes the folder. The path is passed as an
        # argument (instead of a lambda closing over ``self``) so the hook
        # does not keep ``self.data`` alive until interpreter shutdown.
        atexit.register(_delete_folder, self.temp_folder, warn=True)
        fpath = op.join(self.temp_folder, 'data.pkl')
        joblib.dump(self.data, fpath)
        return joblib.load(fpath, mmap_mode=self.mmap_mode)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Silent best-effort removal; if it fails here, the atexit hook
        # registered in __enter__ retries at interpreter exit and warns.
        _delete_folder(self.temp_folder)
733+
734+
700735
with_network = with_setup(check_skip_network)
701736
with_travis = with_setup(check_skip_travis)

0 commit comments

Comments
 (0)
0