10000 Fixed bug with test suite, finished and polished, and added estimated… · SkuaD01/scikit-learn@abbe725 · GitHub
[go: up one dir, main page]

Skip to content

Commit abbe725

Browse files
committed
Fixed bug with test suite, finished and polished, and added estimated run time
1 parent 127fa02 commit abbe725

File tree

1 file changed

+26
-13
lines changed

1 file changed

+26
-13
lines changed

sklearn/impute/tests/test_time.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,22 @@
55
import profile
66
import pytest
77

8+
##################### THIS TEST SUITE WILL TAKE APPROXIMATELY 15 MINUTES, BUT VARIES BY MACHINE ############################
89

9-
# Total Original Time: 97.91s
10-
# Total new Time: 48.07s
11-
12-
perc = [0,0.1,0.5,0.9,1]
10+
# Initialise constants
11+
epsilon = 0.5
12+
perc = [0.1,0.5,0.9] # We want to test a varying amount of missing values to impute; each of these represents the percentage (as a fraction) of missing values in X
1313
small_n = [100,300,500]
1414
large_n = [1000,3000,5000]
1515

16+
# generate random array of size x by y with (p*100)% missing values
1617
def gen_matrix(x, y, p):
    """Return a reproducible x-by-y DataFrame with roughly (p*100)% missing values.

    Parameters
    ----------
    x : int
        Number of rows.
    y : int
        Number of columns.
    p : float
        Threshold in [0, 1]; every uniformly drawn entry <= p is replaced
        by NaN, so about a fraction p of the entries end up missing.

    Returns
    -------
    pandas.DataFrame
        The random matrix with the masked entries set to NaN.
    """
    # Use a private, fixed-seed generator instead of np.random.seed(1) so the
    # output stays deterministic without reseeding the process-global RNG
    # (which would silently affect any other code sharing the process).
    rng = np.random.RandomState(1)
    values = rng.random_sample((x, y))
    return pd.DataFrame(values).mask(values <= p)
2122

22-
23+
# Run (simulated) old time or new time, return start and end times
2324
def run_test(X, old=False):
2425
start = time.time()
2526
if (old):
@@ -32,11 +33,13 @@ def run_test(X, old=False):
3233
return start,end
3334

3435
def relative_assert(new, old):
35-
assert pytest.approx(new).__lt__(pytest.approx(old, abs=1))
36+
# Since smaller times yield more sporadic results, a tolerance of epsilon seconds is allowed when comparing the new time against the old time
37+
assert ((new == pytest.approx(old, abs=epsilon)) or (new < old))
3638

3739
def output_res(old_end,old_start,end,start):
    """Print the old and new elapsed times and the relative improvement.

    Parameters
    ----------
    old_end, old_start : float
        End and start timestamps (seconds) of the old run.
    end, start : float
        End and start timestamps (seconds) of the new run.

    The improvement is reported as (old_elapsed / new_elapsed - 1) * 100,
    rounded to two decimals. Elapsed times are rounded to four decimals.
    """
    old_elapsed = old_end - old_start
    new_elapsed = end - start
    if new_elapsed == 0:
        # A very fast run can start and end on the same timer tick; avoid
        # dividing by zero and report that the ratio cannot be computed.
        improvement = "improvement not measurable (new time is 0)"
    else:
        improvement = str(round((old_elapsed / new_elapsed - 1) * 100, 2)) + "% improved"
    print("\nOld time:", round(old_elapsed, 4), ", New time:", round(new_elapsed, 4), ",", improvement)
3941

42+
# Test arrays of size 1 by a small n with varying missing values
4043
@pytest.mark.parametrize("n,p",[(n, p) for p in perc for n in small_n])
4144
def test_time_1_by_n(n,p):
4245
X = gen_matrix(1, n, p)
@@ -48,6 +51,7 @@ def test_time_1_by_n(n,p):
4851

4952
relative_assert(end-start, old_end-old_start)
5053

54+
# Test arrays of size 1 by a large n with varying missing values
5155
@pytest.mark.parametrize("N,p",[(N, p) for p in perc for N in large_n])
5256
def test_time_1_by_N(N,p):
5357
X = gen_matrix(1, N, p)
@@ -59,6 +63,7 @@ def test_time_1_by_N(N,p):
5963

6064
relative_assert(end-start, old_end-old_start)
6165

66+
# Test arrays of a small n by 1 with varying missing values
6267
@pytest.mark.parametrize("n,p",[(n, p) for p in perc for n in small_n])
6368
def test_time_n_by_1(n,p):
6469
X = gen_matrix(n, 1, p)
@@ -70,6 +75,7 @@ def test_time_n_by_1(n,p):
7075

7176
relative_assert(end-start, old_end-old_start)
7277

78+
# Test arrays of a large n by 1 with varying missing values
7379
@pytest.mark.parametrize("N,p",[(N, p) for p in perc for N in large_n])
7480
def test_time_N_by_1(N,p):
7581
X = gen_matrix(N, 1, p)
@@ -81,6 +87,7 @@ def test_time_N_by_1(N,p):
8187

8288
relative_assert(end-start, old_end-old_start)
8389

90+
# Test arrays of a small n by a small n with varying missing values
8491
@pytest.mark.parametrize("n1,n2,p",[(n1,n2,p) for p in perc for n1 in small_n for n2 in small_n])
8592
def test_time_n_by_n(n1,n2,p):
8693
X = gen_matrix(n1, n2, p)
@@ -92,6 +99,7 @@ def test_time_n_by_n(n1,n2,p):
9299

93100
relative_assert(end-start, old_end-old_start)
94101

102+
# Test arrays of a small n by a large n with varying missing values
95103
@pytest.mark.parametrize("n,N,p",[(n,N,p) for p in perc for n in small_n for N in large_n])
96104
def test_time_n_by_N(n,N,p):
97105
X = gen_matrix(n, N, p)
@@ -103,6 +111,9 @@ def test_time_n_by_N(n,N,p):
103111

104112
relative_assert(end-start, old_end-old_start)
105113

114+
# Test arrays of a large n by a small n with varying missing values
115+
### This is the most important test case since it is the most likely scenario for usage
116+
### (More likely to be testing a large number of samples (rows) with relatively few features (columns))
106117
@pytest.mark.parametrize("N,n,p",[(N,n,p) for p in perc for n in small_n for N in large_n])
107118
def test_time_N_by_n(N,n,p):
108119
X = gen_matrix(N, n, p)
@@ -114,13 +125,15 @@ def test_time_N_by_n(N,n,p):
114125

115126
relative_assert(end-start, old_end-old_start)
116127

117-
# @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for n in large_n for N in large_n])
118-
# def test_time_N_by_N(N1, N2):
119-
# X = gen_matrix(N1, N2, p)
128+
# Test arrays of a large n by a large n with varying missing values
129+
# (These cases take a VERY long time to run, so they are commented out here and are better run individually)
130+
## @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for N1 in large_n for N2 in large_n])
131+
## def test_time_N_by_N(N1, N2, p):
132+
## X = gen_matrix(N1, N2, p)
120133

121-
# start, end = run_test(X)
122-
# old_start, old_end = run_test(X, old=True)
134+
## start, end = run_test(X)
135+
## old_start, old_end = run_test(X, old=True)
123136

124-
# output_res(old_end,old_start,end,start)
137+
## output_res(old_end,old_start,end,start)
125138

126-
# relative_assert(end-start, old_end-old_start)
139+
## relative_assert(end-start, old_end-old_start)

0 commit comments

Comments
 (0)
0