5
5
import profile
6
6
import pytest
7
7
8
+ ##################### THIS TEST SUITE WILL TAKE APPROXIMATELY 15 MINUTES, BUT VARIES BY MACHINE ############################
8
9
9
- # Total Original Time: 97.91s
10
- # Total new Time: 48.07s
11
-
12
- perc = [0 ,0.1 ,0.5 ,0.9 ,1 ]
10
+ # Initialise constants
11
+ epsilon = 0.5
12
+ perc = [0.1 ,0.5 ,0.9 ] # We want to test a varying amount of missing values to impute; each of these represents the percentage of missing values in X
13
13
small_n = [100 ,300 ,500 ]
14
14
large_n = [1000 ,3000 ,5000 ]
15
15
16
+ # generate random array of size x by y with (p*100)% missing values
16
17
def gen_matrix(x, y, p):
    """Build a reproducible ``x`` by ``y`` DataFrame with missing values.

    Entries are drawn uniformly from [0, 1) using a fixed seed, and every
    entry whose drawn value is ``<= p`` is replaced by NaN, so roughly
    ``p * 100`` percent of the matrix ends up missing.

    Args:
        x: Number of rows.
        y: Number of columns.
        p: Masking threshold; cells with a drawn value ``<= p`` become NaN.

    Returns:
        A ``pandas.DataFrame`` of shape ``(x, y)`` with the masked cells
        set to NaN.
    """
    # Reseed on every call so the same (x, y, p) always produces the same
    # matrix. NOTE: this resets NumPy's global random state.
    np.random.seed(1)
    values = np.random.random([x, y])
    # mask() replaces the cells where the condition is True (NaN by default).
    return pd.DataFrame(values).mask(values <= p)
21
22
22
-
23
+ # Run (simulated) old time or new time, return start and end times
23
24
def run_test (X , old = False ):
24
25
start = time .time ()
25
26
if (old ):
@@ -32,11 +33,13 @@ def run_test(X, old=False):
32
33
return start ,end
33
34
34
35
def relative_assert(new, old, tolerance=None):
    """Assert that the new timing is not meaningfully slower than the old one.

    The check passes when ``new`` is strictly less than ``old``, or when the
    two differ by at most ``tolerance`` seconds. Small timings give more
    sporadic results, so the tolerance absorbs that noise.

    Args:
        new: Elapsed time of the new implementation, in seconds.
        old: Elapsed time of the old implementation, in seconds.
        tolerance: Allowed absolute slack in seconds. When omitted, the
            module-level ``epsilon`` constant is used.

    Raises:
        AssertionError: If ``new`` exceeds ``old`` by more than ``tolerance``.
    """
    if tolerance is None:
        tolerance = epsilon
    # Same condition as ``new == pytest.approx(old, abs=tolerance) or
    # new < old``: with only ``abs`` given, approx compares the absolute
    # difference against that tolerance.
    assert new < old or abs(old - new) <= tolerance, (
        f"new time {new:.4f}s exceeds old time {old:.4f}s "
        f"by more than {tolerance}s"
    )
36
38
37
39
def output_res(old_end, old_start, end, start):
    """Print the old and new elapsed times and the relative improvement.

    Args:
        old_end: End timestamp of the old run.
        old_start: Start timestamp of the old run.
        end: End timestamp of the new run.
        start: Start timestamp of the new run.
    """
    old_elapsed = old_end - old_start
    new_elapsed = end - start
    if new_elapsed == 0:
        # A coarse clock can report zero elapsed time for a very fast run;
        # the ratio is undefined then, so report that instead of raising
        # ZeroDivisionError.
        improvement = "improvement n/a (new time was zero)"
    else:
        improvement = (
            str(round((old_elapsed / new_elapsed - 1) * 100, 2)) + "% improved"
        )
    print(
        "\n Old time:", round(old_elapsed, 4),
        ", New time:", round(new_elapsed, 4),
        ",", improvement,
    )
39
41
42
+ # Test arrays of size 1 by a small n with varying missing values
40
43
@pytest .mark .parametrize ("n,p" ,[(n , p ) for p in perc for n in small_n ])
41
44
def test_time_1_by_n (n ,p ):
42
45
X = gen_matrix (1 , n , p )
@@ -48,6 +51,7 @@ def test_time_1_by_n(n,p):
48
51
49
52
relative_assert (end - start , old_end - old_start )
50
53
54
+ # Test arrays of size 1 by a large n with varying missing values
51
55
@pytest .mark .parametrize ("N,p" ,[(N , p ) for p in perc for N in large_n ])
52
56
def test_time_1_by_N (N ,p ):
53
57
X = gen_matrix (1 , N , p )
@@ -59,6 +63,7 @@ def test_time_1_by_N(N,p):
59
63
60
64
relative_assert (end - start , old_end - old_start )
61
65
66
+ # Test arrays of a small n by 1 with varying missing values
62
67
@pytest .mark .parametrize ("n,p" ,[(n , p ) for p in perc for n in small_n ])
63
68
def test_time_n_by_1 (n ,p ):
64
69
X = gen_matrix (n , 1 , p )
@@ -70,6 +75,7 @@ def test_time_n_by_1(n,p):
70
75
71
76
relative_assert (end - start , old_end - old_start )
72
77
78
+ # Test arrays of a large n by 1 with varying missing values
73
79
@pytest .mark .parametrize ("N,p" ,[(N , p ) for p in perc for N in large_n ])
74
80
def test_time_N_by_1 (N ,p ):
75
81
X = gen_matrix (N , 1 , p )
@@ -81,6 +87,7 @@ def test_time_N_by_1(N,p):
81
87
82
88
relative_assert (end - start , old_end - old_start )
83
89
90
+ # Test arrays of a small n by a small n with varying missing values
84
91
@pytest .mark .parametrize ("n1,n2,p" ,[(n1 ,n2 ,p ) for p in perc for n1 in small_n for n2 in small_n ])
85
92
def test_time_n_by_n (n1 ,n2 ,p ):
86
93
X = gen_matrix (n1 , n2 , p )
@@ -92,6 +99,7 @@ def test_time_n_by_n(n1,n2,p):
92
99
93
100
relative_assert (end - start , old_end - old_start )
94
101
102
+ # Test arrays of a small n by a large n with varying missing values
95
103
@pytest .mark .parametrize ("n,N,p" ,[(n ,N ,p ) for p in perc for n in small_n for N in large_n ])
96
104
def test_time_n_by_N (n ,N ,p ):
97
105
X = gen_matrix (n , N , p )
@@ -103,6 +111,9 @@ def test_time_n_by_N(n,N,p):
103
111
104
112
relative_assert (end - start , old_end - old_start )
105
113
114
+ # Test arrays of a large n by a small n with varying missing values
115
+ ### This is the most important test case since it is the most likely scenario for usage
116
+ ### (More likely to be testing a large number of features)
106
117
@pytest .mark .parametrize ("N,n,p" ,[(N ,n ,p ) for p in perc for n in small_n for N in large_n ])
107
118
def test_time_N_by_n (N ,n ,p ):
108
119
X = gen_matrix (N , n , p )
@@ -114,13 +125,15 @@ def test_time_N_by_n(N,n,p):
114
125
115
126
relative_assert (end - start , old_end - old_start )
116
127
117
- # @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for n in large_n for N in large_n])
118
- # def test_time_N_by_N(N1, N2):
119
- # X = gen_matrix(N1, N2, p)
128
+ # Test arrays of a large n by a large n with varying missing values
129
+ # (This takes a VERY long time to run, since they are more often called individually)
130
+ ## @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for N1 in large_n for N2 in large_n])
131
+ ## def test_time_N_by_N(N1, N2, p):
132
+ ## X = gen_matrix(N1, N2, p)
120
133
121
- # start, end = run_test(X)
122
- # old_start, old_end = run_test(X, old=True)
134
+ ## start, end = run_test(X)
135
+ ## old_start, old_end = run_test(X, old=True)
123
136
124
- # output_res(old_end,old_start,end,start)
137
+ ## output_res(old_end,old_start,end,start)
125
138
126
- # relative_assert(end-start, old_end-old_start)
139
+ ## relative_assert(end-start, old_end-old_start)
0 commit comments