SkuaD01
diff --git a/‎sklearn/impute/tests/test_time.py
Lines changed: 26 additions & 13 deletions b/‎sklearn/impute/tests/test_time.py
Lines changed: 26 additions & 13 deletions
@@ -5,21 +5,22 @@
 import profile
 import pytest
 
+##################### THIS TEST SUITE WILL TAKE APPROXIMATELY 15 MINUTES, BUT VARIES BY MACHINE ############################
 
-# Total Original Time:  97.91s
-# Total new Time:       48.07s
-
-perc = [0,0.1,0.5,0.9,1]
+# Initialise constants
+epsilon = 0.5
+perc = [0.1,0.5,0.9] # We want to test a varying amount of missing values to impute; each of these represents the fraction of missing values in X (e.g. 0.1 = 10%)
 small_n = [100,300,500]
 large_n = [1000,3000,5000]
 
+# Generate a random x-by-y array in which roughly (p*100)% of the values are missing
 def gen_matrix(x, y, p):
     np.random.seed(1)
     X = np.random.random([x, y])
     X = pd.DataFrame(X).mask(X <= p)
     return X
 
-
+# Time either the (simulated) old code path or the new one, and return the start and end times
 def run_test(X, old=False):
     start = time.time()
     if (old):
@@ -32,11 +33,13 @@ def run_test(X, old=False):
     return start,end
 
 def relative_assert(new, old):
-    assert pytest.approx(new).__lt__(pytest.approx(old, abs=1))
+    # Since smaller times yield more sporadic results, a tolerance of epsilon seconds is allowed: the new time passes if it is within epsilon of the old time or strictly less than it
+    assert ((new == pytest.approx(old, abs=epsilon)) or (new < old))
 
 def output_res(old_end,old_start,end,start):
     print("\nOld time:", round((old_end-old_start),4) ,", New time:", round((end-start),4),",", str(round(((old_end-old_start)/(end-start) - 1)*100, 2))+"% improved")
 
+# Test arrays of size 1 by a small n with varying missing values
 @pytest.mark.parametrize("n,p",[(n, p) for p in perc for n in small_n])
 def test_time_1_by_n(n,p):
     X = gen_matrix(1, n, p)
@@ -48,6 +51,7 @@ def test_time_1_by_n(n,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of size 1 by a large n with varying missing values
 @pytest.mark.parametrize("N,p",[(N, p) for p in perc for N in large_n])
 def test_time_1_by_N(N,p):
     X = gen_matrix(1, N, p)
@@ -59,6 +63,7 @@ def test_time_1_by_N(N,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of a small n by 1 with varying missing values
 @pytest.mark.parametrize("n,p",[(n, p) for p in perc for n in small_n])
 def test_time_n_by_1(n,p):
     X = gen_matrix(n, 1, p)
@@ -70,6 +75,7 @@ def test_time_n_by_1(n,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of a large n by 1 with varying missing values
 @pytest.mark.parametrize("N,p",[(N, p) for p in perc for N in large_n])
 def test_time_N_by_1(N,p):
     X = gen_matrix(N, 1, p)
@@ -81,6 +87,7 @@ def test_time_N_by_1(N,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of a small n by a small n with varying missing values
 @pytest.mark.parametrize("n1,n2,p",[(n1,n2,p) for p in perc for n1 in small_n for n2 in small_n])
 def test_time_n_by_n(n1,n2,p):
     X = gen_matrix(n1, n2, p)
@@ -92,6 +99,7 @@ def test_time_n_by_n(n1,n2,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of a small n by a large n with varying missing values
 @pytest.mark.parametrize("n,N,p",[(n,N,p) for p in perc for n in small_n for N in large_n])
 def test_time_n_by_N(n,N,p):
     X = gen_matrix(n, N, p)
@@ -103,6 +111,9 @@ def test_time_n_by_N(n,N,p):
 
     relative_assert(end-start, old_end-old_start)
 
+# Test arrays of a large n by a small n with varying missing values
+### This is the most important test case since it is the most likely scenario for usage
+### (More likely to be testing a large number of features)
 @pytest.mark.parametrize("N,n,p",[(N,n,p) for p in perc for n in small_n for N in large_n])
 def test_time_N_by_n(N,n,p):
     X = gen_matrix(N, n, p)
@@ -114,13 +125,15 @@ def test_time_N_by_n(N,n,p):
 
     relative_assert(end-start, old_end-old_start)
 
-# @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for n in large_n for N in large_n])
-# def test_time_N_by_N(N1, N2):
-#     X = gen_matrix(N1, N2, p)
+# Test arrays of a large n by a large n with varying missing values
+# (This takes a VERY long time to run, so it is left commented out; these cases are better run individually)
+## @pytest.mark.parametrize("N1,N2,p",[(N1,N2,p) for p in perc for N1 in large_n for N2 in large_n])
+## def test_time_N_by_N(N1, N2, p):
+##     X = gen_matrix(N1, N2, p)
 
-#     start, end = run_test(X)
-#     old_start, old_end = run_test(X, old=True)
+##     start, end = run_test(X)
+##     old_start, old_end = run_test(X, old=True)
 
-#     output_res(old_end,old_start,end,start)
+##     output_res(old_end,old_start,end,start)
 
-#     relative_assert(end-start, old_end-old_start)
+##     relative_assert(end-start, old_end-old_start)