1
+ #############################
2
+ # ECLAT algorithm in Python #
3
+ #############################
4
+
5
+ #
6
+ # Importing the libraries
7
+ #
8
+ import numpy as np
9
+ import matplotlib .pyplot as plt
10
+ import pandas as pd
11
+ from itertools import combinations
12
+
13
+ #
14
+ # Data Preprocessing
15
+ #
16
# Load the raw basket data; header=None because the first row of the CSV is
# already a transaction, not a header.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)

#
# Generating correct input format
#
# Convert every cell to a string, one inner list per CSV row.  The shape is
# taken from the loaded data rather than hard-coded, so files with a different
# number of rows or columns are handled as well.
# NOTE(review): cells that pandas reads as missing are stringified too
# (presumably as 'nan') and therefore show up as items -- confirm whether
# they should be filtered out before mining.
transactions = [[str(item) for item in row] for row in dataset.values]
22
+
23
+ #
24
+ # ECLAT function (any k-item combo)
25
+ #
26
def eclat(transactions_list, output_filename, min_support=0.002):
    """Count co-occurring item combinations and write their supports to a file.

    For every transaction, each combination of two or more distinct items is
    enumerated and tallied.  The support of a combination is the number of
    transactions containing it divided by the total number of transactions.

    NOTE: a transaction with n distinct items yields on the order of 2**n
    combinations, so long transactions make this very expensive.

    transactions_list: list of lists, each list representing a transaction, e.g.

        dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
        transactions_list = [[str(dataset.values[j,i]) for i in range(0,20)] for j in range(0,7501)]

    output_filename: string to specify output filename, e.g. "eclat_out.tsv".
        The file is tab-separated: first column is the comma-separated set of
        goods, second column is the support.

    min_support: combinations with support < min_support are skipped when
        writing the output file, e.g. 0.002

    Returns a list of (combo, support) tuples sorted by descending support,
    where combo is a tuple of items in sorted order.  The returned list is
    NOT filtered by min_support; only the output file is.

    Also prints the time spent in the counting phase.
    """
    import time
    from collections import Counter

    t_start = time.time()

    #
    # Count every k-item combination (k >= 2) once per transaction
    #
    combo_counts = Counter()
    for transaction in transactions_list:
        # De-duplicate and sort so the same item set always maps to the
        # same tuple key regardless of the order within a transaction.
        goods = sorted(set(transaction))
        for k in range(2, len(goods) + 1):
            combo_counts.update(combinations(goods, k))

    # Only the counting phase is timed.
    t_duration = time.time() - t_start

    #
    # Calculate supports for combinations of goods
    #
    # NOTE: Support(M) = #transactions inc. M / #Total transactions,
    #       i.e. M's popularity
    n_transactions = len(transactions_list)
    combo_support_vec = [
        (combo, float(count) / n_transactions)
        for combo, count in combo_counts.items()
    ]

    #
    # Sort in order of support (stable, so ties keep first-seen order)
    #
    combo_support_vec.sort(key=lambda pair: pair[1], reverse=True)

    #
    # Create tab-separated output file (skipping sets w/ < min_support)
    #
    with open(output_filename, "w") as fo:
        for combo, support in combo_support_vec:
            if support >= min_support:
                fo.write(", ".join(combo) + "\t" + str(support) + "\n")

    print("Completion time (seconds):" + str(t_duration))
    return combo_support_vec
87
+
88
# Mine the prepared transactions, requesting "./eclat.tsv" as the output path
# and a minimum support of 0.2% of all transactions.
combos_vs_supports = eclat(
    transactions,
    "./eclat.tsv",
    min_support=0.002,
)
0 commit comments