Add files via upload · GitHubberFitz/ScriptsDump@d6c2604 · GitHub
[go: up one dir, main page]

Skip to content

Commit d6c2604

Browse files
authored
Add files via upload
1 parent d5b6a8a commit d6c2604

File tree

1 file changed

+88
-0
lines changed
  • Machine_Learning/src/Association Rule Learning/Eclat

1 file changed

+88
-0
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#############################
2+
# ECLAT algorithm in Python #
3+
#############################
4+
5+
#
6+
# Importing the libraries
7+
#
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
import pandas as pd
11+
from itertools import combinations
12+
13+
#
14+
# Data Preprocessing
15+
#
16+
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)

#
# Generating correct input format
#
# Build one list of item strings per CSV row. The row and column counts are
# taken from the loaded data instead of being hard-coded, so a file with a
# different number of rows or columns is read completely and without
# IndexError.
# NOTE(review): empty cells presumably arrive as NaN and therefore become the
# string 'nan' here -- confirm whether they should be dropped before mining.
transactions = [[str(item) for item in row] for row in dataset.values]
22+
23+
#
24+
# ECLAT function (any k-item combo)
25+
#
26+
def eclat(transactions_list, output_filename, min_support=0.002):
    """Count co-occurring item combinations and write their supports to a file.

    For every transaction, each combination of two or more distinct items is
    enumerated and counted. The support of a combination is the number of
    transactions that contain it divided by the total number of transactions.

    transactions_list: list of lists, each inner list representing one
        transaction as item-name strings, e.g.

            dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
            transactions_list = [[str(dataset.values[j,i]) for i in range(0,20)] for j in range(0,7501)]

        Items are treated as opaque strings and nothing is filtered out, so
        a placeholder value such as 'nan' is counted like any other item.

    output_filename: path of the tab-separated output file, e.g.
        "eclat_out.tsv". The first column is the comma-separated set of
        goods, the second column is its support.

    min_support: combinations with support < min_support are left out of
        the output file (they are still part of the return value),
        e.g. 0.002

    Returns a list of (combo, support) tuples for every combination seen,
    sorted by descending support. Each combo is a tuple of item names in
    sorted order.

    Note: the number of combinations grows exponentially with the number of
    distinct items in a single transaction.
    """
    import time

    t_start = time.time()

    combos_to_counts = {}
    for transaction in transactions_list:
        # np.unique de-duplicates and sorts the items, so every combination
        # is generated once per transaction and always in the same order,
        # which makes the tuple usable as a dictionary key. Because the
        # combinations are drawn from the transaction's own items, no
        # separate "is contained in the transaction" check is needed.
        goods = np.unique(transaction).tolist()
        for k in range(2, len(goods) + 1):
            for combo in combinations(goods, k):
                combos_to_counts[combo] = combos_to_counts.get(combo, 0) + 1

    # Only the counting phase is timed; sorting and file output are excluded.
    t_duration = time.time() - t_start

    #
    # Calculate supports for combinations of goods
    #
    # NOTE: Support(M) = #transactions inc. M / #Total transactions,
    #       i.e. M's popularity
    total = len(transactions_list)
    combo_support_vec = [
        (combo, float(count) / total)
        for combo, count in combos_to_counts.items()
    ]

    #
    # Sort in order of support (highest first; ties keep first-seen order)
    #
    combo_support_vec.sort(key=lambda pair: pair[1], reverse=True)

    #
    # Create tab-separated output file (skipping sets w/ < min_support)
    #
    with open(output_filename, "w") as fo:
        for combo, support in combo_support_vec:
            if support >= min_support:
                fo.write(", ".join(combo) + "\t" + str(support) + "\n")

    print("Completion time (seconds):" + str(t_duration))
    return combo_support_vec
87+
88+
# Run ECLAT on the prepared transactions and keep the returned
# (item combination, support) pairs for further inspection.
combos_vs_supports = eclat(
    transactions,
    "./eclat.tsv",
    min_support=0.002,
)

0 commit comments

Comments
 (0)
0