1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Dec 20 09:38:02 2017
4
+
5
+ @author: gualandi
6
+ """
7
+
8
+ # Parse a file where for each row we have:
9
+ # user id | age | gender | occupation | zip code
10
+ # 1|24|M|technician|85711
11
def ParseUsers(filename):
    """Parse a pipe-separated user file into a dictionary.

    Every non-empty row is expected to have the layout
    ``user id|age|gender|occupation|zip code``,
    for example ``1|24|M|technician|85711``.

    Args:
        filename: Path of the UTF-8 encoded file to read.

    Returns:
        A dict mapping the integer user id to the tuple
        ``(age, gender, occupation, zip_code)``; ``age`` is an int, the
        remaining fields are kept as strings.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the user id or the age is not an integer.
        IndexError: If a row has fewer than five fields.
    """
    Rs = {}
    # The context manager closes the handle even when a row fails to parse.
    with open(filename, 'r', encoding="utf-8") as fh:
        for line in fh:
            # Strip the line terminator so it cannot leak into the zip code.
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank lines, e.g. a trailing one
            row = line.split('|')
            Rs[int(row[0])] = (int(row[1]), row[2], row[3], row[4])
    return Rs
19
+
20
+ # user id | item id | rating | timestamp
21
def ParseRatings(filename):
    """Parse a tab-separated ratings file into a dictionary.

    Every non-empty row is expected to hold, separated by single tabs:
    user id, item id, rating, timestamp. The timestamp is ignored.

    Args:
        filename: Path of the ratings file to read.

    Returns:
        A dict mapping the pair ``(user_id, item_id)`` (both ints) to the
        integer rating.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a row has fewer than three fields.
    """
    Rs = {}
    # The context manager closes the handle even when a row fails to parse.
    with open(filename, 'r', encoding="utf-8") as fh:
        for line in fh:
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank lines, e.g. a trailing one
            # Fields are delimited by a bare tab character.
            row = line.split('\t')
            user_id, item_id = int(row[0]), int(row[1])
            Rs[(user_id, item_id)] = int(row[2])
    return Rs
29
+
30
def PrintTop(Ds, top=5):
    """Print the entries of `Ds` that have the largest values.

    Entries are listed in decreasing order of value, one per line, as the
    key followed by its value. At most `top` entries are shown.
    """
    ranked = sorted(Ds.items(), key=lambda entry: entry[1], reverse=True)
    for name, score in ranked[:top]:
        print(name, score)
33
+
34
+ # Support: compute average of a list of values
35
def Mean(Ls):
    """Return the arithmetic mean of the values in `Ls`.

    `Ls` must support both iteration and ``len()`` (a list or a dict view,
    for instance). An empty collection raises ``ZeroDivisionError``.
    """
    total = sum(Ls)
    count = len(Ls)
    return total / count
37
+
38
+ # Alternatively, the "statistics" standard library module can be used
39
+ # Link: https://docs.python.org/3/library/statistics.html
40
+ # ===> from statistics import mean
41
+
42
+ # Exercise 8.2: compute the average of all ratings
43
def ComputeAverage(Ls):
    """Exercise 8.2: return the mean of all values stored in the mapping `Ls`.

    The keys of `Ls` are ignored; only its values are averaged.
    """
    all_values = Ls.values()
    return Mean(all_values)
45
+
46
+ # Esercizio 8.3
47
def ComputeItemAverage(Ls):
    """Exercise 8.3: compute the average rating of every item.

    Args:
        Ls: Mapping whose keys are ``(user_id, item_id)`` pairs and whose
            values are ratings.

    Returns:
        A dict mapping each item id found in `Ls` to the mean of its ratings.
    """
    grouped = {}
    for (_, item_id), rating in Ls.items():
        # Append in place: rebuilding the list with `+` for every rating
        # copies it each time and makes the grouping quadratic.
        grouped.setdefault(item_id, []).append(rating)

    return {item_id: Mean(values) for item_id, values in grouped.items()}
57
+
58
+ # Esercizio 8.4
59
def ComputeUserAverage(Ls):
    """Exercise 8.4: compute the average rating given by every user.

    Args:
        Ls: Mapping whose keys are ``(user_id, item_id)`` pairs and whose
            values are ratings.

    Returns:
        A dict mapping each user id found in `Ls` to the mean of the ratings
        that user gave.
    """
    grouped = {}
    for (user_id, _), rating in Ls.items():
        # Append in place: rebuilding the list with `+` for every rating
        # copies it each time and makes the grouping quadratic.
        grouped.setdefault(user_id, []).append(rating)

    return {user_id: Mean(values) for user_id, values in grouped.items()}
69
+
70
+ # Esercizio 8.5
71
def ComputeUserTypeAverage(Ls, Us):
    """Exercise 8.5: compute the average rating per user category.

    The category of a user is element ``[2]`` of that user's record in `Us`;
    for records built by ``ParseUsers`` this is the occupation. Ratings from
    users missing in `Us` are grouped under the category ``'none'``.

    Args:
        Ls: Mapping whose keys are ``(user_id, item_id)`` pairs and whose
            values are ratings.
        Us: Mapping from user id to a user record (indexable sequence).

    Returns:
        A dict mapping each category to the mean of the ratings given by the
        users of that category.
    """
    grouped = {}
    for (user_id, _), rating in Ls.items():
        record = Us.get(user_id)
        # Indexing the string default ('none'[2]) would yield 'n', so the
        # fallback category is chosen explicitly instead.
        type_id = record[2] if record is not None else 'none'
        # Append in place to avoid copying the list for every rating.
        grouped.setdefault(type_id, []).append(rating)

    return {type_id: Mean(values) for type_id, values in grouped.items()}
82
+
83
+ # Esercizio 8.6
84
def Round(x):
    """Round `x` to zero decimal digits using the built-in ``round``.

    The result keeps the numeric type of `x` (a float stays a float), and
    ties follow the built-in's round-half-to-even behaviour.
    """
    return round(x, ndigits=0)
86
+
87
def PredictAvg(TrainingSet, TestSet):
    """Exercise 8.6: predict the same value for every key of `TestSet`.

    The prediction is the rounded mean of all ratings in `TrainingSet`.
    Returns a dict with the keys of `TestSet`, all mapped to that value.
    """
    baseline = Round(ComputeAverage(TrainingSet))
    return dict.fromkeys(TestSet, baseline)
93
+
94
+ # Esercizio 8.7
95
+ from math import sqrt
96
def RMSE(Yb, Y):
    """Exercise 8.7: root mean squared error between two rating mappings.

    The error is computed over the keys of `Yb`; every such key must also
    be present in `Y`.
    """
    squared_errors = [(Yb[k] - Y[k]) ** 2 for k in Yb]
    return sqrt(Mean(squared_errors))
98
+
99
def nRMSE(Yb, Y, rating_range=4):
    """Return the RMSE of `Yb` against `Y` divided by `rating_range`.

    Args:
        Yb: Mapping of predicted values.
        Y: Mapping of reference values.
        rating_range: Normalisation divisor. The default of 4 keeps the
            original behaviour; it presumably corresponds to the width of a
            1-to-5 rating scale (to be confirmed against the data set).

    Returns:
        The normalised root mean squared error.
    """
    return RMSE(Yb, Y) / rating_range
101
+
102
def RMSE2(Yb, Y):
    """Root mean squared error computed with scikit-learn.

    Reuses a method from a machine learning library
    (http://scikit-learn.org/stable/). Values are paired over the sorted
    keys of `Y`; every such key must also be present in `Yb`.
    """
    from sklearn.metrics import mean_squared_error
    ordered_keys = sorted(Y)
    actual = [Y[k] for k in ordered_keys]
    predicted = [Yb[k] for k in ordered_keys]
    return sqrt(mean_squared_error(actual, predicted))
109
+
110
def R2_Score(Yb, Y):
    """R^2 score computed with scikit-learn's ``r2_score``.

    Reuses a method from a machine learning library
    (http://scikit-learn.org/stable/). Values are paired over the sorted
    keys of `Y`; the values of `Y` are passed as the first argument and the
    values of `Yb` as the second.
    """
    from sklearn.metrics import r2_score
    ordered_keys = sorted(Y)
    actual = [Y[k] for k in ordered_keys]
    predicted = [Yb[k] for k in ordered_keys]
    return r2_score(actual, predicted)
117
+
118
+
119
def MAE(Yb, Y):
    """Mean absolute error computed with scikit-learn.

    Reuses a method from a machine learning library
    (http://scikit-learn.org/stable/). Values are paired over the sorted
    keys of `Y`; every such key must also be present in `Yb`.
    """
    from sklearn.metrics import mean_absolute_error
    ordered_keys = sorted(Y)
    actual = [Y[k] for k in ordered_keys]
    predicted = [Yb[k] for k in ordered_keys]
    return mean_absolute_error(actual, predicted)
126
+
127
+ # Esercizio 8.9
128
def PredictAvgItem(TrainingSet, TestSet):
    """Exercise 8.9: predict each test rating with the item's average.

    For every ``(user_id, item_id)`` key of `TestSet` the prediction is the
    rounded training average of that item, or the rounded global training
    average when the item has no rating in `TrainingSet`.
    """
    fallback = ComputeAverage(TrainingSet)
    item_means = ComputeItemAverage(TrainingSet)
    return {
        (user_id, item_id): Round(item_means.get(item_id, fallback))
        for user_id, item_id in TestSet
    }
136
+
137
def PredictAvgUser(TrainingSet, TestSet):
    """Predict each test rating with the average rating of its user.

    For every ``(user_id, item_id)`` key of `TestSet` the prediction is the
    rounded training average of that user, or the rounded global training
    average when the user has no rating in `TrainingSet`.
    """
    fallback = ComputeAverage(TrainingSet)
    user_means = ComputeUserAverage(TrainingSet)
    return {
        (user_id, item_id): Round(user_means.get(user_id, fallback))
        for user_id, item_id in TestSet
    }
145
+
146
def PredictAvgCategory(TrainingSet, TestSet, Users):
    """Predict each test rating with the average of the user's category.

    The category of a user is element ``[2]`` of that user's record in
    `Users`; for records built by ``ParseUsers`` this is the occupation.
    Users missing from `Users` are assigned the category ``'none'``.

    Args:
        TrainingSet: Mapping ``(user_id, item_id) -> rating`` used to compute
            the averages.
        TestSet: Mapping whose ``(user_id, item_id)`` keys are to be predicted.
        Users: Mapping from user id to a user record (indexable sequence).

    Returns:
        A dict with the keys of `TestSet`; each value is the rounded training
        average of the user's category, or the rounded global training
        average when that category has no training average.
    """
    avg = ComputeAverage(TrainingSet)
    category_means = ComputeUserTypeAverage(TrainingSet, Users)
    Ps = {}
    for key in TestSet:
        user_id, _ = key
        record = Users.get(user_id)
        # Indexing the string default ('none'[2]) would yield 'n', so the
        # fallback category is chosen explicitly instead.
        type_id = record[2] if record is not None else 'none'
        Ps[key] = Round(category_means.get(type_id, avg))
    return Ps
155
+
156
def SingleTest(Users, n, Metric=RMSE):
    """Evaluate every predictor on split `n` and print one score per line.

    The training and test sets are read from ``../data/u<n>.base`` and
    ``../data/u<n>.test``. `Metric` is called as ``Metric(predictions,
    TestSet)`` for each predictor and its result is printed after a label.
    """
    TrainingSet = ParseRatings('../data/u{}.base'.format(n))
    TestSet = ParseRatings('../data/u{}.test'.format(n))

    # Predictors are wrapped in lambdas so each one runs only when its turn
    # comes, right before its score is printed.
    strategies = (
        ('Avg globale: ', lambda: PredictAvg(TrainingSet, TestSet)),
        ('Avg film: ', lambda: PredictAvgItem(TrainingSet, TestSet)),
        ('Avg utente: ', lambda: PredictAvgUser(TrainingSet, TestSet)),
        ('Avg cat user: ', lambda: PredictAvgCategory(TrainingSet, TestSet, Users)),
        ('Avg file/user:', lambda: PredictAvgAvg(TrainingSet, TestSet)),
    )
    for label, predict in strategies:
        print(label, Metric(predict(), TestSet))
165
+
166
+
167
def PredictAvgAvg(TrainingSet, TestSet):
    """Predict each test rating by blending item and user averages.

    For every ``(user_id, item_id)`` key of `TestSet` the prediction is the
    rounded midpoint between the item's training average and the user's
    training average. The global training average replaces whichever of the
    two is missing.
    """
    overall = ComputeAverage(TrainingSet)
    item_means = ComputeItemAverage(TrainingSet)
    user_means = ComputeUserAverage(TrainingSet)

    blended = {}
    for user_id, item_id in TestSet:
        by_item = item_means.get(item_id, overall)
        by_user = user_means.get(user_id, overall)
        blended[(user_id, item_id)] = Round((by_item + by_user) / 2)
    return blended
178
+
179
+
180
+ #-----------------------------------------------
181
+ # MAIN function
182
+ #-----------------------------------------------
183
if __name__ == "__main__":
    # User records are needed by the category-based predictor.
    Users = ParseUsers('../data/u.user')

    # The first branch walks through the exercises on split 1 only; it is
    # switched off by the constant condition and kept for reference.
    if False:
        TrainingSet = ParseRatings('../data/u1.base')
        TestSet = ParseRatings('../data/u1.test')
        print(len(TrainingSet), len(TestSet))

        print('Compute Global Average {}'.format(ComputeAverage(TrainingSet)))

        # Highest averages per item, per user and per user category.
        Is = ComputeItemAverage(TrainingSet)
        PrintTop(Is)

        Us = ComputeUserAverage(TrainingSet)
        PrintTop(Us)

        Ts = ComputeUserTypeAverage(TrainingSet, Users)
        PrintTop(Ts, top=100)

        # Naive prediction: the global average for every test entry.
        Pbar = PredictAvg(TrainingSet, TestSet)

        # Exercise 8.8: error of the naive prediction.
        print(RMSE(Pbar, TestSet))
        # Exercise 8.9: errors of the item, user and category predictors.
        print(RMSE(PredictAvgItem(TrainingSet, TestSet), TestSet))
        print(RMSE(PredictAvgUser(TrainingSet, TestSet), TestSet))
        print(RMSE(PredictAvgCategory(TrainingSet, TestSet, Users), TestSet))
    else:
        # Active path: score every predictor on the splits u1 .. u5.
        for n in range(1, 6):
            print('Test set: training=u{}.base, test=u{}.test:'.format(n, n))
            SingleTest(Users, n, RMSE)
0 commit comments