10000 Indeed interview question · premaseem/pythonLab@9805391 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9805391

Browse files
Aseem JainAseem Jain
authored and committed
Indeed interview question
Coding Challenge to analyze logs and output access timestamp in 5 minute window
1 parent ccac44a commit 9805391

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
"""
2+
@Author: Aseem Jain
3+
@Linkedin: https://www.linkedin.com/in/premaseem/
4+
@Github: https://github.com/premaseem/pythonLab/tree/master/challenge
5+
6+
'''
7+
Suppose we have an unsorted log file of accesses to web resources.
8+
Each log entry consists of an access time, the ID of the user making the access, and the resource ID.
9+
10+
The access time is represented as seconds since 00:00:00, and all times are assumed to be in the same day.
11+
12+
For example:
13+
logs1 = [
14+
["58523", "user_1", "resource_1"],
15+
["62314", "user_2", "resource_2"],
16+
["54001", "user_1", "resource_3"],
17+
["200", "user_6", "resource_5"],
18+
["215", "user_6", "resource_4"],
19+
["54060", "user_2", "resource_3"],
20+
["53760", "user_3", "resource_3"],
21+
["58522", "user_22", "resource_1"],
22+
["53651", "user_5", "resource_3"],
23+
["2", "user_6", "resource_1"],
24+
["100", "user_6", "resource_6"],
25+
["400", "user_7", "resource_2"],
26+
["100", "user_8", "resource_6"],
27+
["54359", "user_1", "resource_3"],
28+
]
29+
30+
Example 2:
31+
logs2 = [
32+
["300", "user_1", "resource_3"],
33+
["599", "user_1", "resource_3"],
34+
["900", "user_1", "resource_3"],
35+
["1199", "user_1", "resource_3"],
36+
["1200", "user_1", "resource_3"],
37+
["1201", "user_1", "resource_3"],
38+
["1202", "user_1", "resource_3"]
39+
]
40+
41+
42+
43+
Write a function that takes the logs and returns the resource with the highest number of accesses in any 5 minute window, together with how many accesses it saw.
44+
45+
Expected Output:
46+
most_requested_resource(logs1) # => ('resource_3', 3)
47+
Reason: resource_3 is accessed at 53760, 54001, and 54060
48+
49+
most_requested_resource(logs2) # => ('resource_3', 4)
50+
Reason: resource_3 is accessed at 1199, 1200, 1201, and 1202
51+
52+
Complexity analysis variables:
53+
54+
n: number of logs in the input
55+
56+
"""
57+
58+
# Sample log 1: each record is [access time in seconds, user id, resource id].
logs1 = [
    ["58523", "user_1", "resource_1"],
    ["62314", "user_2", "resource_2"],
    ["54001", "user_1", "resource_3"],
    ["200", "user_6", "resource_5"],
    ["215", "user_6", "resource_4"],
    ["54060", "user_2", "resource_3"],
    ["53760", "user_3", "resource_3"],
    ["58522", "user_22", "resource_1"],
    ["53651", "user_5", "resource_3"],
    ["2", "user_6", "resource_1"],
    ["100", "user_6", "resource_6"],
    ["400", "user_7", "resource_2"],
    ["100", "user_8", "resource_6"],
    ["54359", "user_1", "resource_3"],
]
74+
75+
# Sample log 2: one user hitting one resource, with timestamps that sit on
# the edges of 5 minute (300 second) spans.
logs2 = [
    ["300", "user_1", "resource_3"],
    ["599", "user_1", "resource_3"],
    ["900", "user_1", "resource_3"],
    ["1199", "user_1", "resource_3"],
    ["1200", "user_1", "resource_3"],
    ["1201", "user_1", "resource_3"],
    ["1202", "user_1", "resource_3"],
]
84+
85+
86+
# Analyze logs and output first and last access time for each user
87+
88+
"""
89+
1. Transform list into map with user as key and list of ts as value
90+
2. Data massage, sort the list and grab first and last ts
91+
3. return or print
92+
"""
93+
94+
def sol1(lst):
    """Map each user to their first and last access time.

    Every record in ``lst`` is read as ``[timestamp, user_id, ...]`` and the
    timestamp is converted with ``int``. The raw input, the per-user
    grouping and the final answer are printed along the way.

    Returns a dict of ``user_id -> [earliest, latest]``.
    """
    print(lst)
    times_by_user = {}
    for record in lst:
        times_by_user.setdefault(record[1], []).append(int(record[0]))
    print(times_by_user)
    # The smallest and largest timestamps are the first and last accesses.
    first_last = {
        user: [min(stamps), max(stamps)]
        for user, stamps in times_by_user.items()
    }
    print("final map as answer", first_last)
    return first_last
107+
108+
109+
# Demo run: print and compute each user's first and last access time in logs1.
sol1(logs1)
110+
111+
112+
# Coding Challenge to analyze logs and output access timestamp in 5 minute window
113+
114+
# => ('resource_3', 3)
115+
# Reason: resource_3 is accessed at 53760, 54001, and 54060
116+
117+
# Algo
118+
"""
119+
1. Transform to map k: resource_id v: list of timestamps
120+
2. Sort the timestamps and calculate the frequency in a 5 min window
121+
3. return the highest access resource.
122+
"""
123+
124+
def freq(lst, window=300):
    """Find the busiest ``window``-second span among the timestamps in ``lst``.

    For every timestamp ``x`` (taken in input order) the timestamps ``y``
    with ``x <= y <= x + window`` are collected; both ends are inclusive.

    Args:
        lst: timestamps in seconds; they do not need to be sorted.
        window: span length in seconds. Defaults to 300 (5 minutes), the
            value that was previously hard-coded.

    Returns:
        A list holding one window list per input timestamp, followed by two
        trailing integers: the size of the largest window and the index of
        that window. When several windows tie, the last one wins. An empty
        input yields ``[0, 0]``.
    """
    windows = [[y for y in lst if x <= y <= x + window] for x in lst]
    best_size = 0
    best_index = 0
    for index, hits in enumerate(windows):
        # "<=" (not "<") keeps the last of several equally large windows.
        if best_size <= len(hits):
            best_size = len(hits)
            best_index = index
    print(windows)
    # The two summary values ride at the end of the same list, so they are
    # read back as result[-2] (size) and result[-1] (index).
    windows.append(best_size)
    windows.append(best_index)
    return windows
142+
143+
# def find_max(lst):
144+
145+
146+
def most_requested_resource(l):
    """Return the resource with the most accesses in any 5 minute window.

    Every record in ``l`` is read as ``[timestamp, user_id, resource_id]``
    and the timestamp is converted with ``int``. Timestamps are grouped per
    resource and each group is handed to ``freq``, which reports the size
    and position of that resource's busiest window.

    The input, the grouping, the ``freq`` results and a final "Reason: ..."
    line are printed along the way.

    Args:
        l: list of log records.

    Returns:
        ``(resource_id, count)`` for the busiest window found, e.g.
        ``('resource_3', 3)``. When several resources tie, the one seen
        last wins. An empty ``l`` gives ``(None, 0)``.
    """
    print(l)
    times_by_resource = {}
    for record in l:
        times_by_resource.setdefault(record[2], []).append(int(record[0]))
    print(times_by_resource)

    windows_by_resource = {
        resource_id: freq(stamps)
        for resource_id, stamps in times_by_resource.items()
    }
    print(windows_by_resource)

    best_count = 0
    best_window = None
    best_resource = None
    for resource_id, result in windows_by_resource.items():
        # freq() ends its result with (largest window size, window index).
        if best_count <= result[-2]:
            best_count = result[-2]
            best_window = result[result[-1]]
            best_resource = resource_id
    print(f"Reason: {best_resource} is accessed max frequency of {best_count} with ts {best_window}")
    # Previously nothing was returned, so callers could not use the answer.
    return best_resource, best_count
170+
171+
172+
173+
174+
175+
# Demo run: analyze logs1 for the busiest 5 minute window; the function prints its findings.
most_requested_resource(logs1)
176+
177+
178+
179+

0 commit comments

Comments
 (0)
0