|
| 1 | +""" |
| 2 | +@Author: Aseem Jain |
| 3 | +@Linkedin: https://www.linkedin.com/in/premaseem/ |
| 4 | +@Github: https://github.com/premaseem/pythonLab/tree/master/challenge |
| 5 | +
|
| 6 | +''' |
| 7 | +Suppose we have an unsorted log file of accesses to web resources. |
| 8 | +Each log entry consists of an access time, the ID of the user making the access, and the resource ID. |
| 9 | +
|
| 10 | +The access time is represented as seconds since 00:00:00, and all times are assumed to be in the same day. |
| 11 | +
|
| 12 | +For example: |
| 13 | +logs1 = [ |
| 14 | + ["58523", "user_1", "resource_1"], |
| 15 | + ["62314", "user_2", "resource_2"], |
| 16 | + ["54001", "user_1", "resource_3"], |
| 17 | + ["200", "user_6", "resource_5"], |
| 18 | + ["215", "user_6", "resource_4"], |
| 19 | + ["54060", "user_2", "resource_3"], |
| 20 | + ["53760", "user_3", "resource_3"], |
| 21 | + ["58522", "user_22", "resource_1"], |
| 22 | + ["53651", "user_5", "resource_3"], |
| 23 | + ["2", "user_6", "resource_1"], |
| 24 | + ["100", "user_6", "resource_6"], |
| 25 | + ["400", "user_7", "resource_2"], |
| 26 | + ["100", "user_8", "resource_6"], |
| 27 | + ["54359", "user_1", "resource_3"], |
| 28 | +] |
| 29 | +
|
| 30 | +Example 2: |
| 31 | +logs2 = [ |
| 32 | + ["300", "user_1", "resource_3"], |
| 33 | + ["599", "user_1", "resource_3"], |
| 34 | + ["900", "user_1", "resource_3"], |
| 35 | + ["1199", "user_1", "resource_3"], |
| 36 | + ["1200", "user_1", "resource_3"], |
| 37 | + ["1201", "user_1", "resource_3"], |
| 38 | + ["1202", "user_1", "resource_3"] |
| 39 | +] |
| 40 | +
|
| 41 | +
|
| 42 | +
|
| 43 | +Write a function that takes the logs and returns the resource with the highest number of accesses in any 5 minute window, together with how many accesses it saw. |
| 44 | +
|
| 45 | +Expected Output: |
| 46 | +most_requested_resource(logs1) # => ('resource_3', 3) |
| 47 | +Reason: resource_3 is accessed at 53760, 54001, and 54060 |
| 48 | +
|
| 49 | +most_requested_resource(logs2) # => ('resource_3', 4) |
| 50 | +Reason: resource_3 is accessed at 1199, 1200, 1201, and 1202 |
| 51 | +
|
| 52 | +Complexity analysis variables: |
| 53 | +
|
| 54 | +n: number of logs in the input |
| 55 | +
|
| 56 | +""" |
| 57 | + |
# Sample access log used by the demos below.  Each row is
# [seconds since 00:00:00 (as a string), user id, resource id].
logs1 = [
    ["58523", "user_1", "resource_1"],
    ["62314", "user_2", "resource_2"],
    ["54001", "user_1", "resource_3"],
    ["200", "user_6", "resource_5"],
    ["215", "user_6", "resource_4"],
    ["54060", "user_2", "resource_3"],
    ["53760", "user_3", "resource_3"],
    ["58522", "user_22", "resource_1"],
    ["53651", "user_5", "resource_3"],
    ["2", "user_6", "resource_1"],
    ["100", "user_6", "resource_6"],
    ["400", "user_7", "resource_2"],
    ["100", "user_8", "resource_6"],
    ["54359", "user_1", "resource_3"],
]
| 74 | + |
# Second sample log: a single user hitting a single resource, with four
# accesses clustered at 1199-1202 seconds.
logs2 = [
    ["300", "user_1", "resource_3"],
    ["599", "user_1", "resource_3"],
    ["900", "user_1", "resource_3"],
    ["1199", "user_1", "resource_3"],
    ["1200", "user_1", "resource_3"],
    ["1201", "user_1", "resource_3"],
    ["1202", "user_1", "resource_3"],
]
| 84 | + |
| 85 | + |
| 86 | +# Analyze logs and output first and last access time for each user |
| 87 | + |
| 88 | +""" |
| 89 | +1. Transform list into map with user as key and list of ts as value |
| 90 | +2. Data massage, sort the list and grab first and last ts |
| 91 | +3. return or print |
| 92 | +""" |
| 93 | + |
def sol1(lst):
    """Report the first and last access time of every user in the log.

    Each entry of ``lst`` is read as ``[timestamp, user_id, ...]`` and the
    timestamp is converted with ``int``.  The raw input, the per-user
    timestamp map and the final answer are printed along the way.

    Returns a dict mapping each user id to ``[earliest, latest]``.
    """
    print(lst)

    # Group the integer timestamps under the user that produced them.
    times_by_user = {}
    for entry in lst:
        times_by_user.setdefault(entry[1], []).append(int(entry[0]))
    print(times_by_user)

    # The ends of each user's sorted timestamps are the first/last access.
    first_last = {}
    for user, stamps in times_by_user.items():
        ordered = sorted(stamps)
        first_last[user] = [ordered[0], ordered[-1]]

    print("final map as answer", first_last)
    return first_last
| 107 | + |
| 108 | + |
# Demo run on the logs1 sample; sol1 prints its intermediate data and answer.
sol1(logs1)
| 110 | + |
| 111 | + |
| 112 | +# Coding challenge: analyze the logs and report the resource with the most accesses in any 5 minute window |
| 113 | + |
| 114 | +# => ('resource_3', 3) |
| 115 | +# Reason: resource_3 is accessed at 53760, 54001, and 54060 |
| 116 | + |
| 117 | +# Algo |
| 118 | +""" |
| 119 | +1. Transform to map k: resource_id v: list of timestamps |
| 120 | +2. Sort the timestamps and calculate the frequency in 5 min window |
| 121 | +3. return the highest access resource. |
| 122 | +""" |
| 123 | + |
def freq(lst, window_seconds=300):
    """Collect, for every timestamp, the accesses that fall in its window.

    For each ``x`` in ``lst`` a window list is built holding every ``y`` from
    ``lst`` with ``x <= y <= x + window_seconds`` (both ends inclusive, in
    ``lst`` order).  The list of windows is printed before the summary
    values are appended.

    Args:
        lst: integer timestamps (seconds) for one resource.
        window_seconds: window length in seconds; defaults to 300 (5 minutes).

    Returns:
        The list of windows (one per input timestamp, in input order) with
        two extra trailing items: ``[-2]`` is the size of the largest window
        and ``[-1]`` is the index of that window (the last one on ties).
        For an empty ``lst`` the result is ``[0, 0]``.
    """
    windows = [
        [y for y in lst if x <= y <= x + window_seconds]
        for x in lst
    ]

    best_size = 0
    best_index = 0
    for index, window in enumerate(windows):
        # "<=" keeps the established tie-break: the last largest window wins.
        if best_size <= len(window):
            best_size = len(window)
            best_index = index

    print(windows)
    windows.append(best_size)
    windows.append(best_index)
    return windows


def most_requested_resource(l, window_seconds=300):
    """Find the resource with the most accesses inside any single window.

    Each entry of ``l`` is read as ``[timestamp, user_id, resource_id]``;
    the timestamp is converted with ``int``.  Timestamps are grouped per
    resource and ``freq`` is used to find each resource's busiest window.
    The intermediate data and a one-line "Reason" summary are printed.

    Args:
        l: list of log entries.
        window_seconds: window length in seconds; defaults to 300 (5 minutes).

    Returns:
        ``(resource_id, access_count)`` for the busiest resource, e.g.
        ``('resource_3', 3)``.  On ties the resource seen last wins.
        Returns ``(None, 0)`` when ``l`` is empty.
    """
    print(l)

    # Group the integer timestamps under the resource that was accessed.
    times_by_resource = {}
    for entry in l:
        times_by_resource.setdefault(entry[2], []).append(int(entry[0]))
    print(times_by_resource)

    windows_by_resource = {
        resource_id: freq(stamps, window_seconds)
        for resource_id, stamps in times_by_resource.items()
    }
    print(windows_by_resource)

    best_count = 0
    best_window = None
    best_resource = None
    for resource_id, summary in windows_by_resource.items():
        # freq() stores the largest window size at [-2] and its index at [-1].
        if best_count <= summary[-2]:
            best_count = summary[-2]
            best_window = summary[summary[-1]]
            best_resource = resource_id

    print(f"Reason: {best_resource} is accessed max frequency of {best_count} with ts {best_window}")
    # Return the answer so callers can use it, not only read it from stdout.
    return best_resource, best_count
| 170 | + |
| 171 | + |
| 172 | + |
| 173 | + |
| 174 | + |
# Demo run on the logs1 sample; the function prints its findings.
most_requested_resource(logs1)
| 176 | + |
| 177 | + |
| 178 | + |
| 179 | + |
0 commit comments