[023] Gotta go fast! · stephen-codepython/100DaysOfCode@dcb6ab4 · GitHub
[go: up one dir, main page]

Skip to content

Commit dcb6ab4

Browse files
authored
[023] Gotta go fast!
## Benchmark

```python
> %timeit most_common_str(s, 1)
115 µs ± 603 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

> %timeit most_common_re(s, 1)
187 µs ± 2.57 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

> %timeit most_common_iter(s, 1)
691 µs ± 3.62 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

> %timeit get_most_common(get_words(s), 1)
718 µs ± 2.13 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
```
1 parent fa5f7b8 commit dcb6ab4

File tree

1 file changed

+14
-18
lines changed

1 file changed

+14
-18
lines changed

023/harry.py

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,31 @@
11
from collections import Counter
2-
from string import punctuation
2+
from string import punctuation, whitespace
33
import sys
44

55

6-
def strip_punctuation(word):
    '''Return ``word`` with every punctuation character deleted.

    "Punctuation" means the characters in ``string.punctuation``; all
    other characters are kept in their original order.
    '''
    # The third argument of ``str.maketrans`` lists characters to drop.
    drop_punctuation = str.maketrans('', '', punctuation)
    return word.translate(drop_punctuation)
6+
def most_common_str(s, n=None):
    '''Return the ``n`` most frequent words of the text ``s``.

    The text is lower-cased, every character in ``string.punctuation`` is
    deleted, and the remainder is split on runs of whitespace.

    Returns a list of ``(word, count)`` pairs ordered from most to least
    frequent; when ``n`` is None every distinct word is included.
    '''
    drop_punctuation = str.maketrans('', '', punctuation)
    cleaned = s.lower().translate(drop_punctuation)
    word_counts = Counter(cleaned.split())
    return word_counts.most_common(n)
99

1010

11-
def get_words(text):
    '''Read the file at path ``text`` and return its words.

    The contents are lower-cased and split on whitespace, then each token
    is passed through ``strip_punctuation``. The result is a lazy
    ``filter`` iterator (not a set) that skips tokens left empty, so
    repeated words are preserved.
    '''
    with open(text) as source:
        tokens = source.read().lower().split()
    # The file can be closed before cleaning: ``tokens`` is already in memory.
    cleaned = list(map(strip_punctuation, tokens))
    # Stopwords could be dropped here too, but that would need nltk.corpus.
    return filter(None, cleaned)
11+
def most_common_re(s, n=None):
    '''Return the ``n`` most frequent words of ``s``, found with a regex.

    A word is a maximal run of characters that are neither in
    ``string.punctuation`` nor in ``string.whitespace``, matched against
    the lower-cased text. Punctuation therefore separates words here
    ("don't" yields "don" and "t") rather than being deleted.

    Returns a list of ``(word, count)`` pairs ordered from most to least
    frequent; when ``n`` is None every distinct word is included.
    '''
    # Imported here because the module's import block does not provide
    # ``re``; without it the call fails with NameError.
    import re

    # Escape the separators before placing them in the character class:
    # unescaped, the ``\]`` inside ``string.punctuation`` reads as an
    # escaped bracket, so a backslash would not count as a separator.
    separators = re.escape(punctuation + whitespace)
    words = re.findall(rf'[^{separators}]+', s.lower())
    return Counter(words).most_common(n)
1814

1915

20-
def get_most_common(words, n=None):
    '''Tally ``words`` and return ``(word, count)`` pairs by frequency.

    Only the ``n`` most frequent pairs are returned; with ``n`` left as
    None every distinct word is returned, including those seen once.
    '''
    tally = Counter(words)
    return tally.most_common(n)
16+
def most_common_iter(s, n=None):
    '''Return the ``n`` most frequent words of ``s``, cleaned per character.

    The text is lower-cased and split on whitespace; each token then has
    every character in ``string.punctuation`` removed. Tokens that end up
    empty are skipped.

    Returns a list of ``(word, count)`` pairs ordered from most to least
    frequent; when ``n`` is None every distinct word is included.
    '''
    stripped = (''.join(c for c in w if c not in punctuation)
                for w in s.lower().split())
    # A token made only of punctuation (e.g. "-") strips down to '';
    # skip it so the empty string is never counted as a word.
    return Counter(w for w in stripped if w).most_common(n)
2319

2420

2521
if __name__ == "__main__":
    # Script entry point: print the 20 most frequent words of a text file.
    # The path is the first command-line argument; without one the script
    # falls back to 'harry.txt' in the current directory.
    file = sys.argv[1] if len(sys.argv) > 1 else 'harry.txt'

    with open(file) as f:
        common_words = most_common_str(f.read(), n=20)

    # One line per word: the count left-aligned in four columns, then the word.
    for word, count in common_words:
        print('{:<4} {}'.format(count, word))

0 commit comments

Comments
 (0)
0