Count words as runs of ASCII letters/digits and ignore punctuation.

Review revision of the original patch: example.py keeps collections.Counter
and tokenises with a single raw-string regex, replacing the hand-rolled dict
loop and the unescaped '\W' pattern. The example.py post-image hash on its
"index" line predates this revision.

diff --git a/word-count/example.py b/word-count/example.py
index 14dfa9c8846..cc0b10942e5 100644
--- a/word-count/example.py
+++ b/word-count/example.py
@@ -1,9 +1,12 @@
+import re
 from collections import Counter
 
 
 def word_count(text):
-    """Return a Counter object that maps from the words contained in the
-    phrase to their respective counts
+    """Return a Counter mapping each word in the phrase to its count.
+
+    A word is a maximal run of ASCII letters and digits; every other
+    character (punctuation, underscore, whitespace) only separates words.
+    Case is preserved, so 'Go' and 'go' are counted separately.
     """
-    return Counter(text.split())
-
+    return Counter(re.findall(r'[A-Za-z0-9]+', text))
diff --git a/word-count/word_count_test.py b/word-count/word_count_test.py
index 009dc7757fb..163cf9ce77c 100644
--- a/word-count/word_count_test.py
+++ b/word-count/word_count_test.py
@@ -22,9 +22,9 @@ def test_count_multiple_occurences(self):
             word_count('one fish two fish red fish blue fish')
         )
 
-    def test_preserves_punctuation(self):
+    def test_ignores_punctuation(self):
         self.assertEqual(
-            {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, ':': 2, 'javascript!!&@$%^&': 1},
+            {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1},
             word_count('car : carpet as java : javascript!!&@$%^&')
         )
 
@@ -39,16 +39,16 @@ def test_mixed_case(self):
             {'go': 1, 'Go': 1, 'GO': 1},
             word_count('go Go GO')
         )
-    
+
     def test_multiple_spaces(self):
         self.assertEqual(
             {'wait': 1, 'for': 1, 'it': 1},
             word_count('wait for it')
         )
-    
+
     def test_newlines(self):
         self.assertEqual(
-            {'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2, 
+            {'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
              'want': 1, 'your': 1, 'bad': 1, 'romance': 1},
             word_count('rah rah ah ah ah\nroma roma ma\nga ga oh la la\nwant your bad romance')
         )