From 0e3375aa5e1a1a29069345f4449eb4c86386e7ce Mon Sep 17 00:00:00 2001 From: Gabor Date: Mon, 18 May 2015 19:46:28 +0200 Subject: [PATCH 1/2] word-count: the naive approach can be good but in this case it was not the best the test case should not preserve punctuation because 'javascript!!&@$%^&' is not a word. --- word-count/example.py | 10 ++++++---- word-count/word_count_test.py | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/word-count/example.py b/word-count/example.py index 14dfa9c8846..cc0b10942e5 100644 --- a/word-count/example.py +++ b/word-count/example.py @@ -1,9 +1,11 @@ -from collections import Counter - +import re def word_count(text): """Return a Counter object that maps from the words contained in the phrase to their respective counts """ - return Counter(text.split()) - + count = {} + for w in re.split('\W',re.sub('[^A-Za-z0-9]',' ', text)): + if w: + count[w] = count.get(w, 0) + 1 + return count diff --git a/word-count/word_count_test.py b/word-count/word_count_test.py index 009dc7757fb..ff6bcf39d59 100644 --- a/word-count/word_count_test.py +++ b/word-count/word_count_test.py @@ -24,7 +24,7 @@ def test_count_multiple_occurences(self): def test_preserves_punctuation(self): self.assertEqual( - {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, ':': 2, 'javascript!!&@$%^&': 1}, + {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}, word_count('car : carpet as java : javascript!!&@$%^&') ) @@ -39,16 +39,16 @@ def test_mixed_case(self): {'go': 1, 'Go': 1, 'GO': 1}, word_count('go Go GO') ) - + def test_multiple_spaces(self): self.assertEqual( {'wait': 1, 'for': 1, 'it': 1}, word_count('wait for it') ) - + def test_newlines(self): self.assertEqual( - {'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2, + {'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1}, word_count('rah rah ah ah ah\nroma roma ma\nga ga oh la la\nwant your bad romance') ) From 
f37bfee83a9d19e837584efa4f185c11533d6f85 Mon Sep 17 00:00:00 2001 From: Gabor Date: Mon, 18 May 2015 19:50:56 +0200 Subject: [PATCH 2/2] word-count: correct test name I previously forgot to rename the test to reflect what it does --- word-count/word_count_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/word-count/word_count_test.py b/word-count/word_count_test.py index ff6bcf39d59..163cf9ce77c 100644 --- a/word-count/word_count_test.py +++ b/word-count/word_count_test.py @@ -22,7 +22,7 @@ def test_count_multiple_occurences(self): word_count('one fish two fish red fish blue fish') ) - def test_preserves_punctuation(self): + def test_ignores_punctuation(self): self.assertEqual( {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}, word_count('car : carpet as java : javascript!!&@$%^&')