Fix doc for `regexp_span_tokenize` to explicitly use `regexp_span_tokenize` · ExplodingCabbage/nltk@62f7647 · GitHub

Commit 62f7647

Fix doc for regexp_span_tokenize to explicitly use regexp_span_tokenize
1 parent c14c15a commit 62f7647

File tree

1 file changed: +2 −2 lines changed

nltk/tokenize/util.py

Lines changed: 2 additions & 2 deletions
@@ -45,10 +45,10 @@ def regexp_span_tokenize(s, regexp):
     Return the offsets of the tokens in *s*, as a sequence of ``(start, end)``
     tuples, by splitting the string at each successive match of *regexp*.
 
-        >>> from nltk.tokenize import WhitespaceTokenizer
+        >>> from nltk.tokenize.util import regexp_span_tokenize
         >>> s = '''Good muffins cost $3.88\nin New York.  Please buy me
         ... two of them.\n\nThanks.'''
-        >>> list(WhitespaceTokenizer().span_tokenize(s))
+        >>> list(regexp_span_tokenize(s, r'\s'))
         [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36),
         (38, 44), (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
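For context, here is a minimal, self-contained sketch of what a regexp span tokenizer with the contract described in this docstring does. It is an illustration written for this page, not necessarily NLTK's verbatim implementation of regexp_span_tokenize: it scans the string with re.finditer, treats each match of the separator pattern as a gap, and yields the (start, end) offsets of the text between gaps, skipping the empty spans that adjacent separators would otherwise produce.

import re

def span_tokenize_sketch(s, regexp):
    # Yield (start, end) offsets of the tokens in s, splitting the string
    # at each successive match of regexp. A sketch of the documented
    # behavior, not NLTK's exact source.
    left = 0
    for m in re.finditer(regexp, s):
        right, next_left = m.span()
        if right != left:  # skip empty spans between adjacent separators
            yield (left, right)
        left = next_left
    if left != len(s):  # emit the trailing token after the last separator
        yield (left, len(s))

s = 'Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\n\nThanks.'
print(list(span_tokenize_sketch(s, r'\s')))
# Prints the same offsets as the doctest above, ending with (66, 73).

Note that both the old and new doctest calls produce identical output (the expected-output lines are unchanged context in the diff, since WhitespaceTokenizer splits on whitespace too); the point of the commit is simply that the docstring for regexp_span_tokenize now demonstrates regexp_span_tokenize itself rather than a different class.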

0 commit comments