Merge pull request #86 from Yorwba/sentences-docs · simmsb/unicode-segmentation@d1ae69c · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d1ae69c

Browse files
authored
Merge pull request unicode-rs#86 from Yorwba/sentences-docs
Improve docs for sentence segmentation
2 parents ec65d22 + bf55e02 commit d1ae69c

File tree

1 file changed

+40
-6
lines changed

1 file changed

+40
-6
lines changed

src/lib.rs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,6 @@ pub trait UnicodeSegmentation {
177177
/// ```
178178
fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>;
179179

180-
/// Returns an iterator over substrings of `self` separated on
181-
/// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
182-
///
183-
/// The concatenation of the substrings returned by this function is just the original string.
184-
fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>;
185-
186180
/// Returns an iterator over substrings of `self` separated on
187181
/// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
188182
///
@@ -192,10 +186,50 @@ pub trait UnicodeSegmentation {
192186
/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic)
193187
/// property, or with
194188
/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values).
189+
///
190+
/// # Example
191+
///
192+
/// ```
193+
/// # use self::unicode_segmentation::UnicodeSegmentation;
194+
/// let uss = "Mr. Fox jumped. [...] The dog was too lazy.";
195+
/// let us1 = uss.unicode_sentences().collect::<Vec<&str>>();
196+
/// let b: &[_] = &["Mr. ", "Fox jumped. ", "The dog was too lazy."];
197+
///
198+
/// assert_eq!(&us1[..], b);
199+
/// ```
200+
fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>;
201+
202+
/// Returns an iterator over substrings of `self` separated on
203+
/// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries).
204+
///
205+
/// The concatenation of the substrings returned by this function is just the original string.
206+
///
207+
/// # Example
208+
///
209+
/// ```
210+
/// # use self::unicode_segmentation::UnicodeSegmentation;
211+
/// let ssbs = "Mr. Fox jumped. [...] The dog was too lazy.";
212+
/// let ssb1 = ssbs.split_sentence_bounds().collect::<Vec<&str>>();
213+
/// let b: &[_] = &["Mr. ", "Fox jumped. ", "[...] ", "The dog was too lazy."];
214+
///
215+
/// assert_eq!(&ssb1[..], b);
216+
/// ```
195217
fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>;
196218

197219
/// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries,
198220
/// and their offsets. See `split_sentence_bounds()` for more information.
221+
///
222+
/// # Example
223+
///
224+
/// ```
225+
/// # use self::unicode_segmentation::UnicodeSegmentation;
226+
/// let ssis = "Mr. Fox jumped. [...] The dog was too lazy.";
227+
/// let ssi1 = ssis.split_sentence_bound_indices().collect::<Vec<(usize, &str)>>();
228+
/// let b: &[_] = &[(0, "Mr. "), (4, "Fox jumped. "), (16, "[...] "),
229+
/// (22, "The dog was too lazy.")];
230+
///
231+
/// assert_eq!(&ssi1[..], b);
232+
/// ```
199233
fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>;
200234
}
201235

0 commit comments

Comments
 (0)
0