Get rid of special case for CRLF · jmuk/unicode-segmentation@92767fd · GitHub
[go: up one dir, main page]

Skip to content

Commit 92767fd

Browse files
committed
Get rid of special case for CRLF
The existing code treated CR and LF as special cases of the Control grapheme category, for reasons that weren't very good. This patch gets rid of that and just handles GB3 in the pair lookup. That should improve performance in the rope case, as it will cut down on the amount of pre-context requested when a chunk begins with LF.
1 parent 0083ef5 commit 92767fd

File tree

3 files changed

+77
-100
lines changed

3 files changed

+77
-100
lines changed

scripts/unicode.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -330,21 +330,13 @@ def emit_break_module(f, break_table, break_cats, name):
330330
grapheme_cats = load_properties("auxiliary/GraphemeBreakProperty.txt", [])
331331

332332
# Control
333-
# Note 1:
333+
# Note:
334334
# This category also includes Cs (surrogate codepoints), but Rust's `char`s are
335335
# Unicode Scalar Values only, and surrogates are thus invalid `char`s.
336336
# Thus, we have to remove Cs from the Control category
337-
# Note 2:
338-
# 0x0a and 0x0d (CR and LF) are not in the Control category for Graphemes.
339-
# However, the Graphemes iterator treats these as a special case, so they
340-
# should be included in grapheme_cats["Control"] for our implementation.
341337
grapheme_cats["Control"] = group_cat(list(
342-
(set(ungroup_cat(grapheme_cats["Control"]))
343-
| set(ungroup_cat(grapheme_cats["CR"]))
344-
| set(ungroup_cat(grapheme_cats["LF"])))
338+
set(ungroup_cat(grapheme_cats["Control"]))
345339
- set(ungroup_cat([surrogate_codepoints]))))
346-
del(grapheme_cats["CR"])
347-
del(grapheme_cats["LF"])
348340

349341
grapheme_table = []
350342
for cat in grapheme_cats:

src/grapheme.rs

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,6 @@ enum GraphemeState {
144144
NotBreak,
145145
// It is known to be a boundary.
146146
Break,
147-
// The codepoint after is LF, so a boundary iff the codepoint before is not CR. (GB3)
148-
CheckCrlf,
149147
// The codepoint after is a Regional Indicator Symbol, so a boundary iff
150148
// it is preceded by an even number of RIS codepoints. (GB12, GB13)
151149
Regional,
@@ -212,7 +210,6 @@ enum PairResult {
212210
NotBreak, // definitely not a break
213211
Break, // definitely a break
214212
Extended, // a break iff not in extended mode
215-
CheckCrlf, // a break unless it's a CR LF pair
216213
Regional, // a break if preceded by an even number of RIS
217214
Emoji, // a break if preceded by emoji base and (Extend)*
218215
}
@@ -221,9 +218,13 @@ fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
221218
use tables::grapheme::GraphemeCat::*;
222219
use self::PairResult::*;
223220
match (before, after) {
224-
(GC_Control, GC_Control) => CheckCrlf, // GB3
221+
(GC_CR, GC_LF) => NotBreak, // GB3
225222
(GC_Control, _) => Break, // GB4
223+
(GC_CR, _) => Break, // GB4
224+
(GC_LF, _) => Break, // GB4
226225
(_, GC_Control) => Break, // GB5
226+
(_, GC_CR) => Break, // GB5
227+
(_, GC_LF) => Break, // GB5
227228
(GC_L, GC_L) => NotBreak, // GB6
228229
(GC_L, GC_V) => NotBreak, // GB6
229230
(GC_L, GC_LV) => NotBreak, // GB6
@@ -357,10 +358,6 @@ impl GraphemeCursor {
357358
}
358359
}
359360
match self.state {
360-
GraphemeState::CheckCrlf => {
361-
let is_break = chunk.as_bytes()[chunk.len() - 1] != b'\r';
362-
self.decide(is_break);
363-
}
364361
GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
365362
GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
366363
_ => panic!("invalid state")
@@ -480,11 +477,6 @@ impl GraphemeCursor {
480477
if self.offset == chunk_start {
481478
let mut need_pre_context = true;
482479
match self.cat_after.unwrap() {
483-
gr::GC_Control => {
484-
if chunk.as_bytes()[offset_in_chunk] == b'\n' {
485-
self.state = GraphemeState::CheckCrlf;
486-
}
487-
}
488480
gr::GC_Regional_Indicator => self.state = GraphemeState::Regional,
489481
gr::GC_E_Modifier => self.state = GraphemeState::Emoji,
490482
_ => need_pre_context = self.cat_before.is_none(),
@@ -505,17 +497,6 @@ impl GraphemeCursor {
505497
let is_extended = self.is_extended;
506498
return self.decision(!is_extended);
507499
}
508-
PairResult::CheckCrlf => {
509-
if chunk.as_bytes()[offset_in_chunk] != b'\n' {
510-
return self.decision(true);
511-
}
512-
// TODO: I think we don't have to test this
513-
if self.offset > chunk_start {
514-
return self.decision(chunk.as_bytes()[offset_in_chunk - 1] != b'\r');
515-
}
516-
self.state = GraphemeState::CheckCrlf;
517-
return Err(GraphemeIncomplete::PreContext(chunk_start));
518-
}
519500
PairResult::Regional => {
520501
if let Some(ris_count) = self.ris_count {
521502
return self.decision((ris_count % 2) == 0);

src/tables.rs

Lines changed: 70 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -296,13 +296,15 @@ pub mod grapheme {
296296
#[derive(Clone, Copy, PartialEq, Eq)]
297297
pub enum GraphemeCat {
298298
GC_Any,
299+
GC_CR,
299300
GC_Control,
300301
GC_E_Base,
301302
GC_E_Base_GAZ,
302303
GC_E_Modifier,
303304
GC_Extend,
304305
GC_Glue_After_Zwj,
305306
GC_L,
307+
GC_LF,
306308
GC_LV,
307309
GC_LVT,
308310
GC_Prepend,
@@ -333,71 +335,73 @@ pub mod grapheme {
333335
}
334336

335337
const grapheme_cat_table: &'static [(char, char, GraphemeCat)] = &[
336-
('\u{0}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}', GC_Control), ('\u{ad}', '\u{ad}',
337-
GC_Control), ('\u{300}', '\u{36f}', GC_Extend), ('\u{483}', '\u{489}', GC_Extend),
338-
('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}', GC_Extend), ('\u{5c1}', '\u{5c2}',
339-
GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend), ('\u{5c7}', '\u{5c7}', GC_Extend),
340-
('\u{600}', '\u{605}', GC_Prepend), ('\u{610}', '\u{61a}', GC_Extend), ('\u{61c}',
341-
'\u{61c}', GC_Control), ('\u{64b}', '\u{65f}', GC_Extend), ('\u{670}', '\u{670}',
342-
GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend), ('\u{6dd}', '\u{6dd}', GC_Prepend),
343-
('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}', '\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}',
344-
GC_Extend), ('\u{70f}', '\u{70f}', GC_Prepend), ('\u{711}', '\u{711}', GC_Extend),
345-
('\u{730}', '\u{74a}', GC_Extend), ('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}',
346-
GC_Extend), ('\u{816}', '\u{819}', GC_Extend), ('\u{81b}', '\u{823}', GC_Extend),
347-
('\u{825}', '\u{827}', GC_Extend), ('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}',
348-
GC_Extend), ('\u{8d4}', '\u{8e1}', GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend),
349-
('\u{8e3}', '\u{902}', GC_Extend), ('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}',
350-
'\u{93a}', GC_Extend), ('\u{93b}', '\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}',
351-
GC_Extend), ('\u{93e}', '\u{940}', GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend),
352-
('\u{949}', '\u{94c}', GC_SpacingMark), ('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}',
353-
'\u{94f}', GC_SpacingMark), ('\u{951}', '\u{957}', GC_Extend), ('\u{962}', '\u{963}',
354-
GC_Extend), ('\u{981}', '\u{981}', GC_Extend), ('\u{982}', '\u{983}', GC_SpacingMark),
355-
('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}', '\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}',
356-
GC_SpacingMark), ('\u{9c1}', '\u{9c4}', GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark),
357-
('\u{9cb}', '\u{9cc}', GC_SpacingMark), ('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}',
358-
'\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}', GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend),
359-
('\u{a03}', '\u{a03}', GC_SpacingMark), ('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}',
360-
'\u{a40}', GC_SpacingMark), ('\u{a41}', '\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}',
361-
GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend), ('\u{a51}', '\u{a51}', GC_Extend),
362-
('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}', GC_Extend), ('\u{a81}', '\u{a82}',
363-
GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark), ('\u{abc}', '\u{abc}', GC_Extend),
364-
('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}', '\u{ac5}', GC_Extend), ('\u{ac7}',
365-
'\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}', GC_SpacingMark), ('\u{acb}', '\u{acc}',
366-
GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend), ('\u{ae2}', '\u{ae3}', GC_Extend),
367-
('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}', GC_SpacingMark), ('\u{b3c}',
368-
'\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend), ('\u{b40}', '\u{b40}',
369-
GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}', '\u{b48}', GC_SpacingMark),
370-
('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}', GC_Extend), ('\u{b56}',
371-
'\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend), ('\u{b82}', '\u{b82}', GC_Extend),
372-
('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}', GC_SpacingMark), ('\u{bc0}',
373-
'\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark), ('\u{bc6}', '\u{bc8}',
374-
GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}', '\u{bcd}', GC_Extend),
375-
('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend), ('\u{c01}', '\u{c03}',
376-
GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}', '\u{c44}', GC_SpacingMark),
377-
('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}', GC_Extend), ('\u{c55}', '\u{c56}',
378-
GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend), ('\u{c81}', '\u{c81}', GC_Extend),
379-
('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}', '\u{cbc}', GC_Extend), ('\u{cbe}',
380-
'\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}', GC_Extend), ('\u{cc0}', '\u{cc1}',
381-
GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend), ('\u{cc3}', '\u{cc4}', GC_SpacingMark),
382-
('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}', '\u{cc8}', GC_SpacingMark), ('\u{cca}',
383-
'\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}', GC_Extend), ('\u{cd5}', '\u{cd6}',
384-
GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend), ('\u{d01}', '\u{d01}', GC_Extend),
385-
('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}', '\u{d3e}', GC_Extend), ('\u{d3f}',
386-
'\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}', GC_Extend), ('\u{d46}', '\u{d48}',
387-
GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark), ('\u{d4d}', '\u{d4d}', GC_Extend),
388-
('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}', '\u{d57}', GC_Extend), ('\u{d62}',
389-
'\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}', GC_SpacingMark), ('\u{dca}', '\u{dca}',
390-
GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend), ('\u{dd0}', '\u{dd1}', GC_SpacingMark),
391-
('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}', '\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}',
392-
GC_SpacingMark), ('\u{ddf}', '\u{ddf}', GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark),
393-
('\u{e31}', '\u{e31}', GC_Extend), ('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}',
394-
'\u{e3a}', GC_Extend), ('\u{e47}', '\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend),
395-
('\u{eb3}', '\u{eb3}', GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}',
396-
'\u{ebc}', GC_Extend), ('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend),
397-
('\u{f35}', '\u{f35}', GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}',
398-
GC_Extend), ('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend),
399-
('\u{f7f}', '\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}',
400-
'\u{f87}', GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
338+
('\u{0}', '\u{9}', GC_Control), ('\u{a}', '\u{a}', GC_LF), ('\u{b}', '\u{c}', GC_Control),
339+
('\u{d}', '\u{d}', GC_CR), ('\u{e}', '\u{1f}', GC_Control), ('\u{7f}', '\u{9f}',
340+
GC_Control), ('\u{ad}', '\u{ad}', GC_Control), ('\u{300}', '\u{36f}', GC_Extend),
341+
('\u{483}', '\u{489}', GC_Extend), ('\u{591}', '\u{5bd}', GC_Extend), ('\u{5bf}', '\u{5bf}',
342+
GC_Extend), ('\u{5c1}', '\u{5c2}', GC_Extend), ('\u{5c4}', '\u{5c5}', GC_Extend),
343+
('\u{5c7}', '\u{5c7}', GC_Extend), ('\u{600}', '\u{605}', GC_Prepend), ('\u{610}',
344+
'\u{61a}', GC_Extend), ('\u{61c}', '\u{61c}', GC_Control), ('\u{64b}', '\u{65f}',
345+
GC_Extend), ('\u{670}', '\u{670}', GC_Extend), ('\u{6d6}', '\u{6dc}', GC_Extend),
346+
('\u{6dd}', '\u{6dd}', GC_Prepend), ('\u{6df}', '\u{6e4}', GC_Extend), ('\u{6e7}',
347+
'\u{6e8}', GC_Extend), ('\u{6ea}', '\u{6ed}', GC_Extend), ('\u{70f}', '\u{70f}',
348+
GC_Prepend), ('\u{711}', '\u{711}', GC_Extend), ('\u{730}', '\u{74a}', GC_Extend),
349+
('\u{7a6}', '\u{7b0}', GC_Extend), ('\u{7eb}', '\u{7f3}', GC_Extend), ('\u{816}', '\u{819}',
350+
GC_Extend), ('\u{81b}', '\u{823}', GC_Extend), ('\u{825}', '\u{827}', GC_Extend),
351+
('\u{829}', '\u{82d}', GC_Extend), ('\u{859}', '\u{85b}', GC_Extend), ('\u{8d4}', '\u{8e1}',
352+
GC_Extend), ('\u{8e2}', '\u{8e2}', GC_Prepend), ('\u{8e3}', '\u{902}', GC_Extend),
353+
('\u{903}', '\u{903}', GC_SpacingMark), ('\u{93a}', '\u{93a}', GC_Extend), ('\u{93b}',
354+
'\u{93b}', GC_SpacingMark), ('\u{93c}', '\u{93c}', GC_Extend), ('\u{93e}', '\u{940}',
355+
GC_SpacingMark), ('\u{941}', '\u{948}', GC_Extend), ('\u{949}', '\u{94c}', GC_SpacingMark),
356+
('\u{94d}', '\u{94d}', GC_Extend), ('\u{94e}', '\u{94f}', GC_SpacingMark), ('\u{951}',
357+
'\u{957}', GC_Extend), ('\u{962}', '\u{963}', GC_Extend), ('\u{981}', '\u{981}', GC_Extend),
358+
('\u{982}', '\u{983}', GC_SpacingMark), ('\u{9bc}', '\u{9bc}', GC_Extend), ('\u{9be}',
359+
'\u{9be}', GC_Extend), ('\u{9bf}', '\u{9c0}', GC_SpacingMark), ('\u{9c1}', '\u{9c4}',
360+
GC_Extend), ('\u{9c7}', '\u{9c8}', GC_SpacingMark), ('\u{9cb}', '\u{9cc}', GC_SpacingMark),
361+
('\u{9cd}', '\u{9cd}', GC_Extend), ('\u{9d7}', '\u{9d7}', GC_Extend), ('\u{9e2}', '\u{9e3}',
362+
GC_Extend), ('\u{a01}', '\u{a02}', GC_Extend), ('\u{a03}', '\u{a03}', GC_SpacingMark),
363+
('\u{a3c}', '\u{a3c}', GC_Extend), ('\u{a3e}', '\u{a40}', GC_SpacingMark), ('\u{a41}',
364+
'\u{a42}', GC_Extend), ('\u{a47}', '\u{a48}', GC_Extend), ('\u{a4b}', '\u{a4d}', GC_Extend),
365+
('\u{a51}', '\u{a51}', GC_Extend), ('\u{a70}', '\u{a71}', GC_Extend), ('\u{a75}', '\u{a75}',
366+
GC_Extend), ('\u{a81}', '\u{a82}', GC_Extend), ('\u{a83}', '\u{a83}', GC_SpacingMark),
367+
('\u{abc}', '\u{abc}', GC_Extend), ('\u{abe}', '\u{ac0}', GC_SpacingMark), ('\u{ac1}',
368+
'\u{ac5}', GC_Extend), ('\u{ac7}', '\u{ac8}', GC_Extend), ('\u{ac9}', '\u{ac9}',
369+
GC_SpacingMark), ('\u{acb}', '\u{acc}', GC_SpacingMark), ('\u{acd}', '\u{acd}', GC_Extend),
370+
('\u{ae2}', '\u{ae3}', GC_Extend), ('\u{b01}', '\u{b01}', GC_Extend), ('\u{b02}', '\u{b03}',
371+
GC_SpacingMark), ('\u{b3c}', '\u{b3c}', GC_Extend), ('\u{b3e}', '\u{b3f}', GC_Extend),
372+
('\u{b40}', '\u{b40}', GC_SpacingMark), ('\u{b41}', '\u{b44}', GC_Extend), ('\u{b47}',
373+
'\u{b48}', GC_SpacingMark), ('\u{b4b}', '\u{b4c}', GC_SpacingMark), ('\u{b4d}', '\u{b4d}',
374+
GC_Extend), ('\u{b56}', '\u{b57}', GC_Extend), ('\u{b62}', '\u{b63}', GC_Extend),
375+
('\u{b82}', '\u{b82}', GC_Extend), ('\u{bbe}', '\u{bbe}', GC_Extend), ('\u{bbf}', '\u{bbf}',
376+
GC_SpacingMark), ('\u{bc0}', '\u{bc0}', GC_Extend), ('\u{bc1}', '\u{bc2}', GC_SpacingMark),
377+
('\u{bc6}', '\u{bc8}', GC_SpacingMark), ('\u{bca}', '\u{bcc}', GC_SpacingMark), ('\u{bcd}',
378+
'\u{bcd}', GC_Extend), ('\u{bd7}', '\u{bd7}', GC_Extend), ('\u{c00}', '\u{c00}', GC_Extend),
379+
('\u{c01}', '\u{c03}', GC_SpacingMark), ('\u{c3e}', '\u{c40}', GC_Extend), ('\u{c41}',
380+
'\u{c44}', GC_SpacingMark), ('\u{c46}', '\u{c48}', GC_Extend), ('\u{c4a}', '\u{c4d}',
381+
GC_Extend), ('\u{c55}', '\u{c56}', GC_Extend), ('\u{c62}', '\u{c63}', GC_Extend),
382+
('\u{c81}', '\u{c81}', GC_Extend), ('\u{c82}', '\u{c83}', GC_SpacingMark), ('\u{cbc}',
383+
'\u{cbc}', GC_Extend), ('\u{cbe}', '\u{cbe}', GC_SpacingMark), ('\u{cbf}', '\u{cbf}',
384+
GC_Extend), ('\u{cc0}', '\u{cc1}', GC_SpacingMark), ('\u{cc2}', '\u{cc2}', GC_Extend),
385+
('\u{cc3}', '\u{cc4}', GC_SpacingMark), ('\u{cc6}', '\u{cc6}', GC_Extend), ('\u{cc7}',
386+
'\u{cc8}', GC_SpacingMark), ('\u{cca}', '\u{ccb}', GC_SpacingMark), ('\u{ccc}', '\u{ccd}',
387+
GC_Extend), ('\u{cd5}', '\u{cd6}', GC_Extend), ('\u{ce2}', '\u{ce3}', GC_Extend),
388+
('\u{d01}', '\u{d01}', GC_Extend), ('\u{d02}', '\u{d03}', GC_SpacingMark), ('\u{d3e}',
389+
'\u{d3e}', GC_Extend), ('\u{d3f}', '\u{d40}', GC_SpacingMark), ('\u{d41}', '\u{d44}',
390+
GC_Extend), ('\u{d46}', '\u{d48}', GC_SpacingMark), ('\u{d4a}', '\u{d4c}', GC_SpacingMark),
391+
('\u{d4d}', '\u{d4d}', GC_Extend), ('\u{d4e}', '\u{d4e}', GC_Prepend), ('\u{d57}',
392+
'\u{d57}', GC_Extend), ('\u{d62}', '\u{d63}', GC_Extend), ('\u{d82}', '\u{d83}',
393+
GC_SpacingMark), ('\u{dca}', '\u{dca}', GC_Extend), ('\u{dcf}', '\u{dcf}', GC_Extend),
394+
('\u{dd0}', '\u{dd1}', GC_SpacingMark), ('\u{dd2}', '\u{dd4}', GC_Extend), ('\u{dd6}',
395+
'\u{dd6}', GC_Extend), ('\u{dd8}', '\u{dde}', GC_SpacingMark), ('\u{ddf}', '\u{ddf}',
396+
GC_Extend), ('\u{df2}', '\u{df3}', GC_SpacingMark), ('\u{e31}', '\u{e31}', GC_Extend),
397+
('\u{e33}', '\u{e33}', GC_SpacingMark), ('\u{e34}', '\u{e3a}', GC_Extend), ('\u{e47}',
398+
'\u{e4e}', GC_Extend), ('\u{eb1}', '\u{eb1}', GC_Extend), ('\u{eb3}', '\u{eb3}',
399+
GC_SpacingMark), ('\u{eb4}', '\u{eb9}', GC_Extend), ('\u{ebb}', '\u{ebc}', GC_Extend),
400+
('\u{ec8}', '\u{ecd}', GC_Extend), ('\u{f18}', '\u{f19}', GC_Extend), ('\u{f35}', '\u{f35}',
401+
GC_Extend), ('\u{f37}', '\u{f37}', GC_Extend), ('\u{f39}', '\u{f39}', GC_Extend),
402+
('\u{f3e}', '\u{f3f}', GC_SpacingMark), ('\u{f71}', '\u{f7e}', GC_Extend), ('\u{f7f}',
403+
'\u{f7f}', GC_SpacingMark), ('\u{f80}', '\u{f84}', GC_Extend), ('\u{f86}', '\u{f87}',
404+
GC_Extend), ('\u{f8d}', '\u{f97}', GC_Extend), ('\u{f99}', '\u{fbc}', GC_Extend),
401405
('\u{fc6}', '\u{fc6}', GC_Extend), ('\u{102d}', '\u{1030}', GC_Extend), ('\u{1031}',
402406
'\u{1031}', GC_SpacingMark), ('\u{1032}', '\u{1037}', GC_Extend), ('\u{1039}', '\u{103a}',
403407
GC_Extend), ('\u{103b}', '\u{103c}', GC_SpacingMark), ('\u{103d}', '\u{103e}', GC_Extend),
@@ -868,7 +872,7 @@ pub mod word {
868872
pub use self::WordCat::*;
869873

870874
#[allow(non_camel_case_types)]
871-
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
875+
#[derive(Clone, Copy, PartialEq, Eq)]
872876
pub enum WordCat {
873877
WC_ALetter,
874878
WC_Any,

0 commit comments

Comments
 (0)
0