Merge pull request #136 from Jules-Bertholet/std-tables · unicode-rs/unicode-segmentation@2081c29 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 2081c29

Browse files
authored
Merge pull request #136 from Jules-Bertholet/std-tables
Use stdlib alphabetic and numeric character tables
2 parents 592ce00 + e96ec2e commit 2081c29

File tree

2 files changed

+38
-16
lines changed

2 files changed

+38
-16
lines changed

scripts/unicode.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -232,19 +232,27 @@ def emit_util_mod(f):
232232
233233
#[inline]
234234
fn is_alphabetic(c: char) -> bool {
235-
match c {
236-
'a' ..= 'z' | 'A' ..= 'Z' => true,
237-
c if c > '\x7f' => super::derived_property::Alphabetic(c),
238-
_ => false,
235+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
236+
c.is_alphabetic()
237+
} else {
238+
match c {
239+
'a' ..= 'z' | 'A' ..= 'Z' => true,
240+
c if c > '\\x7f' => super::derived_property::Alphabetic(c),
241+
_ => false,
242+
}
239243
}
240244
}
241245
242246
#[inline]
243247
fn is_numeric(c: char) -> bool {
244-
match c {
245-
'0' ..= '9' => true,
246-
c if c > '\x7f' => super::general_category::N(c),
247-
_ => false,
248+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
249+
c.is_numeric()
250+
} else {
251+
match c {
252+
'0' ..= '9' => true,
253+
c if c > '\\x7f' => super::general_category::N(c),
254+
_ => false,
255+
}
248256
}
249257
}
250258
@@ -388,6 +396,10 @@ def emit_break_module(f, break_table, break_cats, name):
388396
/// The version of [Unicode](http://www.unicode.org/)
389397
/// that this version of unicode-segmentation is based on.
390398
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
399+
""" % UNICODE_VERSION)
400+
401+
rf.write("""
402+
const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
391403
""" % UNICODE_VERSION)
392404

393405
# download and parse all the data

src/tables.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
/// that this version of unicode-segmentation is based on.
1717
pub const UNICODE_VERSION: (u64, u64, u64) = (15, 1, 0);
1818

19+
const UNICODE_VERSION_U8: (u8, u8, u8) = (15, 1, 0);
20+
1921
pub mod util {
2022
#[inline]
2123
pub fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
@@ -29,19 +31,27 @@ pub mod util {
2931

3032
#[inline]
3133
fn is_alphabetic(c: char) -> bool {
32-
match c {
33-
'a' ..= 'z' | 'A' ..= 'Z' => true,
34-
c if c > '' => super::derived_property::Alphabetic(c),
35-
_ => false,
34+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
35+
c.is_alphabetic()
36+
} else {
37+
match c {
38+
'a' ..= 'z' | 'A' ..= 'Z' => true,
39+
c if c > '\x7f' => super::derived_property::Alphabetic(c),
40+
_ => false,
41+
}
3642
}
3743
}
3844

3945
#[inline]
4046
fn is_numeric(c: char) -> bool {
41-
match c {
42-
'0' ..= '9' => true,
43-
c if c > '' => super::general_category::N(c),
44-
_ => false,
47+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
48+
c.is_numeric()
49+
} else {
50+
match c {
51+
'0' ..= '9' => true,
52+
c if c > '\x7f' => super::general_category::N(c),
53+
_ => false,
54+
}
4555
}
4656
}
4757

0 commit comments

Comments
 (0)
0