Florob
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎Cargo.toml
Lines changed: 1 addition & 1 deletion b/‎Cargo.toml
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/unicode.py
Lines changed: 1 addition & 1 deletion b/‎scripts/unicode.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/unicode_gen_normtests.py
Lines changed: 81 additions & 0 deletions b/‎scripts/unicode_gen_normtests.py
Lines changed: 81 additions & 0 deletions
diff --git a/‎src/lib.rs
Lines changed: 3 additions & 1 deletion b/‎src/lib.rs
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/test.rs
Lines changed: 64 additions & 0 deletions b/‎src/test.rs
Lines changed: 64 additions & 0 deletions
@@ -1,3 +1,4 @@
 target
 Cargo.lock
 scripts/tmp
+*.pyc
@@ -1,7 +1,7 @@
 [package]
 
 name = "unicode-normalization"
-version = "0.0.2"
+version = "0.0.3"
 authors = ["kwantam <kwantam@gmail.com>"]
 
 homepage = "https://github.com/unicode-rs/unicode-normalization"
 
@@ -26,5 +26,5 @@ to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-unicode-normalization = "0.0.2"
+unicode-normalization = "0.0.3"
 ```
@@ -30,7 +30,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
 
 #![allow(missing_docs, non_upper_case_globals, non_snake_case)]
 '''
 
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+# This script uses the following Unicode tables:
+# - NormalizationTest.txt
+#
+# Since this should not require frequent updates, we just store this
+# out-of-line and check the unicode.rs file into git.
+
+import unicode, re, os, fileinput
+
+def load_test_data(f):
+    outls = []
+    testRe = re.compile("^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")
+
+    unicode.fetch(f)
+    for line in fileinput.input(os.path.basename(f)):
+        # comment and header lines start with # and @ respectively
+        if len(line) < 1 or line[0:1] == '#' or line[0:1] == '@':
+            continue
+
+        m = testRe.match(line)
+        groups = []
+        if not m:
+            print "error: no match on line where test was expected: %s" % line
+            continue
+
+        has_surrogates = False
+        for i in range(1, 6):
+            group = []
+            chs = m.group(i).split()
+            for ch in chs:
+                intch = int(ch,16)
+                if unicode.is_surrogate(intch):
+                    has_surrogates = True
+                    break
+                group.append(intch)
+
+            if has_surrogates:
+                break
+            groups.append(group)
+
+        if has_surrogates:
+            continue
+        outls.append(groups)
+
+    return outls
+
+def showfun(gs):
+    outstr = '('
+    gfirst = True
+    for g in gs:
+        if not gfirst:
+            outstr += ','
+        gfirst = False
+
+        outstr += '"'
+        for ch in g:
+            outstr += "\\u{%x}" % ch
+        outstr += '"'
+    outstr += ')'
+    return outstr
+
+if __name__ == "__main__":
+    d = load_test_data("NormalizationTest.txt")
+    ntype = "&'static [(&'static str, &'static str, &'static str, &'static str, &'static str)]"
+    with open("testdata.rs", "w") as nf:
+        nf.write(unicode.preamble)
+        nf.write("\n")
+        nf.write("    // official Unicode test data\n")
+        nf.write("    // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n")
+        unicode.emit_table(nf, "TEST_NORM", d, ntype, True, showfun)
@@ -34,7 +34,7 @@
 //!
 //! ```toml
 //! [dependencies]
-//! unicode-normalization = "0.0.2"
+//! unicode-normalization = "0.0.3"
 //! ```
 
 #![deny(missing_docs, unsafe_code)]
@@ -48,6 +48,8 @@ mod tables;
 
 #[cfg(test)]
 mod test;
+#[cfg(test)]
+mod testdata;
 
 /// Methods for composing and decomposing characters.
 pub mod char {
 
@@ -87,3 +87,67 @@ fn test_nfkc_chars() {
     t!("\u{ac1c}", "\u{ac1c}");
     t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
 }
+
+#[test]
+fn test_official() {
+    use testdata::TEST_NORM;
+    macro_rules! normString {
+        ($fun: ident, $input: expr) => { UnicodeNormalization::$fun($input).collect::<String>() }
+    }
+
+    for &(s1, s2, s3, s4, s5) in TEST_NORM {
+        // these invariants come from the CONFORMANCE section of
+        // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
+        {
+            let r1 = normString!(nfc_chars, s1);
+            let r2 = normString!(nfc_chars, s2);
+            let r3 = normString!(nfc_chars, s3);
+            let r4 = normString!(nfc_chars, s4);
+            let r5 = normString!(nfc_chars, s5);
+            assert_eq!(s2, &r1[..]);
+            assert_eq!(s2, &r2[..]);
+            assert_eq!(s2, &r3[..]);
+            assert_eq!(s4, &r4[..]);
+            assert_eq!(s4, &r5[..]);
+        }
+
+        {
+            let r1 = normString!(nfd_chars, s1);
+            let r2 = normString!(nfd_chars, s2);
+            let r3 = normString!(nfd_chars, s3);
+            let r4 = normString!(nfd_chars, s4);
+            let r5 = normString!(nfd_chars, s5);
+            assert_eq!(s3, &r1[..]);
+            assert_eq!(s3, &r2[..]);
+            assert_eq!(s3, &r3[..]);
+            assert_eq!(s5, &r4[..]);
+            assert_eq!(s5, &r5[..]);
+        }
+
+        {
+            let r1 = normString!(nfkc_chars, s1);
+            let r2 = normString!(nfkc_chars, s2);
+            let r3 = normString!(nfkc_chars, s3);
+            let r4 = normString!(nfkc_chars, s4);
+            let r5 = normString!(nfkc_chars, s5);
+            assert_eq!(s4, &r1[..]);
+            assert_eq!(s4, &r2[..]);
+            assert_eq!(s4, &r3[..]);
+            assert_eq!(s4, &r4[..]);
+            assert_eq!(s4, &r5[..]);
+        }
+
+        {
+            let r1 = normString!(nfkd_chars, s1);
+            let r2 = normString!(nfkd_chars, s2);
+            let r3 = normString!(nfkd_chars, s3);
+            let r4 = normString!(nfkd_chars, s4);
+            let r5 = normString!(nfkd_chars, s5);
+            assert_eq!(s5, &r1[..]);
+            assert_eq!(s5, &r2[..]);
+            assert_eq!(s5, &r3[..]);
+            assert_eq!(s5, &r4[..]);
+            assert_eq!(s5, &r5[..]);
+        }
+    }
+}
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
 target
 Cargo.lock
 scripts/tmp
+*.pyc