python · JukkaL · Aug 5, 2021 · Aug 2, 2021 · Aug 3, 2021 · Aug 3, 2021
diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py
@@ -74,10 +74,11 @@ def split(self, sep: Optional[str] = None, max: Optional[int] = None) -> List[st
     def strip (self, item: str) -> str: pass
     def join(self, x: Iterable[str]) -> str: pass
     def format(self, *args: Any, **kwargs: Any) -> str: ...
-    def upper(self) -> str: pass
-    def startswith(self, x: str, start: int=..., end: int=...) -> bool: pass
-    def endswith(self, x: str, start: int=..., end: int=...) -> bool: pass
-    def replace(self, old: str, new: str, maxcount: Optional[int] = None) -> str: pass
+    def upper(self) -> str: ...
+    def startswith(self, x: str, start: int=..., end: int=...) -> bool: ...
+    def endswith(self, x: str, start: int=..., end: int=...) -> bool: ...
+    def replace(self, old: str, new: str, maxcount: int=...) -> str: ...
+    def encode(self, x: str=..., y: str=...) -> bytes: ...
 
 class float:
     def __init__(self, x: object) -> None: pass
@@ -97,14 +98,15 @@ def __neg__(self) -> complex: pass
 
 class bytes:
     @overload
-    def __init__(self) -> None: pass
+    def __init__(self) -> None: ...
     @overload
-    def __init__(self, x: object) -> None: pass
-    def __add__(self, x: bytes) -> bytes: pass
-    def __eq__(self, x: object) -> bool: pass
-    def __ne__(self, x: object) -> bool: pass
-    def __getitem__(self, i: int) -> int: pass
-    def join(self, x: Iterable[object]) -> bytes: pass
+    def __init__(self, x: object) -> None: ...
+    def __add__(self, x: bytes) -> bytes: ...
+    def __eq__(self, x: object) -> bool: ...
+    def __ne__(self, x: object) -> bool: ...
+    def __getitem__(self, i: int) -> int: ...
+    def join(self, x: Iterable[object]) -> bytes: ...
+    def decode(self, x: str=..., y: str=...) -> str: ...  # both args optional, like str.encode
 
 class bytearray:
     @overload
@@ -253,6 +255,8 @@ class IndexError(LookupError): pass
 
 class RuntimeError(Exception): pass
 
+class UnicodeEncodeError(RuntimeError): pass  # NOTE(review): real builtin derives from UnicodeError (a ValueError) - confirm this base is intended
+
 class NotImplementedError(RuntimeError): pass
 
 class StopIteration(Exception):
@@ -284,6 +288,8 @@ def abs(x: float) -> float: ...
 def exit() -> None: ...
 def repr(o: object) -> str: ...
 def ascii(o: object) -> str: ...
+def ord(o: object) -> int: ...
+def chr(i: int) -> str: ...
 
 # Dummy definitions.
 class classmethod: pass

diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test
@@ -1,6 +1,6 @@
 # Test cases for strings (compile and run)
 
-[case testStr]
+[case testStrBasics]
 from typing import Tuple
 def f() -> str:
     return 'some string'
@@ -511,3 +511,63 @@ def test_format_method_python_doc() -> None:
                         '    9    9   11 1001',\
                         '   10    A   12 1010',\
                         '   11    B   13 1011']
+
+[case testUnicodeEncodeDecode]
+
+# Examples largely follow https://docs.python.org/3/howto/unicode.html
+
+def test_chr() -> None:
+    assert chr(57344) == '\ue000'  # 57344 == 0xE000
+    assert chr(0) == '\x00'
+    try:
+        chr(-1)  # negative code points must be rejected
+        assert False  # only reached if chr() failed to raise
+    except ValueError:
+        pass
+    try:
+        chr(1114112)  # 0x110000, one past the largest accepted value
+        assert False
+    except ValueError:
+        pass
+    assert chr(1114111) == '\U0010ffff'  # 1114111 == 0x10FFFF
+
+def test_ord() -> None:
+    assert ord('\ue000') == 57344
+    s = "a\xac\u1234\u20ac\U00008000"
+    #     ^^^^ two-digit hex escape
+    #         ^^^^^^ four-digit Unicode escape
+    #                     ^^^^^^^^^^ eight-digit Unicode escape
+    l1 = [ord(c) for c in s]
+    assert l1 == [97, 172, 4660, 8364, 32768]
+    u = 'abcdé'
+    assert ord(u[-1]) == 233
+    assert ord(b'a') == 97  # a length-1 bytes object is accepted too
+    try:
+        ord('aa')  # a string of length 2 must be rejected
+        assert False  # only reached if ord() failed to raise
+    except TypeError:
+        pass
+
+def test_decode() -> None:
+    assert "\N{GREEK CAPITAL LETTER DELTA}" == '\u0394'  # named escape vs. four-digit escape
+    assert "\u0394" == "\u0394"
+    assert "\U00000394" == '\u0394'  # eight-digit escape vs. four-digit escape
+    assert b'\x80abc'.decode("utf-8", "replace") == '\ufffdabc'  # bad byte 0x80 becomes U+FFFD
+    assert b'\x80abc'.decode("utf-8", "backslashreplace") == '\\x80abc'  # bad byte kept as a literal \x80 escape
+    assert b'\x80abc'.decode("utf-8", "ignore") == 'abc'  # bad byte dropped
+
+def test_encode() -> None:
+    u = chr(40960) + 'abcd' + chr(1972)  # U+A000 + 'abcd' + U+07B4
+    assert u.encode() == b'\xea\x80\x80abcd\xde\xb4'  # no argument: same bytes as 'utf-8' below
+    assert u.encode('utf-8') == b'\xea\x80\x80abcd\xde\xb4'
+    try:
+        u.encode('ascii')  # no error handler given: non-ASCII input must raise
+        assert False  # only reached if encode() failed to raise
+    except UnicodeEncodeError:
+        pass
+    assert u.encode('ascii', 'ignore') == b'abcd'
+    assert u.encode('ascii', 'replace') == b'?abcd?'
+    assert u.encode('ascii', 'xmlcharrefreplace') == b'&#40960;abcd&#1972;'
+    assert u.encode('ascii', 'backslashreplace') == b'\\ua000abcd\\u07b4'
+    assert u.encode('ascii', 'namereplace') == b'\\N{YI SYLLABLE IT}abcd\\u07b4'  # U+07B4 stays a \u escape here
+    assert 'pythön!'.encode() == b'pyth\xc3\xb6n!'