@@ -24,13 +24,13 @@ class Str(str):
24
24
pass
25
25
26
26
27
- PyUnicode_NATIVE_ASCII = 1
28
- PyUnicode_NATIVE_UCS1 = 2
29
- PyUnicode_NATIVE_UCS2 = 3
30
- PyUnicode_NATIVE_UCS4 = 4
31
- PyUnicode_NATIVE_UTF8 = 5
27
+ PyUnicode_FORMAT_ASCII = 0x01
28
+ PyUnicode_FORMAT_UCS1 = 0x02
29
+ PyUnicode_FORMAT_UCS2 = 0x04
30
+ PyUnicode_FORMAT_UCS4 = 0x08
31
+ PyUnicode_FORMAT_UTF8 = 0x10
32
32
# Invalid format: a flag bit that matches none of the formats defined above
33
- PyUnicode_NATIVE_INVALID = 0
33
+ PyUnicode_FORMAT_INVALID = 0x20
34
34
35
35
class CAPITest (unittest .TestCase ):
36
36
@@ -1683,74 +1683,119 @@ def test_pep393_utf8_caching_bug(self):
1683
1683
# Check that the second call returns the same result
1684
1684
self .assertEqual (getargs_s_hash (s ), chr (k ).encode () * (i + 1 ))
1685
1685
1686
- def test_unicode_asnativeformat (self ):
1687
- # Test PyUnicode_AsNativeFormat()
1688
- asnativeformat = _testlimitedcapi .unicode_asnativeformat
1689
- self .assertEqual (asnativeformat ("abc" ),
1690
- (b'abc' , PyUnicode_NATIVE_ASCII ))
1691
- self .assertEqual (asnativeformat ("latin1:\xe9 " ),
1692
- (b'latin1:\xe9 ' , PyUnicode_NATIVE_UCS1 ))
1693
-
1694
- ucs2_enc = 'utf-16le' if sys .byteorder == 'little' else 'utf-16be'
1695
- self .assertEqual (asnativeformat ('ucs2:\u20ac ' ),
1686
+ def test_unicode_export (self ):
1687
+ # Test PyUnicode_Export() and PyUnicode_FreeExport()
1688
+ unicode_export = _testlimitedcapi .unicode_export
1689
+ if sys .byteorder == 'little' :
1690
+ ucs2_enc = 'utf-16le'
1691
+ ucs4_enc = 'utf-32le'
1692
+ else :
1693
+ ucs2_enc = 'utf-16be'
1694
+ ucs4_enc = 'utf-32be'
1695
+
1696
+ # export to the native format
1697
+ formats = (PyUnicode_FORMAT_ASCII
1698
+ | PyUnicode_FORMAT_UCS1
1699
+ | PyUnicode_FORMAT_UCS2
1700
+ | PyUnicode_FORMAT_UCS4 )
1701
+ self .assertEqual (unicode_export ("abc" , formats ),
1702
+ (b'abc' , PyUnicode_FORMAT_ASCII ))
1703
+ self .assertEqual (unicode_export ("latin1:\xe9 " , formats ),
1704
+ (b'latin1:\xe9 ' , PyUnicode_FORMAT_UCS1 ))
1705
+ self .assertEqual (unicode_export ('ucs2:\u20ac ' , formats ),
1696
1706
('ucs2:\u20ac ' .encode (ucs2_enc ),
1697
- PyUnicode_NATIVE_UCS2 ))
1698
-
1699
- ucs4_enc = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1700
- self .assertEqual (asnativeformat ('ucs4:\U0010ffff ' ),
1707
+ PyUnicode_FORMAT_UCS2 ))
1708
+ self .assertEqual (unicode_export ('ucs4:\U0010ffff ' , formats ),
1701
1709
('ucs4:\U0010ffff ' .encode (ucs4_enc ),
1702
- PyUnicode_NATIVE_UCS4 ))
1703
-
1704
- def test_unicode_fromnativeformat (self ):
1705
- # Test PyUnicode_FromNativeFormat()
1706
- fromnativeformat = _testlimitedcapi .unicode_fromnativeformat
1707
- self .assertEqual (fromnativeformat (b'abc' , PyUnicode_NATIVE_ASCII ),
1710
+ PyUnicode_FORMAT_UCS4 ))
1711
+
1712
+ # always export to UCS4
1713
+ self .assertEqual (unicode_export ("abc" , PyUnicode_FORMAT_UCS4 ),
1714
+ ('abc' .encode (ucs4_enc ), PyUnicode_FORMAT_UCS4 ))
1715
+ self .assertEqual (unicode_export ("latin1:\xe9 " , PyUnicode_FORMAT_UCS4 ),
1716
+ ('latin1:\xe9 ' .encode (ucs4_enc ), PyUnicode_FORMAT_UCS4 ))
1717
+ self .assertEqual (unicode_export ('ucs2:\u20ac ' , PyUnicode_FORMAT_UCS4 ),
1718
+ ('ucs2:\u20ac ' .encode (ucs4_enc ),
1719
+ PyUnicode_FORMAT_UCS4 ))
1720
+ self .assertEqual (unicode_export ('ucs4:\U0010ffff ' , PyUnicode_FORMAT_UCS4 ),
1721
+ ('ucs4:\U0010ffff ' .encode (ucs4_enc ),
1722
+ PyUnicode_FORMAT_UCS4 ))
1723
+
1724
+ # always export to UTF8
1725
+ self .assertEqual (unicode_export ("abc" , PyUnicode_FORMAT_UTF8 ),
1726
+ ('abc' .encode ('utf8' ), PyUnicode_FORMAT_UTF8 ))
1727
+ self .assertEqual (unicode_export ("latin1:\xe9 " , PyUnicode_FORMAT_UTF8 ),
1728
+ ('latin1:\xe9 ' .encode ('utf8' ), PyUnicode_FORMAT_UTF8 ))
1729
+ self .assertEqual (unicode_export ('ucs2:\u20ac ' , PyUnicode_FORMAT_UTF8 ),
1730
+ ('ucs2:\u20ac ' .encode ('utf8' ),
1731
+ PyUnicode_FORMAT_UTF8 ))
1732
+ self .assertEqual (unicode_export ('ucs4:\U0010ffff ' , PyUnicode_FORMAT_UTF8 ),
1733
+ ('ucs4:\U0010ffff ' .encode ('utf8' ),
1734
+ PyUnicode_FORMAT_UTF8 ))
1735
+
1736
+ # No supported format or invalid format
1737
+ with self .assertRaisesRegex (ValueError ,
1738
+ "unable to find a matching export format" ):
1739
+ unicode_export ('abc' , 0 )
1740
+ with self .assertRaisesRegex (ValueError ,
1741
+ "unable to find a matching export format" ):
1742
+ unicode_export ('abc' , PyUnicode_FORMAT_INVALID )
1743
+
1744
+ def test_unicode_import (self ):
1745
+ # Test PyUnicode_Import()
1746
+ unicode_import = _testlimitedcapi .unicode_import
1747
+ if sys .byteorder == 'little' :
1748
+ ucs2_enc = 'utf-16le'
1749
+ ucs4_enc = 'utf-32le'
1750
+ else :
1751
+ ucs2_enc = 'utf-16be'
1752
+ ucs4_enc = 'utf-32be'
1753
+
1754
+ self .assertEqual (unicode_import (b'abc' , PyUnicode_FORMAT_ASCII ),
1708
1755
"abc" )
1709
- self .assertEqual (fromnativeformat (b'latin1:\xe9 ' , PyUnicode_NATIVE_UCS1 ),
1756
+ self .assertEqual (unicode_import (b'latin1:\xe9 ' , PyUnicode_FORMAT_UCS1 ),
1710
1757
"latin1:\xe9 " )
1711
1758
1712
- ucs2_enc = 'utf-16le' if sys .byteorder == 'little' else 'utf-16be'
1713
- self .assertEqual (fromnativeformat ('ucs2:\u20ac ' .encode (ucs2_enc ),
1714
- PyUnicode_NATIVE_UCS2 ),
1759
+ self .assertEqual (unicode_import ('ucs2:\u20ac ' .encode (ucs2_enc ),
1760
+ PyUnicode_FORMAT_UCS2 ),
1715
1761
'ucs2:\u20ac ' )
1716
1762
1717
- ucs4_enc = 'utf-32le' if sys .byteorder == 'little' else 'utf-32be'
1718
- self .assertEqual (fromnativeformat ('ucs4:\U0010ffff ' .encode (ucs4_enc ),
1719
- PyUnicode_NATIVE_UCS4 ),
1763
+ self .assertEqual (unicode_import ('ucs4:\U0010ffff ' .encode (ucs4_enc ),
1764
+ PyUnicode_FORMAT_UCS4 ),
1720
1765
'ucs4:\U0010ffff ' )
1721
1766
1722
1767
text = "abc\xe9 \U0010ffff "
1723
- self .assertEqual (fromnativeformat (text .encode ('utf8' ),
1724
- PyUnicode_NATIVE_UTF8 ),
1768
+ self .assertEqual (unicode_import (text .encode ('utf8' ),
1769
+ PyUnicode_FORMAT_UTF8 ),
1725
1770
text )
1726
1771
1727
1772
# Empty string
1728
1773
for native_format in (
1729
- PyUnicode_NATIVE_ASCII ,
1730
- PyUnicode_NATIVE_UCS1 ,
1731
- PyUnicode_NATIVE_UCS2 ,
1732
- PyUnicode_NATIVE_UCS4 ,
1733
- PyUnicode_NATIVE_UTF8 ,
1774
+ PyUnicode_FORMAT_ASCII ,
1775
+ PyUnicode_FORMAT_UCS1 ,
1776
+ PyUnicode_FORMAT_UCS2 ,
1777
+ PyUnicode_FORMAT_UCS4 ,
1778
+ PyUnicode_FORMAT_UTF8 ,
1734
1779
):
1735
1780
with self .subTest (native_format = native_format ):
1736
- self .assertEqual (fromnativeformat (b'' , native_format ),
1781
+ self .assertEqual (unicode_import (b'' , native_format ),
1737
1782
'' )
1738
1783
1739
1784
# Invalid format
1740
1785
with self .assertRaises (ValueError ):
1741
- fromnativeformat (b'' , PyUnicode_NATIVE_INVALID )
1786
+ unicode_import (b'' , PyUnicode_FORMAT_INVALID )
1742
1787
1743
1788
# Invalid size
1744
1789
ucs2 = 'ucs2:\u20ac ' .encode (ucs2_enc )
1745
1790
with self .assertRaises (ValueError ):
1746
- fromnativeformat (ucs2 [:- 1 ], PyUnicode_NATIVE_UCS2 )
1791
+ unicode_import (ucs2 [:- 1 ], PyUnicode_FORMAT_UCS2 )
1747
1792
ucs4 = 'ucs4:\U0010ffff ' .encode (ucs4_enc )
1748
1793
with self .assertRaises (ValueError ):
1749
- fromnativeformat (ucs4 [:- 1 ], PyUnicode_NATIVE_UCS4 )
1794
+ unicode_import (ucs4 [:- 1 ], PyUnicode_FORMAT_UCS4 )
1750
1795
with self .assertRaises (ValueError ):
1751
- fromnativeformat (ucs4 [:- 2 ], PyUnicode_NATIVE_UCS4 )
1796
+ unicode_import (ucs4 [:- 2 ], PyUnicode_FORMAT_UCS4 )
1752
1797
with self .assertRaises (ValueError ):
1753
- fromnativeformat (ucs4 [:- 3 ], PyUnicode_NATIVE_UCS4 )
1798
+ unicode_import (ucs4 [:- 3 ], PyUnicode_FORMAT_UCS4 )
1754
1799
1755
1800
1756
1801
if __name__ == '__main__' :
0 commit comments