Add `collation` option and `set_character_set()` to Connection (#1119) · PyMySQL/PyMySQL@6929092 · GitHub
[go: up one dir, main page]

Skip to content

Commit 6929092

Browse files
authored
Add collation option and set_character_set() to Connection (#1119)
Send `SET NAMES` on every new connection to ensure charset/collation are correctly configured. Fix #1092
1 parent 103004d commit 6929092

File tree

2 files changed

+54
-3
lines changed

2 files changed

+54
-3
lines changed

pymysql/connections.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ class Connection:
112112
(default: None - no timeout)
113113
:param write_timeout: The timeout for writing to the connection in seconds.
114114
(default: None - no timeout)
115-
:param charset: Charset to use.
115+
:param str charset: Charset to use.
116+
:param str collation: Collation name to use.
116117
:param sql_mode: Default SQL_MODE to use.
117118
:param read_default_file:
118119
Specifies my.cnf file to read these parameters from under the [client] section.
@@ -174,6 +175,7 @@ def __init__(
174175
unix_socket=None,
175176
port=0,
176177
charset="",
178+
collation=None,
177179
sql_mode=None,
178180
read_default_file=None,
179181
conv=None,
@@ -308,6 +310,7 @@ def _config(key, arg):
308310
self._write_timeout = write_timeout
309311

310312
self.charset = charset or DEFAULT_CHARSET
313+
self.collation = collation
311314
self.use_unicode = use_unicode
312315

313316
self.encoding = charset_by_name(self.charset).encoding
@@ -593,13 +596,32 @@ def ping(self, reconnect=True):
593596
raise
594597

595598
def set_charset(self, charset):
599+
"""Deprecated. Use set_character_set() instead."""
600+
# This function has been implemented in old PyMySQL.
601+
# But this name is different from MySQLdb.
602+
# So we keep this function for compatibility and add
603+
# new set_character_set() function.
604+
self.set_character_set(charset)
605+
606+
def set_character_set(self, charset, collation=None):
607+
"""
608+
Set charset (and collation)
609+
610+
Send "SET NAMES charset [COLLATE collation]" query.
611+
Update Connection.encoding based on charset.
612+
"""
596613
# Make sure charset is supported.
597614
encoding = charset_by_name(charset).encoding
598615

599-
self._execute_command(COMMAND.COM_QUERY, "SET NAMES %s" % self.escape(charset))
616+
if collation:
617+
query = f"SET NAMES {charset} COLLATE {collation}"
618+
else:
619+
query = f"SET NAMES {charset}"
620+
self._execute_command(COMMAND.COM_QUERY, query)
600621
self._read_packet()
601622
self.charset = charset
602623
self.encoding = encoding
624+
self.collation = collation
603625

604626
def connect(self, sock=None):
605627
self._closed = False
@@ -641,15 +663,30 @@ def connect(self, sock=None):
641663
self._get_server_information()
642664
self._request_authentication()
643665

666+
# Send "SET NAMES" query on init for:
667+
# - Ensure charset (and collation) is set to the server.
668+
# - collation_id in handshake packet may be ignored.
669+
# - If collation is not specified, we don't know what is server's
670+
# default collation for the charset. For example, default collation
671+
# of utf8mb4 is:
672+
# - MySQL 5.7, MariaDB 10.x: utf8mb4_general_ci
673+
# - MySQL 8.0: utf8mb4_0900_ai_ci
674+
#
675+
# Reference:
676+
# - https://github.com/PyMySQL/PyMySQL/issues/1092
677+
# - https://github.com/wagtail/wagtail/issues/9477
678+
# - https://zenn.dev/methane/articles/2023-mysql-collation (Japanese)
679+
self.set_character_set(self.charset, self.collation)
680+
644681
if self.sql_mode is not None:
645682
c = self.cursor()
646683
c.execute("SET sql_mode=%s", (self.sql_mode,))
684+
c.close()
647685

648686
if self.init_command is not None:
649687
c = self.cursor()
650688
c.execute(self.init_command)
651689
c.close()
652-
self.commit()
653690

654691
if self.autocommit_mode is not None:
655692
self.autocommit(self.autocommit_mode)

pymysql/tests/test_connection.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,20 @@ def test_utf8mb4(self):
444444
arg["charset"] = "utf8mb4"
445445
pymysql.connect(**arg)
446446

447+
def test_set_character_set(self):
448+
con = self.connect()
449+
cur = con.cursor()
450+
451+
con.set_character_set("latin1")
452+
cur.execute("SELECT @@character_set_connection")
453+
self.assertEqual(cur.fetchone(), ("latin1",))
454+
self.assertEqual(con.encoding, "cp1252")
455+
456+
con.set_character_set("utf8mb4", "utf8mb4_general_ci")
457+
cur.execute("SELECT @@character_set_connection, @@collation_connection")
458+
self.assertEqual(cur.fetchone(), ("utf8mb4", "utf8mb4_general_ci"))
459+
self.assertEqual(con.encoding, "utf8")
460+
447461
def test_largedata(self):
448462
"""Large query and response (>=16MB)"""
449463
cur = self.connect().cursor()

0 commit comments

Comments
 (0)
0