@@ -112,7 +112,8 @@ class Connection:
112
112
(default: None - no timeout)
113
113
:param write_timeout: The timeout for writing to the connection in seconds.
114
114
(default: None - no timeout)
115
- :param charset: Charset to use.
115
+ :param str charset: Charset to use.
116
+ :param str collation: Collation name to use.
116
117
:param sql_mode: Default SQL_MODE to use.
117
118
:param read_default_file:
118
119
Specifies my.cnf file to read these parameters from under the [client] section.
@@ -174,6 +175,7 @@ def __init__(
174
175
unix_socket = None ,
175
176
port = 0 ,
176
177
charset = "" ,
178
+ collation = None ,
177
179
sql_mode = None ,
178
180
read_default_file = None ,
179
181
conv = None ,
@@ -308,6 +310,7 @@ def _config(key, arg):
308
310
self ._write_timeout = write_timeout
309
311
310
312
self .charset = charset or DEFAULT_CHARSET
313
+ self .collation = collation
311
314
self .use_unicode = use_unicode
312
315
313
316
self .encoding = charset_by_name (self .charset ).encoding
@@ -593,13 +596,32 @@ def ping(self, reconnect=True):
593
596
raise
594
597
595
598
def set_charset (self , charset ):
599
+ """Deprecated. Use set_character_set() instead."""
600
+ # This function existed in older versions of PyMySQL.
601
+ # But its name differs from the one used by MySQLdb.
602
+ # So we keep this function for compatibility and add
603
+ # new set_character_set() function.
604
+ self .set_character_set (charset )
605
+
606
+ def set_character_set (self , charset , collation = None ):
607
+ """
608
+ Set charset (and collation)
609
+
610
+ Send "SET NAMES charset [COLLATE collation]" query.
611
+ Update Connection.encoding based on charset.
612
+ """
596
613
# Make sure charset is supported.
597
614
encoding = charset_by_name (charset ).encoding
598
615
599
- self ._execute_command (COMMAND .COM_QUERY , "SET NAMES %s" % self .escape (charset ))
616
+ if collation :
617
+ query = f"SET NAMES { charset } COLLATE { collation } "
618
+ else :
619
+ query = f"SET NAMES { charset } "
620
+ self ._execute_command (COMMAND .COM_QUERY , query )
600
621
self ._read_packet ()
601
622
self .charset = charset
602
623
self .encoding = encoding
624
+ self .collation = collation
603
625
604
626
def connect (self , sock = None ):
605
627
self ._closed = False
@@ -641,15 +663,30 @@ def connect(self, sock=None):
641
663
self ._get_server_information ()
642
664
self ._request_authentication ()
643
665
666
+ # Send "SET NAMES" query on init for:
667
+ # - Ensure charset (and collation) is set to the server.
668
+ # - collation_id in handshake packet may be ignored.
669
+ # - If collation is not specified, we don't know the server's
670
+ # default collation for the charset. For example, default collation
671
+ # of utf8mb4 is:
672
+ # - MySQL 5.7, MariaDB 10.x: utf8mb4_general_ci
673
+ # - MySQL 8.0: utf8mb4_0900_ai_ci
674
+ #
675
+ # Reference:
676
+ # - https://github.com/PyMySQL/PyMySQL/issues/1092
677
+ # - https://github.com/wagtail/wagtail/issues/9477
678
+ # - https://zenn.dev/methane/articles/2023-mysql-collation (Japanese)
679
+ self .set_character_set (self .charset , self .collation )
680
+
644
681
if self .sql_mode is not None :
645
682
c = self .cursor ()
646
683
c .execute ("SET sql_mode=%s" , (self .sql_mode ,))
684
+ c .close ()
647
685
648
686
if self .init_command is not None :
649
687
c = self .cursor ()
650
688
c .execute (self .init_command )
651
689
c .close ()
652
- self .commit ()
653
690
654
691
if self .autocommit_mode is not None :
655
692
self .autocommit (self .autocommit_mode )
0 commit comments