-
Notifications
You must be signed in to change notification settings - Fork 481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PS-9237 feature: Include support for utf8mb4_0900_ai_ci in MySQL 5.7 #5364
base: release-8.0.37-29
Are you sure you want to change the base?
Changes from 3 commits
a39be6d
873c1eb
da0002b
06da781
3041e9a
ccb29cc
e5dc3bf
bfc10f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
RESET MASTER; | ||
*** Variables from the default session | ||
*** (values for all collation variables are expected to be utf8mb4_0900_ai_ci) | ||
SHOW GLOBAL VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_0900_ai_ci | ||
collation_database utf8mb4_0900_ai_ci | ||
collation_server utf8mb4_0900_ai_ci | ||
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci | ||
SHOW SESSION VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_0900_ai_ci | ||
collation_database utf8mb4_0900_ai_ci | ||
collation_server utf8mb4_0900_ai_ci | ||
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci | ||
|
||
*** Variables from the new connection established via MySQL command line client | ||
*** (values for all collation variables are expected to be utf8mb4_0900_ai_ci) | ||
Variable_name Value | ||
collation_connection utf8mb4_0900_ai_ci | ||
collation_database utf8mb4_0900_ai_ci | ||
collation_server utf8mb4_0900_ai_ci | ||
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci | ||
Variable_name Value | ||
collation_connection utf8mb4_0900_ai_ci | ||
collation_database utf8mb4_0900_ai_ci | ||
collation_server utf8mb4_0900_ai_ci | ||
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci | ||
|
||
|
||
*** Updating collation variables | ||
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_general_ci; | ||
Warnings: | ||
Warning 1681 Updating 'default_collation_for_utf8mb4' is deprecated. It will be made read-only in a future release. | ||
SET GLOBAL collation_server = utf8mb4_general_ci; | ||
SET GLOBAL collation_connection = utf8mb4_general_ci; | ||
SET GLOBAL collation_database = utf8mb4_general_ci; | ||
Warnings: | ||
Warning 1681 Updating 'collation_database' is deprecated. It will be made read-only in a future release. | ||
|
||
|
||
*** Re-connecting | ||
|
||
|
||
*** Variables after re-connecting to the default database | ||
*** (values for all collation variables except for @@session.collation_database are expected to be utf8mb4_general_ci) | ||
SHOW GLOBAL VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_general_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
SHOW SESSION VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_0900_ai_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
|
||
|
||
*** Creating a fresh database | ||
CREATE DATABASE fresh; | ||
SHOW CREATE DATABASE fresh; | ||
Database Create Database | ||
fresh CREATE DATABASE `fresh` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci */ /*!80016 DEFAULT ENCRYPTION='N' */ | ||
|
||
|
||
*** Variables after connecting to a fresh database | ||
*** (values for collation variables expected to be utf8mb4_general_ci) | ||
SHOW GLOBAL VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_general_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
SHOW SESSION VARIABLES LIKE '%collation%'; | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_general_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
|
||
*** Variables from the new connection established via MySQL command line client to a fresh database | ||
*** (values for all collation variables are expected to be utf8mb4_general_ci) | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_general_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
Variable_name Value | ||
collation_connection utf8mb4_general_ci | ||
collation_database utf8mb4_general_ci | ||
collation_server utf8mb4_general_ci | ||
default_collation_for_utf8mb4 utf8mb4_general_ci | ||
|
||
|
||
*** Creating tables in the fresh database from the default connection | ||
CREATE TABLE tbl_internal_before(id BIGINT UNSIGNED); | ||
SET character_set_client = utf8mb4; | ||
CREATE TABLE tbl_internal_after(id BIGINT UNSIGNED); | ||
|
||
|
||
*** Creating tables in the fresh database from a connection established via MySQL command line client | ||
|
||
|
||
*** Creating logical dump of the fresh database | ||
|
||
|
||
*** Creating a database for restoring data from the logical dump | ||
CREATE DATABASE restore; | ||
SHOW CREATE DATABASE restore; | ||
Database Create Database | ||
restore CREATE DATABASE `restore` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci */ /*!80016 DEFAULT ENCRYPTION='N' */ | ||
|
||
|
||
*** Restoring data from the logical dump | ||
|
||
|
||
*** Checking events in the binary log | ||
include/assert_grep.inc [Events in the binary log must use utf8mb4_general_ci (45) collation] | ||
include/assert_grep.inc [Events in the binary log must not use utf8mb4_0900_ai_ci (255) collation] | ||
|
||
|
||
*** Dropping fresh database | ||
DROP DATABASE restore; | ||
DROP DATABASE fresh; | ||
|
||
|
||
*** Restoring collation variables | ||
SET GLOBAL collation_database = utf8mb4_0900_ai_ci; | ||
Warnings: | ||
Warning 1681 Updating 'collation_database' is deprecated. It will be made read-only in a future release. | ||
SET GLOBAL collation_connection = utf8mb4_0900_ai_ci; | ||
SET GLOBAL collation_server = utf8mb4_0900_ai_ci; | ||
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_0900_ai_ci; | ||
Warnings: | ||
Warning 1681 Updating 'default_collation_for_utf8mb4' is deprecated. It will be made read-only in a future release. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
--source include/count_sessions.inc | ||
|
||
# needed for repeatable content of the binary log | ||
RESET MASTER; | ||
|
||
--let $global_col_stmt = SHOW GLOBAL VARIABLES LIKE '%collation%' | ||
--let $session_col_stmt = SHOW SESSION VARIABLES LIKE '%collation%' | ||
|
||
--echo *** Variables from the default session | ||
--echo *** (values for all collation variables are expected to be utf8mb4_0900_ai_ci) | ||
eval $global_col_stmt; | ||
eval $session_col_stmt; | ||
|
||
--let $default_database = `SELECT DATABASE()` | ||
|
||
--echo | ||
--echo *** Variables from the new connection established via MySQL command line client | ||
--echo *** (values for all collation variables are expected to be utf8mb4_0900_ai_ci) | ||
--exec $MYSQL -e "$global_col_stmt; $session_col_stmt;" $default_database | ||
|
||
--echo | ||
--echo | ||
--echo *** Updating collation variables | ||
--let $saved_default_collation_for_utf8mb4 = `SELECT @@global.default_collation_for_utf8mb4` | ||
--let $saved_collation_server = `SELECT @@global.collation_server` | ||
--let $saved_collation_connection = `SELECT @@global.collation_connection` | ||
--let $saved_collation_database = `SELECT @@global.collation_database` | ||
|
||
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_general_ci; | ||
SET GLOBAL collation_server = utf8mb4_general_ci; | ||
SET GLOBAL collation_connection = utf8mb4_general_ci; | ||
SET GLOBAL collation_database = utf8mb4_general_ci; | ||
|
||
--echo | ||
--echo | ||
--echo *** Re-connecting | ||
--disconnect default | ||
--connect (default,localhost,root,,$default_database) | ||
|
||
--echo | ||
--echo | ||
--echo *** Variables after re-connecting to the default database | ||
--echo *** (values for all collation variables except for @@session.collation_database are expected to be utf8mb4_general_ci) | ||
eval $global_col_stmt; | ||
eval $session_col_stmt; | ||
|
||
--echo | ||
--echo | ||
--echo *** Creating a fresh database | ||
--let $fresh_database = fresh | ||
eval CREATE DATABASE $fresh_database; | ||
eval SHOW CREATE DATABASE $fresh_database; | ||
|
||
--connect (con1,localhost,root,,$fresh_database) | ||
|
||
--echo | ||
--echo | ||
--echo *** Variables after connecting to a fresh database | ||
--echo *** (values for collation variables expected to be utf8mb4_general_ci) | ||
eval $global_col_stmt; | ||
eval $session_col_stmt; | ||
|
||
--echo | ||
--echo *** Variables from the new connection established via MySQL command line client to a fresh database | ||
--echo *** (values for all collation variables are expected to be utf8mb4_general_ci) | ||
--exec $MYSQL -e "$global_col_stmt; $session_col_stmt;" $fresh_database | ||
|
||
--let $binlog_file = query_get_value("SHOW MASTER STATUS", File, 1) | ||
--let $server_port = `SELECT @@port` | ||
|
||
--echo | ||
--echo | ||
--echo *** Creating tables in the fresh database from the default connection | ||
CREATE TABLE tbl_internal_before(id BIGINT UNSIGNED); | ||
SET character_set_client = utf8mb4; | ||
CREATE TABLE tbl_internal_after(id BIGINT UNSIGNED); | ||
|
||
--echo | ||
--echo | ||
--echo *** Creating tables in the fresh database from a connection established via MySQL command line client | ||
--exec $MYSQL -e "CREATE TABLE tbl_external_before(id BIGINT UNSIGNED); SET character_set_client = utf8mb4; CREATE TABLE tbl_external_after(id BIGINT UNSIGNED);" $fresh_database | ||
|
||
|
||
--echo | ||
--echo | ||
--echo *** Creating logical dump of the fresh database | ||
--let $fresh_dump_file = $MYSQL_TMP_DIR/fresh_dump.sql | ||
--exec $MYSQL_DUMP --column-statistics=0 --no-data $fresh_database > $fresh_dump_file | ||
|
||
--echo | ||
--echo | ||
--echo *** Creating a database for restoring data from the logical dump | ||
--let $restore_database = restore | ||
eval CREATE DATABASE $restore_database; | ||
eval SHOW CREATE DATABASE $restore_database; | ||
|
||
--echo | ||
--echo | ||
--echo *** Restoring data from the logical dump | ||
--exec $MYSQL $restore_database < $fresh_dump_file | ||
|
||
--remove_file $fresh_dump_file | ||
|
||
--echo | ||
--echo | ||
--echo *** Checking events in the binary log | ||
--let $binlog_dump_file = $MYSQL_TMP_DIR/binlog_dump.sql | ||
--exec $MYSQL_BINLOG --read-from-remote-server --host=127.0.0.1 --port=$server_port --user=root --to-last-log $binlog_file > $binlog_dump_file | ||
|
||
# The binary log is expected to have the following 2 lines with collation 45 (utf8mb4_general_ci) | ||
# SET @@session.character_set_client=45,@@session.collation_connection=45,@@session.collation_server=45/*!*/; | ||
# /*!80011 SET @@session.default_collation_for_utf8mb4=45*//*!*/; | ||
--let $assert_text = Events in the binary log must use utf8mb4_general_ci (45) collation | ||
--let $assert_file = $binlog_dump_file | ||
--let $assert_select = =45 | ||
--let $assert_count = 2 | ||
--source include/assert_grep.inc | ||
|
||
--let $assert_text = Events in the binary log must not use utf8mb4_0900_ai_ci (255) collation | ||
--let $assert_file = $binlog_dump_file | ||
--let $assert_select = =255 | ||
--let $assert_count = 0 | ||
--source include/assert_grep.inc | ||
|
||
--remove_file $binlog_dump_file | ||
|
||
|
||
--disconnect con1 | ||
--connection default | ||
--source include/wait_until_count_sessions.inc | ||
|
||
--echo | ||
--echo | ||
--echo *** Dropping fresh database | ||
eval DROP DATABASE $restore_database; | ||
eval DROP DATABASE $fresh_database; | ||
|
||
--echo | ||
--echo | ||
--echo *** Restoring collation variables | ||
eval SET GLOBAL collation_database = $saved_collation_database; | ||
eval SET GLOBAL collation_connection = $saved_collation_connection; | ||
eval SET GLOBAL collation_server = $saved_collation_server; | ||
eval SET GLOBAL default_collation_for_utf8mb4 = $saved_default_collation_for_utf8mb4; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1869,6 +1869,21 @@ static bool check_charset(sys_var *, THD *thd, set_var *var) { | |
my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), err.ptr()); | ||
return true; | ||
} | ||
// if 'default_collation_for_utf8mb4' is set to something other than | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about similar code in check_collation_not_null()? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After a bit more pondering about it, I think it is a good idea to have test coverage for cases where the user can use both charset name and collation name. Like Do you agree? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @dlenev I extended coverage with the following statements: SET character_set_client = utf8mb4;
SET NAMES DEFAULT;
SET NAMES utf8mb4;
SET NAMES utf8mb4 COLLATE utf8mb4_general_ci;
SET CHARACTER SET DEFAULT;
SET CHARACTER SET utf8mb4;
SET collation_connection = utf8mb4_general_ci; Note that 'SET NAMES utf8mb4 COLLATE utf8mb4_0900_ai_ci' will generate Likewise, 'SET collation_connection = utf8mb4_0900_ai_ci' will generate As for 'SET collation_connection = utf8mb4' (when we try to assign a character set name to a collation variable), there is no problem here as this statement is considered |
||
// default 'utf8mb4' collation ('utf8mb4_0900_ai_ci') and if the value | ||
// returned by 'get_charset_by_csname()' is also default 'utf8mb4' | ||
// collation ('utf8mb4_0900_ai_ci'), meaning that we were requesting | ||
// 'utf8mb4', we need to fix the returned value depending on the value of | ||
// 'default_collation_for_utf8mb4' (currently, only 'utf8mb4_general_ci' | ||
// is possible) | ||
const auto *primary_utf8mb4_collation = | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The same comment as for the previous patch: Perhaps it is better to simply do:
instead ? What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reworked the same way. |
||
get_charset_by_csname("utf8mb4", MY_CS_PRIMARY, MYF(0)); | ||
if (thd->variables.default_collation_for_utf8mb4 != | ||
primary_utf8mb4_collation) { | ||
if (var->save_result.ptr == primary_utf8mb4_collation) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
var->save_result.ptr = thd->variables.default_collation_for_utf8mb4; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
} | ||
warn_on_deprecated_charset( | ||
thd, static_cast<const CHARSET_INFO *>(var->save_result.ptr), | ||
err.ptr()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I can see in other places where we check if default_collation_for_utf8mb4 needs to kick in (e.g. see sql_lex.cc) we directly use my_charset_utf8mb4_0900_ai_ci instead of getting access to it through get_charset_by_csname().
IMO it makes sense to be consistent and do the same here... Especially since this code seems to be called for each connect so saving even a few CPU cycles would be nice.
What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also I think that instead of two ifs you can simply do:
What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, @dlenev I had exactly the same doubts and was going back and forth with this. On the one hand, I did not want to hardcode
primary_utf8mb4_collation
to be my_charset_utf8mb4_0900_ai_ci
(because who knows, maybe in the next version the default collation will change again). On the other hand, I totally agree that establishing the connection is a critical path and we should not add any unnecessary cycles here. Anyway, if this caught your attention as well, then it is probably more serious than I thought.
Let's wait for the final feedback from the customer and I will add the changes you suggested into the final patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dlenev I reworked the critical-path code with simplified versions
that do not involve charset-by-name lookup.