Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PS-9237 feature: Include support for utf8mb4_0900_ai_ci in MySQL 5.7 #5364

Draft
wants to merge 8 commits into
base: release-8.0.37-29
Choose a base branch
from
164 changes: 164 additions & 0 deletions mysql-test/r/percona_default_collation_for_utf8mb4_extensions.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
RESET MASTER;
*** Variables from the default session
*** (values for all collation variables are expected to be utf8mb4_0900_ai_ci)
SHOW GLOBAL VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_0900_ai_ci
collation_database utf8mb4_0900_ai_ci
collation_server utf8mb4_0900_ai_ci
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci
SHOW SESSION VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_0900_ai_ci
collation_database utf8mb4_0900_ai_ci
collation_server utf8mb4_0900_ai_ci
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci

*** Variables from the new connection established via MySQL command line client
*** (values for all collation variables are expected to be utf8mb4_0900_ai_ci)
Variable_name Value
collation_connection utf8mb4_0900_ai_ci
collation_database utf8mb4_0900_ai_ci
collation_server utf8mb4_0900_ai_ci
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci
Variable_name Value
collation_connection utf8mb4_0900_ai_ci
collation_database utf8mb4_0900_ai_ci
collation_server utf8mb4_0900_ai_ci
default_collation_for_utf8mb4 utf8mb4_0900_ai_ci


*** Updating collation variables
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_general_ci;
Warnings:
Warning 1681 Updating 'default_collation_for_utf8mb4' is deprecated. It will be made read-only in a future release.
SET GLOBAL collation_server = utf8mb4_general_ci;
SET GLOBAL collation_connection = utf8mb4_general_ci;
SET GLOBAL collation_database = utf8mb4_general_ci;
Warnings:
Warning 1681 Updating 'collation_database' is deprecated. It will be made read-only in a future release.


*** Re-connecting


*** Variables after re-connecting to the default database
*** (values for all collation variables except for @@session.collation_database are expected to be utf8mb4_general_ci)
SHOW GLOBAL VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_general_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci
SHOW SESSION VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_0900_ai_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci


*** Creating a fresh database
CREATE DATABASE fresh;
SHOW CREATE DATABASE fresh;
Database Create Database
fresh CREATE DATABASE `fresh` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci */ /*!80016 DEFAULT ENCRYPTION='N' */


*** Variables after connecting to a fresh database
*** (values for collation variables expected to be utf8mb4_general_ci)
SHOW GLOBAL VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_general_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci
SHOW SESSION VARIABLES LIKE '%collation%';
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_general_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci


*** Variables from the new connection established via MySQL command line client to a fresh database
*** (values for all collation variables are expected to be utf8mb4_general_ci)
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_general_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci
Variable_name Value
collation_connection utf8mb4_general_ci
collation_database utf8mb4_general_ci
collation_server utf8mb4_general_ci
default_collation_for_utf8mb4 utf8mb4_general_ci


*** Creating tables in the fresh database from the default connection
CREATE TABLE t1(id BIGINT UNSIGNED);
SET character_set_client = utf8mb4;
CREATE TABLE t2(id BIGINT UNSIGNED);
SET NAMES DEFAULT;
CREATE TABLE t3(id BIGINT UNSIGNED);
SET NAMES utf8mb4;
CREATE TABLE t4(id BIGINT UNSIGNED);
SET NAMES utf8mb4 COLLATE utf8mb4_general_ci;
CREATE TABLE t5(id BIGINT UNSIGNED);
SET CHARACTER SET DEFAULT;
CREATE TABLE t6(id BIGINT UNSIGNED);
SET CHARACTER SET utf8mb4;
CREATE TABLE t7(id BIGINT UNSIGNED);
SET collation_connection = utf8mb4_general_ci;
CREATE TABLE t8(id BIGINT UNSIGNED);


*** Making sure that binlog events created implicitly via stored procedures
*** and triggers have character_set_client = 45
CREATE TRIGGER test_trigger BEFORE INSERT ON t2 FOR EACH ROW
BEGIN
INSERT INTO t1 SET id = NEW.id;
END|
CREATE PROCEDURE proc()
BEGIN
INSERT INTO t2 VALUES (42);
END|
INSERT INTO t2 VALUES(1);
CALL proc();


*** Creating tables in the fresh database from a connection established via MySQL command line client


*** Creating logical dump of the fresh database


*** Creating a database for restoring data from the logical dump
CREATE DATABASE restore;
SHOW CREATE DATABASE restore;
Database Create Database
restore CREATE DATABASE `restore` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci */ /*!80016 DEFAULT ENCRYPTION='N' */


*** Restoring data from the logical dump


*** Checking events in the binary log
include/assert_grep.inc [Events in the binary log must use utf8mb4_general_ci (45) collation]
include/assert_grep.inc [Events in the binary log must not use utf8mb4_0900_ai_ci (255) collation]


*** Dropping fresh database
DROP DATABASE restore;
DROP DATABASE fresh;


*** Restoring collation variables
SET GLOBAL collation_database = utf8mb4_0900_ai_ci;
Warnings:
Warning 1681 Updating 'collation_database' is deprecated. It will be made read-only in a future release.
SET GLOBAL collation_connection = utf8mb4_0900_ai_ci;
SET GLOBAL collation_server = utf8mb4_0900_ai_ci;
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_0900_ai_ci;
Warnings:
Warning 1681 Updating 'default_collation_for_utf8mb4' is deprecated. It will be made read-only in a future release.
185 changes: 185 additions & 0 deletions mysql-test/t/percona_default_collation_for_utf8mb4_extensions.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Checks that utf8mb4_general_ci configured through
# default_collation_for_utf8mb4 and the collation_* variables is picked up by
# new connections (mysqltest connections as well as the MySQL command line
# client) and that events written to the binary log use collation 45
# (utf8mb4_general_ci) rather than 255 (utf8mb4_0900_ai_ci).

# The test resets, inspects and dumps the binary log, so it must be skipped
# (instead of failing) when the server runs without binary logging.
--source include/have_log_bin.inc

# Paired with include/wait_until_count_sessions.inc in the cleanup part.
--source include/count_sessions.inc

# needed for repeatable content of the binary log
RESET MASTER;

# Statements reused for every snapshot of the collation variables.
--let $global_col_stmt = SHOW GLOBAL VARIABLES LIKE '%collation%'
--let $session_col_stmt = SHOW SESSION VARIABLES LIKE '%collation%'

--echo *** Variables from the default session
--echo *** (values for all collation variables are expected to be utf8mb4_0900_ai_ci)
eval $global_col_stmt;
eval $session_col_stmt;

# Database used by the default connection; it is passed to the command line
# client below and reused when the default connection is re-created.
--let $default_database = `SELECT DATABASE()`

--echo
--echo *** Variables from the new connection established via MySQL command line client
--echo *** (values for all collation variables are expected to be utf8mb4_0900_ai_ci)
# Both statements are sent in a single client invocation, so the result file
# contains two result sets without the statements themselves.
--exec $MYSQL -e "$global_col_stmt; $session_col_stmt;" $default_database

--echo
--echo
--echo *** Updating collation variables
# Remember the current global values; they are put back in the
# "Restoring collation variables" part at the end of the test.
--let $saved_default_collation_for_utf8mb4 = `SELECT @@global.default_collation_for_utf8mb4`
--let $saved_collation_server = `SELECT @@global.collation_server`
--let $saved_collation_connection = `SELECT @@global.collation_connection`
--let $saved_collation_database = `SELECT @@global.collation_database`

# Switch every global collation setting to utf8mb4_general_ci.
SET GLOBAL default_collation_for_utf8mb4 = utf8mb4_general_ci;
SET GLOBAL collation_server = utf8mb4_general_ci;
SET GLOBAL collation_connection = utf8mb4_general_ci;
SET GLOBAL collation_database = utf8mb4_general_ci;

--echo
--echo
--echo *** Re-connecting
# Re-create the default connection so that the new session starts from the
# global values changed above.
--disconnect default
--connect (default,localhost,root,,$default_database)

--echo
--echo
--echo *** Variables after re-connecting to the default database
--echo *** (values for all collation variables except for @@session.collation_database are expected to be utf8mb4_general_ci)
# NOTE(review): @@session.collation_database presumably follows the collation
# of the default database, which was created before the globals were
# changed - confirm.
eval $global_col_stmt;
eval $session_col_stmt;

--echo
--echo
--echo *** Creating a fresh database
# A database created after the globals were changed; SHOW CREATE DATABASE
# records which default collation it received.
--let $fresh_database = fresh
eval CREATE DATABASE $fresh_database;
eval SHOW CREATE DATABASE $fresh_database;

# Open a second connection that uses the fresh database. The connect command
# also makes con1 the current connection, so the statements that follow run
# on con1 until the test switches back to default in the cleanup part.
--connect (con1,localhost,root,,$fresh_database)

--echo
--echo
--echo *** Variables after connecting to a fresh database
--echo *** (values for collation variables expected to be utf8mb4_general_ci)
eval $global_col_stmt;
eval $session_col_stmt;

--echo
--echo
--echo *** Variables from the new connection established via MySQL command line client to a fresh database
--echo *** (values for all collation variables are expected to be utf8mb4_general_ci)
--exec $MYSQL -e "$global_col_stmt; $session_col_stmt;" $fresh_database

# Current binary log file name and server port; both are passed to
# mysqlbinlog in the "Checking events in the binary log" part.
--let $binlog_file = query_get_value("SHOW MASTER STATUS", File, 1)
--let $server_port = `SELECT @@port`

--echo
--echo
--echo *** Creating tables in the fresh database from the default connection
# NOTE(review): the current connection at this point is con1 (opened on the
# fresh database), not the connection named default; the echo text above is
# kept as is because it is part of the recorded result.
#
# Each CREATE TABLE follows a different way of (re)setting the connection
# character set / collation, so that one DDL event is written to the binary
# log for every variant.
CREATE TABLE t1(id BIGINT UNSIGNED);
SET character_set_client = utf8mb4;
CREATE TABLE t2(id BIGINT UNSIGNED);
SET NAMES DEFAULT;
CREATE TABLE t3(id BIGINT UNSIGNED);
SET NAMES utf8mb4;
CREATE TABLE t4(id BIGINT UNSIGNED);
SET NAMES utf8mb4 COLLATE utf8mb4_general_ci;
CREATE TABLE t5(id BIGINT UNSIGNED);
# Note that 'SET NAMES utf8mb4 COLLATE utf8mb4_0900_ai_ci' will generate
# character_set_client=255 in the binary log, so it is not used here.
SET CHARACTER SET DEFAULT;
CREATE TABLE t6(id BIGINT UNSIGNED);
SET CHARACTER SET utf8mb4;
CREATE TABLE t7(id BIGINT UNSIGNED);
SET collation_connection = utf8mb4_general_ci;
CREATE TABLE t8(id BIGINT UNSIGNED);
# Note that 'SET collation_connection = utf8mb4_0900_ai_ci' will generate
# character_set_client=255 in the binary log, so it is not used here.
# Also, statements like 'SET collation_connection = utf8mb4', which try to
# assign a character set name to a collation variable, are considered
# syntactically incorrect, so such checks are not included in the test
# plan.


--echo
--echo
--echo *** Making sure that binlog events created implicitly via stored procedures
--echo *** and triggers have character_set_client = 45
# test_trigger copies every id inserted into t2 to t1, and proc() inserts a
# row into t2, so the INSERT and CALL statements below exercise both the
# trigger alone and the trigger invoked from within a stored procedure.
# The delimiter is switched to | so that the routine bodies may contain ;.
delimiter |;
CREATE TRIGGER test_trigger BEFORE INSERT ON t2 FOR EACH ROW
BEGIN
INSERT INTO t1 SET id = NEW.id;
END|

CREATE PROCEDURE proc()
BEGIN
INSERT INTO t2 VALUES (42);
END|
delimiter ;|

INSERT INTO t2 VALUES(1);
CALL proc();

--echo
--echo
--echo *** Creating tables in the fresh database from a connection established via MySQL command line client
# One table is created before and one after the client explicitly sets
# character_set_client = utf8mb4.
--exec $MYSQL -e "CREATE TABLE tbl_external_before(id BIGINT UNSIGNED); SET character_set_client = utf8mb4; CREATE TABLE tbl_external_after(id BIGINT UNSIGNED);" $fresh_database


--echo
--echo
--echo *** Creating logical dump of the fresh database
# Schema-only dump (--no-data) written to a temporary file.
--let $fresh_dump_file = $MYSQL_TMP_DIR/fresh_dump.sql
--exec $MYSQL_DUMP --column-statistics=0 --no-data $fresh_database > $fresh_dump_file

--echo
--echo
--echo *** Creating a database for restoring data from the logical dump
--let $restore_database = restore
eval CREATE DATABASE $restore_database;
eval SHOW CREATE DATABASE $restore_database;

--echo
--echo
--echo *** Restoring data from the logical dump
# Replaying the dump through the command line client writes the restored
# DDL statements to the binary log as well.
--exec $MYSQL $restore_database < $fresh_dump_file

# The dump file is not needed once it has been replayed.
--remove_file $fresh_dump_file

--echo
--echo
--echo *** Checking events in the binary log
# Dump the binary log, from $binlog_file up to the last log, by reading it
# from the running server, and store the text output in a temporary file.
--let $binlog_dump_file = $MYSQL_TMP_DIR/binlog_dump.sql
--exec $MYSQL_BINLOG --read-from-remote-server --host=127.0.0.1 --port=$server_port --user=root --to-last-log $binlog_file > $binlog_dump_file

# The binary log is expected to have the following 2 lines with collation 45 (utf8mb4_general_ci)
# SET @@session.character_set_client=45,@@session.collation_connection=45,@@session.collation_server=45/*!*/;
# /*!80011 SET @@session.default_collation_for_utf8mb4=45*//*!*/;
--let $assert_text = Events in the binary log must use utf8mb4_general_ci (45) collation
--let $assert_file = $binlog_dump_file
--let $assert_select = =45
--let $assert_count = 2
--source include/assert_grep.inc

# No line of the dump may assign collation 255 (utf8mb4_0900_ai_ci).
--let $assert_text = Events in the binary log must not use utf8mb4_0900_ai_ci (255) collation
--let $assert_file = $binlog_dump_file
--let $assert_select = =255
--let $assert_count = 0
--source include/assert_grep.inc

--remove_file $binlog_dump_file


# Close the extra connection, switch back to the default one and wait for
# the session count recorded by include/count_sessions.inc at the start.
--disconnect con1
--connection default
--source include/wait_until_count_sessions.inc

--echo
--echo
--echo *** Dropping fresh database
# Dropping the databases also removes the tables, trigger and procedure
# created in them.
eval DROP DATABASE $restore_database;
eval DROP DATABASE $fresh_database;

--echo
--echo
--echo *** Restoring collation variables
# Put back the global values saved before the update, in the reverse order
# of the earlier SET GLOBAL statements.
eval SET GLOBAL collation_database = $saved_collation_database;
eval SET GLOBAL collation_connection = $saved_collation_connection;
eval SET GLOBAL collation_server = $saved_collation_server;
eval SET GLOBAL default_collation_for_utf8mb4 = $saved_default_collation_for_utf8mb4;
9 changes: 9 additions & 0 deletions sql/parse_tree_nodes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,15 @@ bool PT_option_value_no_option_type_charset::contextualize(Parse_context *pc) {
const CHARSET_INFO *cs2;
cs2 =
opt_charset ? opt_charset : global_system_variables.character_set_client;
// Adjust cs2 when default_collation_for_utf8mb4 is set to a non-default
// value: utf8mb4_0900_ai_ci is replaced with the configured collation.
if (thd->variables.default_collation_for_utf8mb4 !=
&my_charset_utf8mb4_0900_ai_ci) {
if (cs2 == &my_charset_utf8mb4_0900_ai_ci) {
cs2 = thd->variables.default_collation_for_utf8mb4;
}
}

set_var_collation_client *var;
var = new (thd->mem_root) set_var_collation_client(
flags, cs2, thd->variables.collation_database, cs2);
Expand Down
16 changes: 16 additions & 0 deletions sql/sql_connect.cc
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,22 @@ void reset_mqh(THD *thd, LEX_USER *lu, bool get_them = false) {

bool thd_init_client_charset(THD *thd, uint cs_number) {
CHARSET_INFO *cs;

// If an 8.0 client sets the 'MYSQL_SET_CHARSET_NAME' option to 'utf8mb4' or
// leaves it empty (which basically means the same), this function is called
// with 'cs_number' equal to 255 (meaning 'utf8mb4_0900_ai_ci').
//
// At the same time, if 'default_collation_for_utf8mb4' is set to something
// other than the default 'utf8mb4' collation ('utf8mb4_0900_ai_ci', number
// 255), we need to fix 'cs_number' here by setting it to the number of the
// 'default_collation_for_utf8mb4' collation (currently only
// 'utf8mb4_general_ci', number 45, is supported).
if (thd->variables.default_collation_for_utf8mb4 !=
&my_charset_utf8mb4_0900_ai_ci) {
if (cs_number == my_charset_utf8mb4_0900_ai_ci.number) {
cs_number = thd->variables.default_collation_for_utf8mb4->number;
}
}
/*
Use server character set and collation if
- opt_character_set_client_handshake is not set
Expand Down
Loading
Loading