Hi, Aleksey! Note, it's a review of the `git diff 82e44d60d1e 8175ce8a5f1` so it's not only commit c4de76aeff8
On Apr 04, Aleksey Midenkov wrote: > revision-id: c4de76aeff8 (mariadb-10.5.2-540-gc4de76aeff8) > parent(s): 82e44d60d1e > author: Aleksey Midenkov <mide...@gmail.com> > committer: Aleksey Midenkov <mide...@gmail.com> > timestamp: 2021-03-31 21:17:55 +0300 > message: > > MDEV-17554 Auto-create new partition for system versioned tables with history > partitioned by INTERVAL/LIMIT Overall it's very good! I have few minor questions/comments, see below. But it's almost done, I think. > diff --git a/mysql-test/suite/versioning/r/delete_history.result > b/mysql-test/suite/versioning/r/delete_history.result > index cb865a835b3..90c9e4777bb 100644 > --- a/mysql-test/suite/versioning/r/delete_history.result > +++ b/mysql-test/suite/versioning/r/delete_history.result > @@ -154,3 +152,18 @@ select * from t1; > a > 1 > drop table t1; > +# > +# MDEV-17554 Auto-create new partition for system versioned tables with > history partitioned by INTERVAL/LIMIT > +# > +# Don't auto-create new partition on DELETE HISTORY: > +create or replace table t (a int) with system versioning > +partition by system_time limit 1000 auto; > +delete history from t; > +show create table t; > +Table Create Table > +t CREATE TABLE `t` ( > + `a` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO > +PARTITIONS 2 Hmm, and if DELETE HISTORY would auto-create new partitions, what output would one expect here? I mean, how one can see whether the test result is correct or wrong? > +drop table t; > diff --git a/mysql-test/suite/versioning/t/partition.test > b/mysql-test/suite/versioning/t/partition.test > index 445f5844630..57b80ca0b47 100644 > --- a/mysql-test/suite/versioning/t/partition.test > +++ b/mysql-test/suite/versioning/t/partition.test > @@ -1068,11 +1078,412 @@ alter table t1 add partition partitions 2; > --replace_result $default_engine DEFAULT_ENGINE > show create table t1; > alter table t1 add partition partitions 8; > +drop table t1; > + > +--echo # > +--echo # MDEV-17554 Auto-create new partition for system versioned tables > with history partitioned by INTERVAL/LIMIT > +--echo # > +create or replace table t1 (x int) with system versioning > +partition by system_time limit 1 auto; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +--echo # INSERT, INSERT .. SELECT don't increment partitions it's not really "increment", better say "don't auto-create" > +insert into t1 values (1); > + > +create or replace table t2 (y int); > +insert into t2 values (2); > + > +insert into t1 select * from t2; > +insert into t2 select * from t1; why do you need a t2 table in this test? > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +--echo # Too many partitions error > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto; > +set timestamp= unix_timestamp('2001-01-01 00:01:00'); > +--error ER_TOO_MANY_PARTITIONS_ERROR > +update t1 set x= x + 1; > + > +--enable_info > +--echo # Increment from 3 to 5 > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 3600 second > +starts '2000-01-01 00:00:00' auto partitions 3; > + > +insert into t1 values (1); > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +update t1 set x= x + 1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 03:00:00'); > +update t1 set x= x + 2; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +--echo # Increment from 3 to 6, manual names, LOCK TABLES > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto ( > + partition p1 history, > + partition p3 history, > + partition pn current); > + > +insert into t1 values (1); > --replace_result $default_engine DEFAULT_ENGINE > show create table t1; > > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +update t1 set x= x + 3; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 03:00:00'); > +update t1 set x= x + 4; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 04:00:00'); > +lock tables t1 write; > +update t1 set x= x + 5; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +unlock tables; > +set timestamp= default; > + > +--echo # Couple of more LOCK TABLES cases > +create or replace table t1 (x int) with system versioning > +partition by system_time limit 1000 auto; > +lock tables t1 write; > +insert into t1 values (1); > +update t1 set x= x + 1; > +unlock tables; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; what does this test case demonstrate? > + > +--echo # Overflow prevention under LOCK TABLES > +create or replace table t1 (x int) > +with system versioning partition by system_time > +limit 10 auto; > + > +insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9); > +update t1 set x= x + 10; > + > +lock tables t1 write; > +update t1 set x= 1 where x = 11; > +update t1 set x= 2 where x = 12; > +update t1 set x= 3 where x = 13; > +unlock tables; > + > +select count(x) from t1 partition (p0); > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +drop tables t1; > + > +--echo # Test VIEW, LOCK TABLES > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto partitions 3; > +create or replace view v1 as select * from t1; > + > +insert into v1 values (1); why would a view matter in this test case? > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +lock tables t1 write; > +update t1 set x= x + 3; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +unlock tables; > + > +drop view v1; > drop tables t1; > > +--echo # Multiple increments in single command > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto partitions 3; > + > +create or replace table t2 (y int) with system versioning > +partition by system_time interval 1 hour auto; > + > +insert into t1 values (1); > +insert into t2 values (2); > + > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +update t1, t2 set x= x + 1, y= y + 1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t2; > + > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +update t1, t2 set x= x + 1, y= y + 1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t2; > + > +--echo # Here t2 is incremented too, but not updated > +set timestamp= unix_timestamp('2000-01-01 03:00:00'); > +update t1, t2 set t1.x= 0 where t1.x< t2.y; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +# Multiupdate_prelocking_strategy::handle_end() is processed after table > open. > +# For PS it is possible to skip unneeded auto-creation because the above > happens at > +# prepare stage and auto-creation is done at execute stage. > +--replace_result $default_engine DEFAULT_ENGINE 'PARTITIONS 4' 'PARTITIONS > ok' 'PARTITIONS 5' 'PARTITIONS ok' eh... I don't think this is really "ok". As far as I remember, Multiupdate_prelocking_strategy knows what tables should be opened for writing and what for reading. Why would a new partition be created for t2? > +show create table t2; > + > +drop tables t1, t2; > + > +--echo # PS, SP, LOCK TABLES > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto; > + > +insert into t1 values (1); > + > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +execute immediate 'update t1 set x= x + 5'; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +prepare s from 'update t1 set x= x + 6'; > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +execute s; execute s; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 03:00:00'); > +lock tables t1 write; > +execute s; execute s; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +unlock tables; > +drop prepare s; > + > +create procedure sp() update t1 set x= x + 7; > +set timestamp= unix_timestamp('2000-01-01 04:00:00'); > +call sp; call sp; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +set timestamp= unix_timestamp('2000-01-01 05:00:00'); > +lock tables t1 write; > +call sp; call sp; > +--replace_result $default_engine DEFAULT_ENGINE > +show create table t1; > +unlock tables; > +drop procedure sp; > + > +set timestamp= unix_timestamp('2001-01-01 00:00:00'); > +create or replace table t1 (i int) with system versioning > +partition by system_time interval 1 day starts '2000-01-01 00:00:00'; > +insert into t1 values (0); > +set timestamp= unix_timestamp('2001-01-01 00:00:01'); > +prepare s from 'update t1 set i= i + 1'; > +execute s; > +set timestamp= unix_timestamp('2001-01-02 00:00:01'); > +execute s; > +drop prepare s; > + > +if (!$MTR_COMBINATION_HEAP) because of blobs? > +{ > +--echo # Complex table > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 ( > + x int primary key auto_increment, ... > +--echo # Transaction > +create or replace table t1 (x int) with system versioning engine innodb > +partition by system_time interval 1 hour auto; > +start transaction; > +insert into t1 values (1); > +select * from t1; > + > +--connect con1, localhost, root > +set lock_wait_timeout= 1; > +set innodb_lock_wait_timeout= 1; > +--error ER_LOCK_WAIT_TIMEOUT > +update t1 set x= x + 111; > +select * from t1; what do you test here? (there is no show create table in the test) > + > +# cleanup > +--disconnect con1 > +--connection default > +drop table t1; > + > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning engine innodb > +partition by system_time interval 1 hour auto; > + > +insert into t1 values (1); > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +start transaction; > +update t1 set x= 0; > +--connect con1, localhost, root > +select * from t1; if you add a show create table here, what would it show? > +--connection default > +commit; > +show create table t1; > + > +set timestamp= unix_timestamp('2000-01-01 02:00:00'); > +start transaction; > +update t1 set x= 1; > +--connection con1 > +select * from t1; > +--connection default > +rollback; > +show create table t1; > +--disconnect con1 > +--connection default > +drop table t1; > + > +--echo # > +--echo # MENT-724 Locking timeout caused by auto-creation affects original > DML I'd better avoid MENT references here. Let's only mention Jira issues that users can actually see > +--echo # > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int primary key) with system versioning engine > innodb > +partition by system_time interval 1 hour auto; > + > +insert into t1 values (1); > +start transaction; > +insert into t1 values (2); > + > +--connect con1, localhost, root > +set lock_wait_timeout= 1; > +set innodb_lock_wait_timeout= 1; > +set timestamp= unix_timestamp('2000-01-01 01:00:01'); > +update t1 set x= x + 122 where x = 1; isn't it a test that you already have above? with x = x + 111 > +--disconnect con1 > +--connection default > +select * from t1; > + > +# cleanup > +drop table t1; > +set timestamp= default; > + > --echo # > --echo # End of 10.5 tests > --echo # > diff --git a/mysql-test/suite/versioning/r/partition.result > b/mysql-test/suite/versioning/r/partition.result > index 2a277b1c2ea..8369b40d98c 100644 > --- a/mysql-test/suite/versioning/r/partition.result > +++ b/mysql-test/suite/versioning/r/partition.result > @@ -1236,27 +1270,752 @@ t1 CREATE TABLE `t1` ( ... > +set timestamp= unix_timestamp('2000-01-01 03:00:00'); > +affected rows: 0 > +lock tables t1 write; > +affected rows: 0 > +execute s; > +affected rows: 1 > +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0 > +execute s; > +affected rows: 1 > +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0 > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `x` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 > 00:00:00' AUTO > +PARTITIONS 6 why did it add two partitions here? > +affected rows: 1 > +unlock tables; > +affected rows: 0 > +drop prepare s; > +affected rows: 0 > +create procedure sp() update t1 set x= x + 7; > +affected rows: 0 > +set timestamp= unix_timestamp('2000-01-01 04:00:00'); > +affected rows: 0 > +call sp; > +affected rows: 1 > +call sp; > +affected rows: 1 > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `x` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 > 00:00:00' AUTO > +PARTITIONS 6 > +affected rows: 1 > +set timestamp= unix_timestamp('2000-01-01 05:00:00'); > +affected rows: 0 > +lock tables t1 write; > +affected rows: 0 > +call sp; > +affected rows: 1 > +call sp; > +affected rows: 1 > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `x` int(11) DEFAULT NULL > +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING > + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 > 00:00:00' AUTO > +PARTITIONS 8 and two here > +affected rows: 1 > +unlock tables; > +affected rows: 0 > +drop procedure sp; > +affected rows: 0 > +set timestamp= unix_timestamp('2001-01-01 00:00:00'); > +affected rows: 0 > +create or replace table t1 (i int) with system versioning > +partition by system_time interval 1 day starts '2000-01-01 00:00:00'; > +affected rows: 0 > +insert into t1 values (0); > +affected rows: 1 > +set timestamp= unix_timestamp('2001-01-01 00:00:01'); > +affected rows: 0 > +prepare s from 'update t1 set i= i + 1'; > +affected rows: 0 > +info: Statement prepared > +execute s; > +affected rows: 1 > +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 1 > +Warnings: > +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition > (`p0`) is out of INTERVAL, need more HISTORY partitions why? > +set timestamp= unix_timestamp('2001-01-02 00:00:01'); > +affected rows: 0 > +execute s; > +affected rows: 1 > +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 1 > +Warnings: > +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition > (`p0`) is out of INTERVAL, need more HISTORY partitions > +drop prepare s; > +affected rows: 0 > +# Complex table ... > diff --git a/mysql-test/suite/versioning/t/rpl.test > b/mysql-test/suite/versioning/t/rpl.test > index b5be68feece..54258a8bdf1 100644 > --- a/mysql-test/suite/versioning/t/rpl.test > +++ b/mysql-test/suite/versioning/t/rpl.test > @@ -133,4 +133,44 @@ sync_slave_with_master; > connection master; > drop table t1; > > +--echo # > +--echo # MDEV-17554 Auto-create new partition for system versioned tables > with history partitioned by INTERVAL/LIMIT > +--echo # > +set timestamp= unix_timestamp('2000-01-01 00:00:00'); > +create or replace table t1 (x int) with system versioning > +partition by system_time interval 1 hour auto; > +insert t1 values (); > +set timestamp= unix_timestamp('2000-01-01 01:00:00'); > +delete from t1; > +--replace_result InnoDB ENGINE MyISAM ENGINE MEMORY ENGINE > +show create table t1; > +--sync_slave_with_master > +--replace_result InnoDB ENGINE MyISAM ENGINE MEMORY ENGINE > +show create table t1; > +--connection master > +drop table t1; > +set timestamp= default; > + > +--echo # > +--echo # MENT-685 DML events for auto-partitioned tables are written into > binary log twice same comment about MENT > +--echo # > +create table t1 (a int) with system versioning > +partition by system_time limit 1 auto; > + > +insert into t1 values (1); > +update t1 set a= a + 1; > +update t1 set a= a + 1; > +--replace_result InnoDB ENGINE MyISAM ENGINE MEMORY ENGINE > +show create table t1; > +select * from t1; > + > +--sync_slave_with_master > +--replace_result InnoDB ENGINE MyISAM ENGINE MEMORY ENGINE > +show create table t1; > + > +select * from t1; > +--connection master > +# cleanup > +drop table t1; may be show binlog events? you know, to verify that DML events aren't written into binary log twice > + > --source include/rpl_end.inc > diff --git a/sql/ha_partition.h b/sql/ha_partition.h > index 60a2d7f6762..7ade4419c22 100644 > --- a/sql/ha_partition.h > +++ b/sql/ha_partition.h > @@ -1610,7 +1610,7 @@ class ha_partition :public handler > for (; part_id < part_id_end; ++part_id) > { > handler *file= m_file[part_id]; > - DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); > + bitmap_set_bit(&(m_part_info->read_partitions), part_id); Where was it set before your patch? > file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); > part_recs+= file->stats.records; > } > diff --git a/sql/handler.cc b/sql/handler.cc > index b312635c8ee..6bb6c279193 100644 > --- a/sql/handler.cc > +++ b/sql/handler.cc > @@ -1572,7 +1572,7 @@ int ha_commit_trans(THD *thd, bool all) > DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || > trans == &thd->transaction->stmt); > > - if (thd->in_sub_stmt) > + if (thd->in_sub_stmt & ~SUB_STMT_AUTO_HIST) > { where is this ha_commit_trans(thd, false) called from? mysql_alter_table that adds a new partition? > DBUG_ASSERT(0); > /* > diff --git a/sql/partition_info.cc b/sql/partition_info.cc > index 871411cf6c4..cf8dd140553 100644 > --- a/sql/partition_info.cc > +++ b/sql/partition_info.cc > @@ -816,10 +816,17 @@ bool partition_info::has_unique_name(partition_element > *element) > vers_info->interval Limit by fixed time interval > vers_info->hist_part (out) Working history partition > */ > -void partition_info::vers_set_hist_part(THD *thd) > +uint partition_info::vers_set_hist_part(THD *thd, bool auto_hist) > { > + DBUG_ASSERT(!thd->lex->last_table() || > + !thd->lex->last_table()->vers_conditions.delete_history); is that right? Conceptually you need to test vers_conditions.delete_history for the table that owns this partition_info. Is it always last_table() ? I'd say it'd be more logical to do, like part_field_array[0]->table->pos_in_table_list > + > + uint create_count= 0; > + auto_hist= auto_hist && vers_info->auto_hist; > + > if (vers_info->limit) > { > + DBUG_ASSERT(!vers_info->interval.is_set()); > ha_partition *hp= (ha_partition*)(table->file); > partition_element *next= NULL; > List_iterator<partition_element> it(partitions); > @@ -862,9 +869,183 @@ void partition_info::vers_set_hist_part(THD *thd) > { > vers_info->hist_part= next; > if (next->range_value > thd->query_start()) > - return; > + { > + error= false; > + break; > + } > + } > + if (error) > + { > + if (auto_hist) > + { > + DBUG_ASSERT(thd->query_start() >= vers_info->hist_part->range_value); > + my_time_t diff= thd->query_start() - (my_time_t) > vers_info->hist_part->range_value; > + if (diff > 0) > + { > + size_t delta= vers_info->interval.seconds(); > + create_count= (uint) (diff / delta + 1); > + if (diff % delta) > + create_count++; > + } > + else > + create_count= 1; > + } > + else > + { > + my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG), after such an overflow a table becomes essentially corrupted, rows are in a wrong partition. So new history partitions cannot be added anymore without reorganizing the last partition. How is it handled now? (it's just a question, not part of the review, as it's not something you were changing in your patch) > + table->s->db.str, table->s->table_name.str, > + vers_info->hist_part->partition_name, "INTERVAL"); > + } > } > } > + > + /* > + When hist_part is almost full LOCK TABLES my overflow the partition as > we s/my/may/ > + can't add new partitions under LOCK TABLES. Reserve one for this case. > + */ > + if (auto_hist && create_count == 0 && > + thd->lex->sql_command == SQLCOM_LOCK_TABLES && > + vers_info->hist_part->id + 1 == vers_info->now_part->id) > + create_count= 1; > + > + return create_count; > +} > + > + > +/** > + @brief Run fast_alter_partition_table() to add new history partitions > + for tables requiring them. > +*/ > +bool vers_add_auto_hist_parts(THD *thd, TABLE_LIST* tl, uint num_parts) > +{ > + bool result= true; > + HA_CREATE_INFO create_info; > + Alter_info alter_info; > + partition_info *save_part_info= thd->work_part_info; > + Query_tables_list save_query_tables; > + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer; > + bool save_no_write_to_binlog= thd->lex->no_write_to_binlog; > + thd->m_reprepare_observer= NULL; > + thd->lex->reset_n_backup_query_tables_list(&save_query_tables); > + thd->in_sub_stmt|= SUB_STMT_AUTO_HIST; > + thd->lex->no_write_to_binlog= true; > + TABLE *table= tl->table; > + > + DBUG_ASSERT(!thd->is_error()); > + > + { > + DBUG_ASSERT(table->s->get_table_ref_type() == TABLE_REF_BASE_TABLE); > + DBUG_ASSERT(table->versioned()); > + DBUG_ASSERT(table->part_info); > + DBUG_ASSERT(table->part_info->vers_info); > + alter_info.reset(); > + alter_info.partition_flags= > ALTER_PARTITION_ADD|ALTER_PARTITION_AUTO_HIST; > + create_info.init(); > + create_info.alter_info= &alter_info; > + Alter_table_ctx alter_ctx(thd, tl, 1, &table->s->db, > &table->s->table_name); > + > + MDL_REQUEST_INIT(&tl->mdl_request, MDL_key::TABLE, tl->db.str, > + tl->table_name.str, MDL_SHARED_NO_WRITE, > MDL_TRANSACTION); > + if (thd->mdl_context.acquire_lock(&tl->mdl_request, > + thd->variables.lock_wait_timeout)) > + goto exit; > + table->mdl_ticket= tl->mdl_request.ticket; > + > + create_info.db_type= table->s->db_type(); > + create_info.options|= HA_VERSIONED_TABLE; > + DBUG_ASSERT(create_info.db_type); > + > + > create_info.vers_info.set_start(table->s->vers_start_field()->field_name); > + create_info.vers_info.set_end(table->s->vers_end_field()->field_name); > + > + partition_info *part_info= new partition_info(); > + if (unlikely(!part_info)) > + { > + my_error(ER_OUT_OF_RESOURCES, MYF(0)); > + goto exit; > + } > + part_info->use_default_num_partitions= false; > + part_info->use_default_num_subpartitions= false; > + part_info->num_parts= num_parts; > + part_info->num_subparts= table->part_info->num_subparts; > + part_info->subpart_type= table->part_info->subpart_type; > + if (unlikely(part_info->vers_init_info(thd))) > + { > + my_error(ER_OUT_OF_RESOURCES, MYF(0)); > + goto exit; > + } > + > + // NB: set_ok_status() requires DA_EMPTY > + thd->get_stmt_da()->reset_diagnostics_area(); would it not be DA_EMPTY without a reset? this is at the beginning of a statement, I'd expect it normally be DA_EMPTY here. What other DA states can you get here? > + > + thd->work_part_info= part_info; > + if (part_info->set_up_defaults_for_partitioning(thd, table->file, NULL, > + > table->part_info->next_part_no(num_parts))) > + { > + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, > + ER_VERS_HIST_PART_FAILED, > + "Auto-increment history partition: " > + "setting up defaults failed"); > + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), > + tl->db.str, tl->table_name.str); > + goto exit; > + } > + bool partition_changed= false; > + bool fast_alter_partition= false; > + if (prep_alter_part_table(thd, table, &alter_info, &create_info, > + &partition_changed, &fast_alter_partition)) > + { > + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, > ER_VERS_HIST_PART_FAILED, > + "Auto-increment history partition: " > + "alter partitition prepare failed"); > + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), > + tl->db.str, tl->table_name.str); > + goto exit; > + } > + if (!fast_alter_partition) > + { > + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, > ER_VERS_HIST_PART_FAILED, > + "Auto-increment history partition: " > + "fast alter partitition is not possible"); > + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), > + tl->db.str, tl->table_name.str); > + goto exit; > + } > + DBUG_ASSERT(partition_changed); > + if (mysql_prepare_alter_table(thd, table, &create_info, &alter_info, > + &alter_ctx)) > + { > + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, > ER_VERS_HIST_PART_FAILED, > + "Auto-increment history partition: " > + "alter prepare failed"); > + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), > + tl->db.str, tl->table_name.str); > + goto exit; > + } > + > + if (fast_alter_partition_table(thd, table, &alter_info, &create_info, > + tl, &table->s->db, &table->s->table_name)) > + { > + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, > ER_VERS_HIST_PART_FAILED, > + "Auto-increment history partition: " > + "alter partition table failed"); > + my_error(ER_VERS_HIST_PART_FAILED, MYF(ME_WARNING), > + tl->db.str, tl->table_name.str); > + goto exit; > + } > + } > + > + result= false; > + // NB: we have to return DA_EMPTY for new command may be DBUG_ASSERT(thd->get_stmt_da()->is_ok()); > + thd->get_stmt_da()->reset_diagnostics_area(); > + > +exit: > + thd->work_part_info= save_part_info; > + thd->m_reprepare_observer= save_reprepare_observer; > + thd->lex->restore_backup_query_tables_list(&save_query_tables); > + thd->in_sub_stmt&= ~SUB_STMT_AUTO_HIST; > + thd->lex->no_write_to_binlog= save_no_write_to_binlog; > + return result; > } > > > diff --git a/sql/partition_info.h b/sql/partition_info.h > index 0656238ec07..94093353d60 100644 > --- a/sql/partition_info.h > +++ b/sql/partition_info.h > @@ -72,9 +73,26 @@ struct Vers_part_info : public Sql_alloc > my_time_t start; > INTERVAL step; > enum interval_type type; > - bool is_set() { return type < INTERVAL_LAST; } > + bool is_set() const { return type < INTERVAL_LAST; } > + size_t seconds() const > + { > + if (step.second) > + return step.second; > + if (step.minute) > + return step.minute * 60; > + if (step.hour) > + return step.hour * 3600; > + if (step.day) > + return step.day * 3600 * 24; > + // comparison is used in rough estimates, it doesn't need to be > calendar-correct > + if (step.month) > + return step.month * 3600 * 24 * 30; shouldn't it be `* 28` ? you need a most pessimistic estimate to make sure you never miss to create a partition when one is needed. You can sometimes create one when it's not needed yet, but it's less of a problem. > + DBUG_ASSERT(step.year); > + return step.year * 86400 * 30 * 365; that's one `* 30` too many :) > + } > } interval; > ulonglong limit; > + bool auto_hist; > partition_element *now_part; > partition_element *hist_part; > }; > diff --git a/sql/sql_base.cc b/sql/sql_base.cc > index 1a1186aca73..d0e255186da 100644 > --- a/sql/sql_base.cc > +++ b/sql/sql_base.cc > @@ -1625,6 +1625,52 @@ bool is_locked_view(THD *thd, TABLE_LIST *t) > } > > > +bool TABLE::vers_need_hist_part(const THD *thd, const TABLE_LIST > *table_list) const > +{ > +#ifdef WITH_PARTITION_STORAGE_ENGINE > + if (part_info && part_info->part_type == VERSIONING_PARTITION && > + !table_list->vers_conditions.delete_history && > + !thd->stmt_arena->is_stmt_prepare() && > + table_list->lock_type >= TL_WRITE_ALLOW_WRITE && > + table_list->mdl_request.type >= MDL_SHARED_WRITE && > + table_list->mdl_request.type < MDL_EXCLUSIVE) > + { > + switch (thd->lex->sql_command) > + { SQLCOM_LOAD with DUP_REPLACE? > + case SQLCOM_INSERT: > + if (thd->lex->duplicates != DUP_UPDATE) > + break; > + /* fall-through: */ > + case SQLCOM_LOCK_TABLES: > + case SQLCOM_DELETE: > + case SQLCOM_UPDATE: > + case SQLCOM_REPLACE: > + case SQLCOM_REPLACE_SELECT: > + case SQLCOM_DELETE_MULTI: > + case SQLCOM_UPDATE_MULTI: > + return true; > + default:; > + break; > + } > + if (thd->rgi_slave && thd->rgi_slave->current_event && > thd->lex->sql_command == SQLCOM_END) I wonder, would it be possible, instead of introducing rgi_slave->current_event, just make row events set thd->lex->sql_command appropriately? For example, currently row events increment thd->status_var.com_stat[] each event for its own SQLCOM_xxx, it won't be needed if they'll just set thd->lex->sql_command. > + { > + switch (thd->rgi_slave->current_event->get_type_code()) > + { > + case UPDATE_ROWS_EVENT: > + case UPDATE_ROWS_EVENT_V1: > + case DELETE_ROWS_EVENT: > + case DELETE_ROWS_EVENT_V1: > + return true; > + default:; > + break; > + } > + } > + } > +#endif > + return false; > +} > + > + > /** > Open a base table. > > @@ -1777,6 +1823,11 @@ bool open_table(THD *thd, TABLE_LIST *table_list, > Open_table_context *ot_ctx) > DBUG_PRINT("info",("Using locked table")); > #ifdef WITH_PARTITION_STORAGE_ENGINE > part_names_error= set_partitions_as_used(table_list, table); > + if (table->vers_need_hist_part(thd, table_list)) > + { > + /* Rotation is still needed under LOCK TABLES */ a bit confusing comment, you left out stuff that are obvious to you but not to others. I'd suggest something like /* New partitions are not auto-created under LOCK TABLES (TODO: fix it) but rotation can still happen. */ > + table->part_info->vers_set_hist_part(thd, false); > + } > #endif > goto reset; > } > @@ -2032,6 +2083,20 @@ bool open_table(THD *thd, TABLE_LIST *table_list, > Open_table_context *ot_ctx) > tc_add_table(thd, table); > } > > + if (!ot_ctx->vers_create_count && what does this condition mean? When is ot_ctx->vers_create_count > 0 ? > + table->vers_need_hist_part(thd, table_list)) > + { > + ot_ctx->vers_create_count= table->part_info->vers_set_hist_part(thd, > true); > + if (ot_ctx->vers_create_count) > + { > + MYSQL_UNBIND_TABLE(table->file); > + tc_release_table(table); > + > ot_ctx->request_backoff_action(Open_table_context::OT_ADD_HISTORY_PARTITION, > + table_list); > + DBUG_RETURN(TRUE); > + } > + } > + > if (!(flags & MYSQL_OPEN_HAS_MDL_LOCK) && > table->s->table_category < TABLE_CATEGORY_INFORMATION) > { > @@ -3172,10 +3239,18 @@ Open_table_context::recover_from_failed_open() > break; > case OT_DISCOVER: > case OT_REPAIR: > - if ((result= lock_table_names(m_thd, m_thd->lex->create_info, > - m_failed_table, NULL, > - get_timeout(), 0))) > + case OT_ADD_HISTORY_PARTITION: > + result= lock_table_names(m_thd, m_thd->lex->create_info, > m_failed_table, > + NULL, get_timeout(), 0); > + if (result) > + { > + if (m_action == OT_ADD_HISTORY_PARTITION) > + { > + m_thd->clear_error(); why? > + result= false; > + } > break; > + } > > tdc_remove_table(m_thd, m_failed_table->db.str, > m_failed_table->table_name.str); > diff --git a/sql/sql_error.h b/sql/sql_error.h > index 318d5076534..92076616adb 100644 > --- a/sql/sql_error.h > +++ b/sql/sql_error.h > @@ -1195,7 +1195,6 @@ class Diagnostics_area: public Sql_state_errno, > > void copy_non_errors_from_wi(THD *thd, const Warning_info *src_wi); > > -private: It doesn't seem you're using get_warning_info() anywhere > Warning_info *get_warning_info() { return m_wi_stack.front(); } > > const Warning_info *get_warning_info() const { return m_wi_stack.front(); } > diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc > index 86ba393fb7b..f4a960afe47 100644 > --- a/sql/sql_partition.cc > +++ b/sql/sql_partition.cc > @@ -7237,7 +7246,8 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, > } > else if ((alter_info->partition_flags & ALTER_PARTITION_ADD) && > (part_info->part_type == RANGE_PARTITION || > - part_info->part_type == LIST_PARTITION)) > + part_info->part_type == LIST_PARTITION || > + alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) it'd be good it adding new empty VERSIONING partitions would always go this way, auto or not. but it's a separate task. > { > /* > ADD RANGE/LIST PARTITIONS > @@ -7281,7 +7291,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table, > write_log_add_change_partition(lpt) || > ERROR_INJECT_CRASH("crash_add_partition_4") || > ERROR_INJECT_ERROR("fail_add_partition_4") || > - mysql_change_partitions(lpt) || > + mysql_change_partitions(lpt, false) || this way you skip trans_commit_stmt/trans_commit_implicit for ALTER TABLE ... ADD RANGE/LIST partition. is it always ok? A safer alternative would've been mysql_change_partitions(lpt, !(alter_info->partition_flags & ALTER_PARTITION_AUTO_HIST)) but it's more complex, so I'd prefer your variant, if we can be sure that it doesn't break anything. > ERROR_INJECT_CRASH("crash_add_partition_5") || > ERROR_INJECT_ERROR("fail_add_partition_5") || > alter_close_table(lpt) || Regards, Sergei VP of MariaDB Server Engineering and secur...@mariadb.org _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp