Hi Varun, On Fri, Nov 16, 2018 at 07:41:07PM +0530, Varun wrote: > revision-id: 1ed2d8b98ade099fe23b7d5c00d23364388e15aa > (mariadb-10.0.36-80-g1ed2d8b98ad) > parent(s): a84d87fde8c0bc325c8e00f06ea02bcd84a75d55 > author: Varun Gupta > committer: Varun Gupta > timestamp: 2018-11-16 19:40:47 +0530 > message: > > MDEV-17032: Estimates are higher for partitions of a table with > @@use_stat_tables= PREFERABLY > > The problem here is EITS statistics does not calculate statistics for the > partitions of the table. > So a temporary solution would be to not read EITS statistics for partitioned > tables. > > Also disabling reading of EITS for columns that participate in the partition > list of a table. > > --- > mysql-test/r/partition.result | 88 > +++++++++++++++++++++++++++++++++++++++++++ > mysql-test/t/partition.test | 58 ++++++++++++++++++++++++++++ > sql/opt_range.cc | 12 +++++- > sql/partition_info.cc | 26 +++++++++++++ > sql/partition_info.h | 1 + > sql/sql_statistics.cc | 16 ++++++++ > 6 files changed, 199 insertions(+), 2 deletions(-) > > diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result > index c6669176b3d..aedf9f89f0e 100644 > --- a/mysql-test/r/partition.result > +++ b/mysql-test/r/partition.result > @@ -2645,3 +2645,91 @@ Warnings: > Note 1517 Duplicate partition name p2 > DEALLOCATE PREPARE stmt; > DROP TABLE t1; > +# > +# MDEV-17032: Estimates are higher for partitions of a table with > @@use_stat_tables= PREFERABLY > +# > +create table t0(a int); > +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); > +create table t1(a int); > +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; > +create table t2 ( > +part_key int, > +a int, > +b int > +) partition by list(part_key) ( > +partition p0 values in (0), > +partition p1 values in (1), > +partition p2 values in (2), > +partition p3 values in (3), > +partition p4 values in (4) > +); > +insert into t2 > +select mod(a,5), a/100, a from t1; > +set @save_use_stat_tables= @@use_stat_tables; > +set > @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; > +# > +# Tests using stats provided by the storage engine > +# > +explain extended select * from t2 where part_key=1; > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1) > +explain partitions select * from t2 where part_key=1; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 > Using where > +explain extended select * from t2 where part_key in (1,2); > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in > (1,2)) > +explain partitions select * from t2 where part_key=1; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 > Using where > +explain extended select * from t2 where part_key in (1,2); > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in > (1,2)) > +explain partitions select * from t2 where part_key in (1,2); > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 > Using where > +set @save_histogram_size=@@histogram_size; > +set @@histogram_size=100; > +set @@use_stat_tables= PREFERABLY; > +set @@optimizer_use_condition_selectivity=4; > +analyze table t2; > +Table Op Msg_type Msg_text > +test.t2 analyze status Engine-independent statistics collected > +test.t2 analyze status OK > +# > +# Tests using EITS > +# > +explain extended select * from t2 where part_key=1; > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1) > +explain partitions select * from t2 where part_key=1; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 > Using where > +explain extended select * from t2 where part_key in (1,2); > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in > (1,2)) > +explain partitions select * from t2 where part_key=1; > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 > Using where > +explain extended select * from t2 where part_key in (1,2); > +id select_type table type possible_keys key key_len ref > rows filtered Extra > +1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 > Using where > +Warnings: > +Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS > `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in > (1,2)) > +explain partitions select * from t2 where part_key in (1,2); > +id select_type table partitions type possible_keys key > key_len ref rows Extra > +1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 > Using where > +set @@use_stat_tables= @save_use_stat_tables; > +set @@optimizer_use_condition_selectivity= > @save_optimizer_use_condition_selectivity; > +set @@histogram_size= @save_histogram_size; > +drop table t0,t1,t2; > diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test > index 1c8cd0375d6..00c6f1ce77c 100644 > --- a/mysql-test/t/partition.test > +++ b/mysql-test/t/partition.test > @@ -2897,3 +2897,61 @@ EXECUTE stmt; > DEALLOCATE PREPARE stmt; > DROP TABLE t1; > > +--echo # > +--echo # MDEV-17032: Estimates are higher for partitions of a table with > @@use_stat_tables= PREFERABLY > +--echo # > + > +create table t0(a int); > +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); > + > +create table t1(a int); > +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; > + > + > +create table t2 ( > + part_key int, > + a int, > + b int > +) partition by list(part_key) ( > + partition p0 values in (0), > + partition p1 values in (1), > + partition p2 values in (2), > + partition p3 values in (3), > + partition p4 values in (4) > +); > +insert into t2 > +select mod(a,5), a/100, a from t1; > + > +set @save_use_stat_tables= @@use_stat_tables; > +set > @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; > +--echo # > +--echo # Tests using stats provided by the storage engine > +--echo # > +explain extended select * from t2 where part_key=1; > +explain partitions select * from t2 where part_key=1; > +explain extended select * from t2 where part_key in (1,2); > +explain partitions select * from t2 where part_key=1; ^^ Isn't this query the same like one two lines above?
> +explain extended select * from t2 where part_key in (1,2); > +explain partitions select * from t2 where part_key in (1,2); > + > +set @save_histogram_size=@@histogram_size; > +set @@histogram_size=100; > +set @@use_stat_tables= PREFERABLY; > +set @@optimizer_use_condition_selectivity=4; > +analyze table t2; > +--echo # > +--echo # Tests using EITS > +--echo # > +# filtered should be 100 Please change to use '--echo #' so that the above is visible in the .result file as well. > +explain extended select * from t2 where part_key=1; > +explain partitions select * from t2 where part_key=1; > +# filtered should be 100 > +explain extended select * from t2 where part_key in (1,2); > +explain partitions select * from t2 where part_key=1; > +# filtered should be 100 > +explain extended select * from t2 where part_key in (1,2); > +explain partitions select * from t2 where part_key in (1,2); Please add a test that shows that condition selectivity is taken into account for non-partitioned columns. > +set @@use_stat_tables= @save_use_stat_tables; > +set @@optimizer_use_condition_selectivity= > @save_optimizer_use_condition_selectivity; > +set @@histogram_size= @save_histogram_size; > +drop table t0,t1,t2; > diff --git a/sql/opt_range.cc b/sql/opt_range.cc > index 3bcaa72e32f..59aa2f3f280 100644 > --- a/sql/opt_range.cc > +++ b/sql/opt_range.cc > @@ -3322,6 +3322,10 @@ bool > create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, > { > Field **field_ptr; > TABLE *table= param->table; > + partition_info *part_info= NULL; > + #ifdef WITH_PARTITION_STORAGE_ENGINE > + part_info= table->part_info; > + #endif > uint parts= 0; > > for (field_ptr= table->field; *field_ptr; field_ptr++) > @@ -3329,7 +3333,9 @@ bool > create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, > Column_statistics* col_stats= (*field_ptr)->read_stats; > if (bitmap_is_set(used_fields, (*field_ptr)->field_index) > && col_stats && !col_stats->no_stat_values_provided() > - && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY)) > + && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY) > + && (!part_info || > + part_info->field_in_partition_expr(*field_ptr))) > parts++; > } > > @@ -3350,7 +3356,9 @@ bool > create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, > if (bitmap_is_set(used_fields, (*field_ptr)->field_index)) > { > Field *field= *field_ptr; > - if (field->type() == MYSQL_TYPE_GEOMETRY) > + if (field->type() == MYSQL_TYPE_GEOMETRY > + || (!part_info || > + part_info->field_in_partition_expr(field))) > continue; This condition is wrong. (field->type() == MYSQL_TYPE_GEOMETRY || (!part_info || part_info->field_in_partition_expr(field))) is the same as (field->type() == MYSQL_TYPE_GEOMETRY || !part_info || ... which means if there is no partitioning, we won't count the selectivity. Indeed, if I apply this patch, all EITS tests start to fail, selectivity is no longer counted: --- /home/psergey/dev-git/10.1/mysql-test/r/selectivity_innodb.result 2018-11-16 14:06:27.231202907 +0300 +++ /home/psergey/dev-git/10.1/mysql-test/r/selectivity_innodb.reject 2018-11-25 01:03:14.661647893 +0300 @@ -28,13 +28,13 @@ explain extended select * from t1 where a is null; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 100.00 Using where Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where isnull(`test`.`t1`.`a`) explain extended select * from t1 where a is not null; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 100.00 Using where Warnings: See also the comment below about the return value of field_in_partition_expr. > > uint16 store_length; > diff --git a/sql/partition_info.cc b/sql/partition_info.cc > index 52bda560c1c..0111fc1451d 100644 > --- a/sql/partition_info.cc > +++ b/sql/partition_info.cc > @@ -3164,6 +3164,32 @@ void partition_info::print_debug(const char *str, uint > *value) > DBUG_PRINT("info", ("parser: %s", str)); > DBUG_VOID_RETURN; > } > + > +/* > + > + Disabling reading EITS statistics for columns involved in the > + partition list of a table. > + We assume the selecticivity for such columns would be handled > + during partition pruning. > + > +*/ > + This comment here doesn't make any sense anymore. Please move it to create_key_parts_for_pseudo_indexes. Also, the name and return value of the function is confusing now. If one calls the function for the partitioning column: field_in_partition_expr(partition_column) == false. Please change the return value to be true in this case, and false otherwise. > +bool partition_info::field_in_partition_expr(Field *field) const > +{ > + uint i; > + for (i= 0; i < num_part_fields; i++) > + { > + if (field->eq(part_field_array[i])) > + return FALSE; > + } > + for (i= 0; i < num_subpart_fields; i++) > + { > + if (field->eq(subpart_field_array[i])) > + return FALSE; > + } > + return TRUE; > +} > + > #else /* WITH_PARTITION_STORAGE_ENGINE */ > /* > For builds without partitioning we need to define these functions > diff --git a/sql/partition_info.h b/sql/partition_info.h > index f250c5496bf..10b8954ace7 100644 > --- a/sql/partition_info.h > +++ b/sql/partition_info.h > @@ -384,6 +384,7 @@ class partition_info : public Sql_alloc > bool is_full_part_expr_in_fields(List<Item> &fields); > public: > bool has_unique_name(partition_element *element); > + bool field_in_partition_expr(Field *field) const; > }; > > uint32 get_next_partition_id_range(struct st_partition_iter* part_iter); > diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc > index cb75a5c2176..02dd4970c99 100644 > --- a/sql/sql_statistics.cc > +++ b/sql/sql_statistics.cc > @@ -3589,6 +3589,22 @@ void set_statistics_for_table(THD *thd, TABLE *table) > (use_stat_table_mode <= COMPLEMENTARY || > !table->stats_is_read || read_stats->cardinality_is_null) ? > table->file->stats.records : read_stats->cardinality; > + > + /* > + For partitioned table, EITS statistics is based on data from all > partitions. > + > + On the other hand, Partition Pruning figures which partitions will be > + accessed and then computes the estimate of rows in used_partitions. > + > + Use the estimate from Partition Pruning as it is typically more precise. > + Ideally, EITS should provide per-partition statistics but this is not > + implemented currently. > + */ Good. > + #ifdef WITH_PARTITION_STORAGE_ENGINE > + if (table->part_info) > + table->used_stat_records= table->file->stats.records; > + #endif > + > KEY *key_info, *key_info_end; > for (key_info= table->key_info, key_info_end= key_info+table->s->keys; > key_info < key_info_end; key_info++) -- BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp