Hi Varun, Please finally find the review below.
The patch is almost there I think, but there are some minor adjustments that still need to be made. > revision-id: 0ff8b73b60fae40556f91d3959f20bf3cf182b28 > (mariadb-10.3.0-947-g0ff8b73b60f) > parent(s): 15419a558370aeed9521b498c34d50f20a8d47a5 > author: Varun Gupta > committer: Varun Gupta > timestamp: 2018-05-15 13:53:11 +0530 > message: > > MDEV-15777:Support Early NULLs filtering-like restrictions in the range > optimizer > > --- > mysql-test/main/mdev15777.result | 455 > +++++++++++++++++++++++++++++++++++++++ > mysql-test/main/mdev15777.test | 138 ++++++++++++ > sql/opt_range.cc | 125 ++++++++++- > sql/opt_range.h | 2 + > sql/sql_select.cc | 22 +- > sql/table.cc | 1 + > sql/table.h | 6 + > 7 files changed, 745 insertions(+), 4 deletions(-) > > diff --git a/mysql-test/main/mdev15777.result > b/mysql-test/main/mdev15777.result > new file mode 100644 > index 00000000000..549ac08b91f > --- /dev/null > +++ b/mysql-test/main/mdev15777.result > @@ -0,0 +1,455 @@ > +# Test added to check that null filtering is used by range optimizer > +create table ten(a int); > +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); > +create table one_k(a int); > +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; > +create table one_m(a int); > +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B; The testcases are extremely confusing. * Do you really need a table with one millon rows to demonstrate the issue? * Why use examples with subqueries (which are converted into join anyway)? - if we want to verify that this optimization works together with subquery-to-semi-join conversion, let's start with a join example, and then construct a subquery which is converted to the query. * The testcases produce a lot of EXPLAIN outputs, and there is no explanation provided what one should expect. Please create a testcase: - of reasonable size (I think tables of 10-20K rows should suffice) - start from a join query - Like in other testcases, tables should be named, t1,t2, ... - I assume the query plan should be: range access on the first table (constructed from a NOT NULL predicate), ref access for the second one. - The testcase should have '--echo #' comments explaining what is relevant in the query plan > +delete from one_m where a=0 limit 1; > +create table t1 ( > +id int(10) unsigned NOT NULL AUTO_INCREMENT, > +filler varchar(100), > +subset_id int(11) DEFAULT NULL, > +PRIMARY KEY (id), > +KEY t1_subset_id (subset_id) > +); > +create table t1_subsets ( ... > diff --git a/sql/opt_range.cc b/sql/opt_range.cc > index 8422917065f..ae06b899ac9 100644 > --- a/sql/opt_range.cc > +++ b/sql/opt_range.cc > @@ -155,6 +155,7 @@ class SEL_IMERGE; > #define CLONE_KEY1_MAYBE 1 > #define CLONE_KEY2_MAYBE 2 > #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) > +#define FT_KEYPART (MAX_FIELDS+10) > > > /* > @@ -2400,6 +2401,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map > keys_to_use, > { > uint idx; > double scan_time; > + Item *null_rejecting_conds= NULL; > DBUG_ENTER("SQL_SELECT::test_quick_select"); > DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: > %lu", > (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, > @@ -2423,6 +2425,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map > keys_to_use, > read_time= (double) records + scan_time + 1; // Force to use index > > possible_keys.clear_all(); > + null_rejecting_conds= head->null_rejecting_conds; > > DBUG_PRINT("info",("Time to scan table: %g", read_time)); > > @@ -2431,7 +2434,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map > keys_to_use, > { > uchar buff[STACK_BUFF_ALLOC]; > MEM_ROOT alloc; > - SEL_TREE *tree= NULL; > + SEL_TREE *tree= NULL, *not_null_cond_tree= NULL; > KEY_PART *key_parts; > KEY *key_info; > PARAM param; > @@ -2540,6 +2543,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map > keys_to_use, > TRP_GROUP_MIN_MAX *group_trp; > double best_read_time= read_time; > Please add a comment saying that as far as range optimizer is concerned, null_rejecting_conds is just an extra condition on this table. (That is, we don't care if it has NOT NULL conditions or something else). > + if (null_rejecting_conds) > + { > + not_null_cond_tree= null_rejecting_conds->get_mm_tree(¶m, > + &null_rejecting_conds); > + } > + > if (cond) > { > if ((tree= cond->get_mm_tree(¶m, &cond))) > @@ -2558,6 +2567,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map > keys_to_use, > tree= NULL; > } > } > + if (not_null_cond_tree) > + { > + if (!tree) > + tree= not_null_cond_tree; > + else > + tree= tree_and(¶m, tree, not_null_cond_tree); This if-else logic is redundant, as it's already present in tree_and(). Can just have this: tree= tree_and(tree, not_null_cond_tree); > + } > > /* > Try to construct a QUICK_GROUP_MIN_MAX_SELECT. > @@ -14643,6 +14659,113 @@ void > QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, > add_key_and_length(key_names, used_lengths, &first); > } > > +inline void add_cond(THD *thd, Item **e1, Item *e2) > +{ > + if (*e1) > + { > + if (!e2) > + return; > + Item *res; > + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) > + { > + res->fix_fields(thd, 0); > + res->update_used_tables(); > + *e1= res; > + } > + } > + else > + *e1= e2; > +} > + > +/* > + Create null rejecting conditions for a table, for all the equalites > + present in the WHERE clause of a query. > + > + SYNOPSIS > + make_null_rejecting_conds() > + @param TABLE - Keys of this table will participate in null > + rejecting conditions > + @param keyuse_array - array that has all the equalites of the > + WHERE clasuse > + > + DESCRIPTION > + This function creates null rejecting conditions for a table. These > + conditions are created to do range analysis on them , the conditions > + are of the form tbl.key.keypart IS NOT NULL. > + > + IMPLEMENTATION > + Lookup in the keyuse array to check if it has equalites that belong > + to the given table. If yes then find out if the conditions are null > + rejecting and accordingly create all the condition for the keys of a > + given table and AND them. > + > + > + RETURN > + NOT NULL - Found null rejecting conditions for the given table > + NULL - No null rejecting conditions for the given table > +*/ > + > +void make_null_rejecting_conds(THD *thd, TABLE *table, > + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys) ^ Please fix identation. > +{ > + KEY *keyinfo; > + Item *cond= NULL; > + KEYUSE* keyuse; > + > + /* > + The null rejecting conds added will be on the keypart of a key, so for > + that we need the table to atleast have a key. > + */ please fix typos/wording in the above comment. > + if (!table->s->keys) > + return ; > + if (table->null_rejecting_conds) > + return; > + > + for(uint i=0; i < keyuse_array->elements; i++) > + { This loop does excessive work. Check out how best_access_path() examines the KEYUSE array. Key points - the array is sorted by table, index_nr, key_part_nr. - JOIN_TAB::keyuse points to the first element that refers to the table described by the JOIN_TAB - The array may have "duplicates", especially when multiple equalities are present: KEYUSE(t.keyXpartY = other_table.colX) KEYUSE(t.keyXpartY = other_table.colY) KEYUSE(t.keyXpartY = some_expr) ... Since the elements are sorted by keyXpartY, it is trival to avoid generating multiple "t.keyXpartY IS NOT NULL" for consecutive KEYUSEs. I would go even further: the same "t.keyXpartY" can be part of multiple indexes (and so its KEYUSE objects will not be adjacent). Can we use a table's column bitmap to avoid generating duplicate IS NOT NULL objects? > + keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); > + if (keyuse->table == table) > + { > + /* > + No null rejecting conds for a hash key orr full-text keys > + */ please fix typos/wording in the above comment. > + if (keyuse->key == MAX_KEY || keyuse->keypart == FT_KEYPART) > + continue; > + keyinfo= keyuse->table->key_info+keyuse->key; > + Field *field= keyinfo->key_part[keyuse->keypart].field; > + > + /* > + No need to add null-rejecting condition if we have a > + keyuse element as > + - table.key.keypart= const > + - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL) > + - table.key.keypart IS NOT NULLABLE > + */ > + > + if (keyuse->val->const_item() > + || !(keyuse->null_rejecting && field->maybe_null()) Please remove the brackets: !(A && B) -> !A || !B > + || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) Please also put the '||' at the end of the lines, like it is done in the rest of the code. > + continue; > + > + Item_field *field_item= new (thd->mem_root)Item_field(thd, field); > + Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, > + field_item); Please add error handling (just return from the function if we failed to allocate any of the above) > + > + /* > + adding the key to const keys as we have the condition > + as key.keypart IS NOT NULL > + */ > + > + const_keys->set_bit(keyuse->key); > + not_null_item->fix_fields(thd, 0); > + not_null_item->update_used_tables(); > + add_cond(thd, &cond, not_null_item); > + } > + } > + table->null_rejecting_conds= cond; > + return; Please remove the above 'return;' as it is redundant and makes one suspect a merge error > +} > + > > #ifndef DBUG_OFF > > diff --git a/sql/opt_range.h b/sql/opt_range.h > index bd85a12d4a1..894d46a892c 100644 > --- a/sql/opt_range.h > +++ b/sql/opt_range.h > @@ -1728,6 +1728,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, > TABLE *table, Item **cond); > bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); > #endif > void store_key_image_to_rec(Field *field, uchar *ptr, uint len); > +void make_null_rejecting_conds(THD *thd, TABLE *table, > + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys); Please fix identation. > > extern String null_string; > > diff --git a/sql/sql_select.cc b/sql/sql_select.cc > index f658b78c33c..4aceb333340 100644 > --- a/sql/sql_select.cc > +++ b/sql/sql_select.cc > @@ -4805,6 +4805,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> > &tables_list, > add_group_and_distinct_keys(join, s); > > s->table->cond_selectivity= 1.0; > + > + make_null_rejecting_conds(join->thd, s->table, > + keyuse_array, &s->const_keys); > > /* > Perform range analysis if there are keys it could use (1). > @@ -5352,15 +5356,24 @@ add_key_field(JOIN *join, > If the condition has form "tbl.keypart = othertbl.field" and > othertbl.field can be NULL, there will be no matches if othertbl.field > has NULL value. > + > + The field KEY_FIELD::null_rejecting is set to TRUE if we have both > + the left and right hand side of the equality are NULLABLE > + This is NOT what is happening. null_rejecting is set to true, if *either* left or right-hand side of the equality is nullable. Please fix the comment. Please also fix the comment in KEY_FIELD::null_rejecting to reflect that. > We use null_rejecting in add_not_null_conds() to add > 'othertbl.field IS NOT NULL' to tab->select_cond. > + > + We use null_rejecting in make_null_rejecting_conds() to add > + tbl.keypart IS NOT NULL so we can do range analysis on this condition > + Please join the above two sentences together: null_rejecting is used - by add_not_null_conds(), to add "othertbl.field IS NOT NULL" to othertbl's tab->select_cond. (This is called "Early NULLs filtering") - by make_null_rejecting_conds(), to provide range optimizer with additional "tbl.keypart IS NOT NULL" condition. > */ > { > Item *real= (*value)->real_item(); > if (((cond->functype() == Item_func::EQ_FUNC) || > (cond->functype() == Item_func::MULT_EQUAL_FUNC)) && > - (real->type() == Item::FIELD_ITEM) && > + (((real->type() == Item::FIELD_ITEM) && > ((Item_field*)real)->field->maybe_null()) > + ||(field->maybe_null()))) ^Please fix the identation. > (*key_fields)->null_rejecting= true; > else > (*key_fields)->null_rejecting= false; > @@ -9813,7 +9826,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, > uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); > j->ref.items[i]=keyuse->val; // Save for cond removal > j->ref.cond_guards[i]= keyuse->cond_guard; > - if (keyuse->null_rejecting) > + Item *real= (keyuse->val)->real_item(); > + if (keyuse->null_rejecting && > + (real->type() == Item::FIELD_ITEM) && > + ((Item_field*)real)->field->maybe_null()) ^ Please fix identation > j->ref.null_rejecting|= (key_part_map)1 << i; > keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; > /* > @@ -18538,7 +18554,7 @@ free_tmp_table(THD *thd, TABLE *entry) > DBUG_ASSERT(entry->pos_in_table_list->table == entry); > entry->pos_in_table_list->table= NULL; > } > - > + entry->null_rejecting_conds= NULL; What is this for? As far as I understand we are freeing the TABLE object > free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ > thd_proc_info(thd, save_proc_info); > > diff --git a/sql/table.cc b/sql/table.cc > index a6e445d0d2e..097fee465de 100644 > --- a/sql/table.cc > +++ b/sql/table.cc > @@ -4597,6 +4597,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) > created= TRUE; > cond_selectivity= 1.0; > cond_selectivity_sampling_explain= NULL; > + null_rejecting_conds= NULL; > #ifdef HAVE_REPLICATION > /* used in RBR Triggers */ > master_had_triggers= 0; > diff --git a/sql/table.h b/sql/table.h > index 6fd3f219914..50d47899b49 100644 > --- a/sql/table.h > +++ b/sql/table.h > @@ -1356,6 +1356,12 @@ struct TABLE > SplM_opt_info *spl_opt_info; > key_map keys_usable_for_splitting; > > + /* > + Null rejecting conds added for all tables so we can do range analysis > + on these conditions > + */ "for all tables"? Please change the comment to something like: "NULL-rejecting conditions for this table. They are created from eq_ref access, and used by the range optimizer". > + Item* null_rejecting_conds; > + > void init(THD *thd, TABLE_LIST *tl); > bool fill_item_list(List<Item> *item_list) const; > void reset_item_list(List<Item> *item_list, uint skip) const; BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp