Hi, Jan! On May 04, Jan Lindström wrote: > revision-id: 186e8e91f3ce797258389f6d8729082f7d3ea164 > parent(s): aa5095627e2619bdad7916d33d1016802a84a9e1 > committer: Jan Lindström > branch nick: 10.0-git > timestamp: 2015-05-04 10:23:50 +0300 > message: > > MDEV-5171: Add support for --innodb-optimize-keys to mysqldump. > > Merged revision 93 from > bzr+ssh://bazaar.launchpad.net/+branch/percona-server/5.6/ > authored by Alexey Kopytov. > > This patch expands the applicability of InnoDB fast index creation to > mysqldump, ALTER TABLE and OPTIMIZE TABLE as follows: > > 1. mysqldump has now a new option, --innodb-optimize-keys, which changes > the way InnoDB tables are dumped so that secondary and foreign keys are > created after loading the data thus taking advantage of fast index > creation. > > This part of the patch is an implementation of the feature request > reported as MySQL bug #49120. > > More specifically: > > - KEY, UNIQUE KEY and CONSTRAINT specifications are omitted from CREATE > TABLE corresponding to InnoDB tables. > > - an additional ALTER TABLE is issued after dumping the data to create > the previously omitted keys. > > Delaying foreign key creation does not introduce any additional risks as > mysqldump always prepends its output with SET FOREIGN_KEY_CHECKS=0 anyway. > > 2. When ALTER TABLE requires a table copy, secondary keys are now dropped > and recreated later after copying the data. The following restrictions > apply: > > - only non-unique keys can be involved in this optimization > > - if the table contains foreign keys, or a foreign key is being added as > a part of the current ALTER TABLE statement, the optimization is > disabled for all keys. > > This part of the patch is an implementation of the feature request > reported as MySQL bug #57583. > > 3. As OPTIMIZE TABLE is mapped to ALTER TABLE ... ENGINE=InnoDB for > InnoDB tables, it now also benefits from fast index creation with the > same restrictions as for ALTER TABLE.
> diff --git a/client/client_priv.h b/client/client_priv.h > index 67a6e82..1846926 100644 > --- a/client/client_priv.h > +++ b/client/client_priv.h > @@ -94,6 +94,7 @@ enum options_client > OPT_SSL_CRL, OPT_SSL_CRLPATH, > OPT_USE_GTID, > OPT_GALERA_SST_MODE, > + OPT_INNODB_OPTIMIZE_KEYS, /* mysqldump */ Not needed, please, remove > OPT_MAX_CLIENT_OPTION /* should be always the last */ > }; > > diff --git a/client/mysqldump.c b/client/mysqldump.c > index e118198..0970082 100644 > --- a/client/mysqldump.c > +++ b/client/mysqldump.c > @@ -375,6 +386,11 @@ static struct my_option my_long_options[] = > "in dump produced with --dump-slave.", &opt_include_master_host_port, > &opt_include_master_host_port, 0, GET_BOOL, NO_ARG, > 0, 0, 0, 0, 0, 0}, > + {"innodb-optimize-keys", OPT_INNODB_OPTIMIZE_KEYS, Here use 0 instead of OPT_INNODB_OPTIMIZE_KEYS > + "Use InnoDB fast index creation by creating secondary indexes after " > + "dumping the data.", > + &opt_innodb_optimize_keys, &opt_innodb_optimize_keys, 0, GET_BOOL, > NO_ARG, > + 0, 0, 0, 0, 0, 0}, > {"insert-ignore", OPT_INSERT_IGNORE, "Insert rows with INSERT IGNORE.", > &opt_ignore, &opt_ignore, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, > 0, 0}, > @@ -2581,6 +2597,359 @@ static inline my_bool > general_log_or_slow_log_tables(const char *db, > } > > /* > + Find the first occurrence of a quoted identifier in a given string. Returns > + the pointer to the opening quote, and stores the pointer to the closing > quote > + to the memory location pointed to by the 'end' argument, > + > + If no quoted identifiers are found, returns NULL (and the value pointed to > by > + 'end' is undefined in this case). > +*/ > + > +static const char *parse_quoted_identifier(const char *str, > + const char **end) > +{ > + const char *from; > + const char *to; > + > + if (!(from= strchr(str, '`'))) > + return NULL; This is wrong, see below. Identifiers ar enot necessarily quoted, so this function should parse both quoted and not quoted identifiers (and renamed to, say, parse_next_identifier) > + > + to= from; > + > + while ((to= strchr(to + 1, '`'))) { > + /* > + Double backticks represent a backtick in identifier, rather than a > quote > + character. > + */ > + if (to[1] == '`') > + { > + to++; > + continue; > + } > + > + break; > + } > + > + if (to <= from + 1) > + return NULL; /* Empty identifier */ > + > + *end= to; > + > + return from; > +} > + > +/* > + Parse the specified key definition string and check if the key contains an > + AUTO_INCREMENT column as the first key part. We only check for the first > key > + part, because unlike MyISAM, InnoDB does not allow the AUTO_INCREMENT > column > + as a secondary key column, i.e. the AUTO_INCREMENT column would not be > + considered indexed for such key specification. > +*/ > +static my_bool contains_autoinc_column(const char *autoinc_column, > + const char *keydef, > + key_type_t type) > +{ > + const char *from, *to; > + uint idnum; > + > + DBUG_ASSERT(type != KEY_TYPE_NONE); > + > + if (autoinc_column == NULL) > + return FALSE; > + > + idnum= 0; > + > + /* > + There is only 1 iteration of the following loop for type == > KEY_TYPE_PRIMARY > + and 2 iterations for type == KEY_TYPE_UNIQUE / KEY_TYPE_NON_UNIQUE. > + */ > + while ((from= parse_quoted_identifier(keydef, &to))) > + { > + idnum++; > + > + /* > + Skip the check if it's the first identifier and we are processing a > + secondary key. > + */ > + if ((type == KEY_TYPE_PRIMARY || idnum != 1) && > + !strncmp(autoinc_column, from + 1, to - from - 1)) > + return TRUE; > + > + /* > + Check only the first (for PRIMARY KEY) or the second (for secondary > keys) > + quoted identifier. > + */ > + if ((idnum == 1 + MY_TEST(type != KEY_TYPE_PRIMARY))) > + break; > + > + keydef= to + 1; > + } > + > + return FALSE; > +} > + > +/* > + Find a node in the skipped keys list whose name matches a quoted > + identifier specified as 'id_from' and 'id_to' arguments. > +*/ > + > +static LIST *find_matching_skipped_key(const char *id_from, > + const char *id_to) > +{ > + LIST *list; > + size_t id_len; > + > + id_len= id_to - id_from + 1; > + DBUG_ASSERT(id_len > 2); > + > + for (list= skipped_keys_list; list; list= list_rest(list)) > + { > + const char *keydef; > + const char *keyname_from; > + const char *keyname_to; > + size_t keyname_len; > + > + keydef= list->data; > + > + if ((keyname_from= parse_quoted_identifier(keydef, &keyname_to))) > + { > + keyname_len= keyname_to - keyname_from + 1; > + > + if (id_len == keyname_len && > + !strncmp(keyname_from, id_from, id_len)) > + return list; > + } > + } > + > + return NULL; > +} > + > +/* > + Remove secondary/foreign key definitions from a given SHOW CREATE TABLE > string > + and store them into a temporary list to be used later. > + > + SYNOPSIS > + skip_secondary_keys() > + create_str SHOW CREATE TABLE output > + has_pk TRUE, if the table has PRIMARY KEY > + (or UNIQUE key on non-nullable columns) > + > + > + DESCRIPTION > + > + Stores all lines starting with "KEY" or "UNIQUE KEY" > + into skipped_keys_list and removes them from the input string. > + Ignoring FOREIGN KEYS constraints when creating the table is ok, because > + mysqldump sets foreign_key_checks to 0 anyway. > +*/ > + > +static void skip_secondary_keys(char *create_str, my_bool has_pk) > +{ > + char *ptr, *strend; > + char *last_comma= NULL; > + my_bool pk_processed= FALSE; > + char *autoinc_column= NULL; > + my_bool has_autoinc= FALSE; > + key_type_t type; > + const char *constr_from; > + const char *constr_to; > + LIST *keydef_node; > + my_bool keys_processed= FALSE; > + > + strend= create_str + strlen(create_str); > + > + ptr= create_str; > + while (*ptr && !keys_processed) > + { please, fix the coding style (here and everywhere in the patch) to follow the rest of the file. That is while { char *tmp, *orig_ptr, c; ... > + char *tmp, *orig_ptr, c; > + > + orig_ptr= ptr; > + /* Skip leading whitespace */ > + while (*ptr && my_isspace(charset_info, *ptr)) > + ptr++; > + > + /* Read the next line */ > + for (tmp= ptr; *tmp != '\n' && *tmp != '\0'; tmp++); > + > + c= *tmp; > + *tmp= '\0'; /* so strstr() only processes the current line */ > + > + if (!strncmp(ptr, "CONSTRAINT ", sizeof("CONSTRAINT ") - 1) && > + (constr_from= parse_quoted_identifier(ptr, &constr_to)) && yuck! identifiers do not have to be quoted. this code needs to be fixed to accept non-quoted identifiers too. > + (keydef_node= find_matching_skipped_key(constr_from, constr_to))) > + { > + char *keydef; > + size_t keydef_len; > + > + /* > + There's a skipped key with the same name as the constraint name. > Let's > + put it back before the current constraint definition and remove from > the > + skipped keys list. 1. fix the formatting, please 2. why not to create the FK constraint afterwards? > + */ > + keydef= keydef_node->data; > + keydef_len= strlen(keydef) + 5; /* ", \n " */ > + > + memmove(orig_ptr + keydef_len, orig_ptr, strend - orig_ptr + 1); > + memcpy(ptr, keydef, keydef_len - 5); > + memcpy(ptr + keydef_len - 5, ", \n ", 5); > + > + skipped_keys_list= list_delete(skipped_keys_list, keydef_node); > + my_free(keydef); > + my_free(keydef_node); > + > + strend+= keydef_len; > + orig_ptr+= keydef_len; > + ptr+= keydef_len; > + tmp+= keydef_len; > + > + type= KEY_TYPE_NONE; > + } > + else if (!strncmp(ptr, "UNIQUE KEY ", sizeof("UNIQUE KEY ") - 1)) > + type= KEY_TYPE_UNIQUE; > + else if (!strncmp(ptr, "KEY ", sizeof("KEY ") - 1)) > + type= KEY_TYPE_NON_UNIQUE; > + else if (!strncmp(ptr, "PRIMARY KEY ", sizeof("PRIMARY KEY ") - 1)) > + type= KEY_TYPE_PRIMARY; > + else > + type= KEY_TYPE_NONE; > + > + has_autoinc= (type != KEY_TYPE_NONE) ? > + contains_autoinc_column(autoinc_column, ptr, type) : FALSE; > + > + /* Is it a secondary index definition? */ > + if (c == '\n' && > + ((type == KEY_TYPE_UNIQUE && (pk_processed || !has_pk)) || > + type == KEY_TYPE_NON_UNIQUE) && !has_autoinc) > + { > + char *data, *end= tmp - 1; > + > + /* Remove the trailing comma */ > + if (*end == ',') > + end--; > + data= my_strndup(ptr, end - ptr + 1, MYF(MY_FAE)); > + skipped_keys_list= list_cons(data, skipped_keys_list); > + > + memmove(orig_ptr, tmp + 1, strend - tmp); > + ptr= orig_ptr; > + strend-= tmp + 1 - ptr; > + > + /* Remove the comma on the previos line */ > + if (last_comma != NULL) > + { > + *last_comma= ' '; > + } > + } > + else > + { > + char *end; > + > + if (last_comma != NULL && *ptr == ')') > + { > + keys_processed= TRUE; > + } > + else if (last_comma != NULL && !keys_processed) > + { > + /* > + It's not the last line of CREATE TABLE, so we have skipped a key > + definition. We have to restore the last removed comma. > + */ > + *last_comma= ','; > + } > + > + /* > + If we are skipping a key which indexes an AUTO_INCREMENT column, it > is > + safe to optimize all subsequent keys, i.e. we should not be checking > for > + that column anymore. > + */ > + if (type != KEY_TYPE_NONE && has_autoinc) > + { > + DBUG_ASSERT(autoinc_column != NULL); > + > + my_free(autoinc_column); > + autoinc_column= NULL; > + } > + > + if ((has_pk && type == KEY_TYPE_UNIQUE && !pk_processed) || > + type == KEY_TYPE_PRIMARY) > + pk_processed= TRUE; > + > + if (strstr(ptr, "AUTO_INCREMENT") && *ptr == '`') > + { > + /* > + The first secondary key defined on this column later cannot be > + skipped, as CREATE TABLE would fail on import. Unless there is a > + PRIMARY KEY and it indexes that column. > + */ > + for (end= ptr + 1; > + /* Skip double backticks as they are a part of identifier > */ > + *end != '\0' && (*end != '`' || end[1] == '`'); > + end++) > + /* empty */; > + > + if (*end == '`' && end > ptr + 1) > + { > + DBUG_ASSERT(autoinc_column == NULL); > + > + autoinc_column= my_strndup(ptr + 1, end - ptr - 1, > MYF(MY_FAE)); Eh? Why does it need to strdup? > + } > + } > + > + *tmp= c; > + > + if (tmp[-1] == ',') > + last_comma= tmp - 1; > + ptr= (*tmp == '\0') ? tmp : tmp + 1; > + } > + } > + > + my_free(autoinc_column); > +} > + > +/* > + Check if the table has a primary key defined either explicitly or > + implicitly (i.e. a unique key on non-nullable columns). > + > + SYNOPSIS > + my_bool has_primary_key(const char *table_name) > + > + table_name quoted table name > + > + RETURNS TRUE if the table has a primary key > + > + DESCRIPTION > +*/ > + > +static my_bool has_primary_key(const char *table_name) > +{ > + MYSQL_RES *res= NULL; > + MYSQL_ROW row; > + char query_buff[QUERY_LENGTH]; > + my_bool has_pk= TRUE; > + > + my_snprintf(query_buff, sizeof(query_buff), > + "SELECT COUNT(*) FROM INFORMATION_SCHEMA.COLUMNS WHERE " > + "TABLE_SCHEMA=DATABASE() AND TABLE_NAME='%s' AND " > + "COLUMN_KEY='PRI'", table_name); > + if (mysql_query(mysql, query_buff) || !(res= mysql_store_result(mysql)) || > + !(row= mysql_fetch_row(res))) > + { > + fprintf(stderr, "Warning: Couldn't determine if table %s has a " > + "primary key (%s). " > + "--innodb-optimize-keys may work inefficiently.\n", > + table_name, mysql_error(mysql)); > + goto cleanup; > + } > + > + has_pk= atoi(row[0]) > 0; > + > + cleanup: > + if (res) > + mysql_free_result(res); > + > + return has_pk; > +} > + > +/* > get_table_structure -- retrievs database structure, prints out > corresponding > CREATE statement and fills out insert_pat if the table is the type we will > be dumping. > @@ -2659,6 +3029,9 @@ static uint get_table_structure(char *table, char *db, > char *table_type, > result_table= quote_name(table, table_buff, 1); > opt_quoted_table= quote_name(table, table_buff2, 0); > > + if (opt_innodb_optimize_keys && !strcmp(table_type, "InnoDB")) > + has_pk= has_primary_key(table); > + see below > if (opt_order_by_primary) > order_by= primary_key_fields(result_table); > > @@ -2838,6 +3211,9 @@ static uint get_table_structure(char *table, char *db, > char *table_type, > > row= mysql_fetch_row(result); > > + if (opt_innodb_optimize_keys && !strcmp(table_type, "InnoDB")) > + skip_secondary_keys(row[1], has_pk); > + Not that it makes a lot of difference, but I'd remove the previous hunk, the variable has_pk, and put everything here, as if (opt_innodb_optimize_keys && !strcmp(table_type, "InnoDB")) skip_secondary_keys(row[1], has_primary_key(table)); this removes one if(), one strcmp(), and doesn't invoke has_primary_key() (meaning, one SQL query less) unless all other conditions for skip_secondary_keys() are met. even better - don't do has_primary_key() at all, remove the function altogether, and look for "PRIMARY KEY" in the create string inside skip_secondary_keys(). > is_log_table= general_log_or_slow_log_tables(db, table); > if (is_log_table) > row[1]+= 13; /* strlen("CREATE TABLE ")= 13 */ Regards, Sergei _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp