Changeset: 0e78f7e658f1 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0e78f7e658f1 Added Files: sql/backends/monet5/sql_statistics.c sql/backends/monet5/sql_statistics.h sql/scripts/80_statistics.sql Modified Files: clients/Tests/exports.stable.out sql/backends/monet5/Makefile.ag sql/backends/monet5/sql.mx sql/scripts/Makefile.ag sql/test/BugDay_2005-10-06_2.8/Tests/MapiClient-dump.SF-905851.stable.out sql/test/BugTracker-2008/Tests/sql_command_kills_db.SF-2233677.stable.err sql/test/BugTracker-2009/Tests/mclient-lsql-D.stable.out sql/test/BugTracker-2009/Tests/name_clash_with_dump.SF-2780395.stable.out sql/test/BugTracker-2011/Tests/count-count-distinct.Bug-2808.stable.out sql/test/BugTracker-2011/Tests/interrupted-initialization.Bug-2875.stable.out sql/test/BugTracker-2012/Tests/aggregate_vs_positional_column_crash.Bug-3085.stable.out sql/test/BugTracker-2012/Tests/predicate_select.Bug-3090.stable.err sql/test/BugTracker-2012/Tests/predicate_select.Bug-3090.stable.out sql/test/BugTracker/Tests/multi-column-constraint.SF-1964587.stable.out sql/test/BugTracker/Tests/rank_over.SF-1691098.stable.out sql/test/BugTracker/Tests/with_only_once.SF-1720293.stable.out sql/test/Dump/Tests/dump-empty.stable.out sql/test/Dump/Tests/dump.stable.out sql/test/Tests/systemfunctions.stable.out sql/test/UserDump/Tests/create.stable.out sql/test/bugs/Tests/simple_view.stable.out sql/test/bugs/Tests/subselect_multiple_unionall_where_1=1-bug-sf-1005596.stable.out sql/test/testdb/Tests/testdb-dump.stable.out Branch: default Log Message:
Introduce the analyze() function The min/max/nil properties for schemas up to individual columns can be requested. The results are stored in a persistent snapshot table, which could be used by optimizers. The primary method is to call a procedure CALL analyze('sys','functions','name',10); where the last argument indicates a sample to be used to avoid excessive costs. diffs (truncated from 764 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -2323,6 +2323,7 @@ str UUIDequal(bit *retval, str *l, str * str UUIDgenerateUuid(str *retval); str UUIDisaUUID(bit *retval, str *s); str UUIDstr2uuid(str *retval, str *s); +int UUIDtoString(str *retval, int *len, str handle); str UUIDuuid2str(str *retval, str *s); str XMLattribute(xml *ret, str *name, str *val); str XMLcomment(xml *x, str *s); diff --git a/sql/backends/monet5/Makefile.ag b/sql/backends/monet5/Makefile.ag --- a/sql/backends/monet5/Makefile.ag +++ b/sql/backends/monet5/Makefile.ag @@ -42,6 +42,7 @@ lib__sql = { sql_scenario.c sql_scenario.h \ rel_bin.c rel_bin.h \ sql_statement.c sql_statement.h \ + sql_statistics.c sql_statistics.h \ sql_gencode.c sql_gencode.h \ sql_optimizer.c sql_optimizer.h \ sql_result.c sql_result.h \ diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx --- a/sql/backends/monet5/sql.mx +++ b/sql/backends/monet5/sql.mx @@ -511,6 +511,27 @@ pattern dump_trace()( address dump_trace comment "dump the trace statistics"; +pattern analyze():void +address sql_analyze; +pattern analyze(sch:str):void +address sql_analyze; +pattern analyze(sch:str,tbl:str):void +address sql_analyze; +pattern analyze(sch:str,tbl:str,col:str):void +address sql_analyze +comment "Update the database statistics table"; + +pattern analyze(sample:lng):void +address sql_analyze; +pattern analyze(sch:str,sample:lng):void +address sql_analyze; +pattern analyze(sch:str,tbl:str,sample:lng):void +address sql_analyze; +pattern analyze(sch:str,tbl:str,col:str,sample:lng):void +address sql_analyze +comment "Update the database statistics table"; + + pattern storage()( schema:bat[:oid,:str], table:bat[:oid,:str], diff --git a/sql/backends/monet5/sql_statistics.c b/sql/backends/monet5/sql_statistics.c new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/sql_statistics.c @@ -0,0 +1,210 @@ +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2013 MonetDB B.V. + * All Rights Reserved. + */ +/* (c) M.L. Kersten +Most optimizers need easy access to key information +for proper plan generation. Amongst others, this +information consists of the tuple count, size, +min- and max-value, and the null-density. +They are kept around as persistent tables, modeled +directly as a collection of BATs. + +We made need an directly accessible structure to speedup +analysis by optimizers. +*/ +#include "monetdb_config.h" +#include "sql_statistics.h" +#include "sql_scenario.h" + +str +sql_analyze(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + mvc *m = NULL; + str msg = getSQLContext(cntxt,mb, &m, NULL); + sql_trans *tr = m->session->tr; + node *nsch, *ntab, *ncol; + char *query, *dquery; + char *maxval, *minval; + str sch = 0, tbl = 0, col = 0; + int sorted; + lng nils = 0; + lng uniq = 0; + lng samplesize = 0; + int argc = pci->argc; + + if ( msg != MAL_SUCCEED || (msg = checkSQLContext(cntxt)) != NULL) + return msg; + + if ( argc > 1 && getVarType(mb,getArg(pci,argc-1))== TYPE_lng ){ + samplesize = *(lng*) getArgReference(stk,pci,pci->argc-1); + argc--; + } + dquery = (char*) GDKzalloc(8192); + query = (char*) GDKzalloc(8192); + maxval = (char*) GDKzalloc(8192); + minval = (char*) GDKzalloc(8192); + if ( !(dquery && query && maxval && minval)) + throw(SQL,"analyze",MAL_MALLOC_FAIL); + + switch(argc){ + case 2: + tbl = *(str*) getArgReference(stk,pci,1); + break; + case 3: + sch = *(str*) getArgReference(stk,pci,1); + tbl = *(str*) getArgReference(stk,pci,2); + break; + case 4: + sch = *(str*) getArgReference(stk,pci,1); + tbl = *(str*) getArgReference(stk,pci,2); + col = *(str*) getArgReference(stk,pci,3); + break; + } +#ifdef DEBUG_SQL_STATISTICS + mnstr_printf(cntxt->fdout,"analyze %s.%s.%s sample "LLFMT"\n",(sch?sch:""),(tbl?tbl:" "),(col?col:" "),samplesize); +#endif + for( nsch= tr->schemas.set->h; nsch; nsch= nsch->next){ + sql_base *b= nsch->data; + sql_schema *s= (sql_schema*) nsch->data; + if ( !isalpha((int)b->name[0]) ) + continue; + + if ( sch && strcmp(sch, b->name) ) + continue; + if (s->tables.set) + for(ntab= (s)->tables.set->h ;ntab; ntab= ntab->next){ + sql_base *bt= ntab->data; + sql_table *t= (sql_table*) bt; + + if ( tbl && strcmp(bt->name,tbl)) + continue; + if (isTable(t) && t->columns.set) + for (ncol= (t)->columns.set->h; ncol; ncol= ncol->next){ + sql_base *bc = ncol->data; + sql_column *c= (sql_column *) ncol->data; + BAT *bn = store_funcs.bind_col(tr, c, 0), *br; + BAT *bsample; + lng sz = BATcount(bn); + + if ( col && strcmp(bc->name,col)) + continue; + snprintf(dquery,8192, + "delete from sys.statistics where \"schema\" ='%s' and \"table\"='%s' and \"column\"='%s';", + b->name, bt->name, bc->name); + if ( samplesize >0){ + bsample = BATsample(bn, (BUN) 25000); + } else + bsample = bn; + br = BATselect(bsample, ATOMnil(bn->ttype), ATOMnil(bn->ttype)); + nils = BATcount(br); + BBPunfix(br->batCacheid); + if ( bn->tkey) + uniq = sz; + else { + br = BATkunique(BATmirror(bsample)); + uniq = BATcount(br); + BBPunfix(br->batCacheid); + } + if ( samplesize > 0){ + BBPunfix(bsample->batCacheid); + } + sorted = BATtordered(bn); + + // Gather the min/max value for builtin types +#define minmax(TYPE,FMT) \ +{\ + TYPE *val=0;\ + val= BATmax(bn,0);\ + snprintf(maxval,8192,FMT,*val);\ + GDKfree(val);\ + val= BATmin(bn,0);\ + snprintf(minval,8192,FMT,*val);\ + GDKfree(val);\ + break;\ +} + switch(bn->ttype){ + case TYPE_sht: minmax(sht,"%d"); + case TYPE_int: minmax(int,"%d"); + case TYPE_lng: minmax(lng,LLFMT); + case TYPE_flt: minmax(flt,"%f"); + case TYPE_dbl: minmax(dbl,"%f"); + default: + snprintf(maxval,8192,"nil"); + snprintf(minval,8192,"nil"); + } + snprintf(query,8192, + "insert into sys.statistics values('%s','%s','%s','%s',now(),"LLFMT","LLFMT","LLFMT",'%s','%s',%s);", + b->name, bt->name, bc->name, c->type.type->sqlname,sz,uniq,nils, minval,maxval, sorted?"true":"false"); +#ifdef DEBUG_SQL_STATISTICS + mnstr_printf(cntxt->fdout,"%s\n",dquery); + mnstr_printf(cntxt->fdout,"%s\n",query); +#endif + BBPunfix(bn->batCacheid); + msg = SQLstatementIntern(cntxt,&dquery,"SQLanalyze",TRUE,FALSE); + if ( msg){ + GDKfree(dquery); + GDKfree(query); + GDKfree(maxval); + GDKfree(minval); + return msg; + } + msg = SQLstatementIntern(cntxt,&query,"SQLanalyze",TRUE,FALSE); + if ( msg){ + GDKfree(dquery); + GDKfree(query); + GDKfree(maxval); + GDKfree(minval); + return msg; + } + } + + if (isTable(t) && t->idxs.set) + for (ncol= (t)->idxs.set->h; ncol; ncol= ncol->next){ + sql_base *bc = ncol->data; + sql_idx *c= (sql_idx *) ncol->data; + if(c->type != no_idx){ + BAT *bn = store_funcs.bind_idx(tr, c, 0); + lng sz= BATcount(bn); + sorted = BATtordered(bn); + + snprintf(query,8192, + "insert into sys.statistics values('%s','%s','%s','oid',now(),"LLFMT","LLFMT","LLFMT",'%s','%s',%s);", + b->name, bt->name, bc->name, sz,uniq,nils,minval,maxval,sorted?"true":"false"); +#ifdef DEBUG_SQL_STATISTICS + mnstr_printf(cntxt->fdout,"%s\n",query); +#endif + BBPunfix(bn->batCacheid); + msg = SQLstatementIntern(cntxt,&query,"SQLanalyze",TRUE,FALSE); + if ( msg){ + GDKfree(dquery); + GDKfree(query); + GDKfree(maxval); + GDKfree(minval); + return msg; + } + } + } + + } + } + GDKfree(dquery); + GDKfree(query); + GDKfree(maxval); + GDKfree(minval); + return MAL_SUCCEED; +} diff --git a/sql/backends/monet5/sql_statistics.h b/sql/backends/monet5/sql_statistics.h new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/sql_statistics.h @@ -0,0 +1,40 @@ +/* + * The contents of this file are subject to the MonetDB Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.monetdb.org/Legal/MonetDBLicense + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is the MonetDB Database System. + * + * The Initial Developer of the Original Code is CWI. + * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. + * Copyright August 2008-2013 MonetDB B.V. + * All Rights Reserved. + */ + +/* (co) M.L. Kersten */ +#ifndef _SQL_STATISTICS_DEF +#define _SQL_STATISTICS_DEF + +/* #define DEBUG_SQL_STATISTICS */ + +#include "sql.h" + _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list