Changeset: 0e78f7e658f1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0e78f7e658f1
Added Files:
        sql/backends/monet5/sql_statistics.c
        sql/backends/monet5/sql_statistics.h
        sql/scripts/80_statistics.sql
Modified Files:
        clients/Tests/exports.stable.out
        sql/backends/monet5/Makefile.ag
        sql/backends/monet5/sql.mx
        sql/scripts/Makefile.ag
        
sql/test/BugDay_2005-10-06_2.8/Tests/MapiClient-dump.SF-905851.stable.out
        
sql/test/BugTracker-2008/Tests/sql_command_kills_db.SF-2233677.stable.err
        sql/test/BugTracker-2009/Tests/mclient-lsql-D.stable.out
        
sql/test/BugTracker-2009/Tests/name_clash_with_dump.SF-2780395.stable.out
        sql/test/BugTracker-2011/Tests/count-count-distinct.Bug-2808.stable.out
        
sql/test/BugTracker-2011/Tests/interrupted-initialization.Bug-2875.stable.out
        
sql/test/BugTracker-2012/Tests/aggregate_vs_positional_column_crash.Bug-3085.stable.out
        sql/test/BugTracker-2012/Tests/predicate_select.Bug-3090.stable.err
        sql/test/BugTracker-2012/Tests/predicate_select.Bug-3090.stable.out
        sql/test/BugTracker/Tests/multi-column-constraint.SF-1964587.stable.out
        sql/test/BugTracker/Tests/rank_over.SF-1691098.stable.out
        sql/test/BugTracker/Tests/with_only_once.SF-1720293.stable.out
        sql/test/Dump/Tests/dump-empty.stable.out
        sql/test/Dump/Tests/dump.stable.out
        sql/test/Tests/systemfunctions.stable.out
        sql/test/UserDump/Tests/create.stable.out
        sql/test/bugs/Tests/simple_view.stable.out
        
sql/test/bugs/Tests/subselect_multiple_unionall_where_1=1-bug-sf-1005596.stable.out
        sql/test/testdb/Tests/testdb-dump.stable.out
Branch: default
Log Message:

Introduce the analyze() function
The min/max/nil properties for schemas up to individual columns
can be requested. The results are stored in a persistent snapshot
table, which could be used by optimizers.

The primary method is to call a procedure
CALL analyze('sys','functions','name',10);
where the last argument indicates a sample to be used to avoid
excessive costs.


diffs (truncated from 764 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -2323,6 +2323,7 @@ str UUIDequal(bit *retval, str *l, str *
 str UUIDgenerateUuid(str *retval);
 str UUIDisaUUID(bit *retval, str *s);
 str UUIDstr2uuid(str *retval, str *s);
+int UUIDtoString(str *retval, int *len, str handle);
 str UUIDuuid2str(str *retval, str *s);
 str XMLattribute(xml *ret, str *name, str *val);
 str XMLcomment(xml *x, str *s);
diff --git a/sql/backends/monet5/Makefile.ag b/sql/backends/monet5/Makefile.ag
--- a/sql/backends/monet5/Makefile.ag
+++ b/sql/backends/monet5/Makefile.ag
@@ -42,6 +42,7 @@ lib__sql = {
                sql_scenario.c sql_scenario.h \
                rel_bin.c rel_bin.h \
                sql_statement.c sql_statement.h \
+               sql_statistics.c sql_statistics.h \
                sql_gencode.c sql_gencode.h \
                sql_optimizer.c sql_optimizer.h \
                sql_result.c sql_result.h \
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -511,6 +511,27 @@ pattern dump_trace()(
 address dump_trace
 comment "dump the trace statistics";
 
+pattern analyze():void
+address sql_analyze;
+pattern analyze(sch:str):void
+address sql_analyze;
+pattern analyze(sch:str,tbl:str):void
+address sql_analyze;
+pattern analyze(sch:str,tbl:str,col:str):void
+address sql_analyze
+comment "Update the database statistics table";
+
+pattern analyze(sample:lng):void
+address sql_analyze;
+pattern analyze(sch:str,sample:lng):void
+address sql_analyze;
+pattern analyze(sch:str,tbl:str,sample:lng):void
+address sql_analyze;
+pattern analyze(sch:str,tbl:str,col:str,sample:lng):void
+address sql_analyze
+comment "Update the database statistics table";
+
+
 pattern storage()(
        schema:bat[:oid,:str],
        table:bat[:oid,:str],
diff --git a/sql/backends/monet5/sql_statistics.c 
b/sql/backends/monet5/sql_statistics.c
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/sql_statistics.c
@@ -0,0 +1,210 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ * 
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * The Original Code is the MonetDB Database System.
+ * 
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+/* (c) M.L. Kersten
+Most optimizers need easy access to key information 
+for proper plan generation. Amongst others, this
+information consists of the tuple count, size,
+min- and max-value, and the null-density.
+They are kept around as persistent tables, modeled 
+directly as a collection of BATs.
+
+We made need an directly accessible structure to speedup
+analysis by optimizers.
+*/
+#include "monetdb_config.h"
+#include "sql_statistics.h"
+#include "sql_scenario.h"
+
+str
+sql_analyze(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       mvc *m = NULL;
+       str msg = getSQLContext(cntxt,mb, &m, NULL);
+       sql_trans *tr = m->session->tr;
+       node *nsch, *ntab, *ncol;
+       char *query, *dquery;
+       char *maxval, *minval;
+       str sch = 0,  tbl = 0, col = 0;
+       int sorted;
+       lng nils = 0;
+       lng uniq = 0;
+       lng samplesize = 0;
+       int argc = pci->argc;
+
+       if ( msg != MAL_SUCCEED || (msg = checkSQLContext(cntxt)) != NULL)
+               return msg;
+
+       if ( argc > 1 && getVarType(mb,getArg(pci,argc-1))== TYPE_lng ){
+               samplesize = *(lng*) getArgReference(stk,pci,pci->argc-1);
+               argc--;
+       }
+       dquery = (char*) GDKzalloc(8192);
+       query = (char*) GDKzalloc(8192);
+       maxval = (char*) GDKzalloc(8192);
+       minval = (char*) GDKzalloc(8192);
+       if ( !(dquery && query && maxval && minval))
+               throw(SQL,"analyze",MAL_MALLOC_FAIL);
+
+       switch(argc){
+       case 2:
+               tbl = *(str*) getArgReference(stk,pci,1);
+               break;
+       case 3:
+               sch = *(str*) getArgReference(stk,pci,1);
+               tbl = *(str*) getArgReference(stk,pci,2);
+               break;
+       case 4:
+               sch = *(str*) getArgReference(stk,pci,1);
+               tbl = *(str*) getArgReference(stk,pci,2);
+               col = *(str*) getArgReference(stk,pci,3);
+               break;
+       }
+#ifdef DEBUG_SQL_STATISTICS
+       mnstr_printf(cntxt->fdout,"analyze %s.%s.%s sample 
"LLFMT"\n",(sch?sch:""),(tbl?tbl:" "),(col?col:" "),samplesize);
+#endif
+       for( nsch= tr->schemas.set->h; nsch; nsch= nsch->next){
+               sql_base *b= nsch->data;
+               sql_schema *s= (sql_schema*) nsch->data;
+               if ( !isalpha((int)b->name[0]) )
+                       continue;
+
+               if ( sch && strcmp(sch, b->name) )
+                       continue;
+               if (s->tables.set) 
+               for(ntab= (s)->tables.set->h ;ntab; ntab= ntab->next){
+                       sql_base *bt= ntab->data;
+                       sql_table *t= (sql_table*) bt;
+
+                       if ( tbl && strcmp(bt->name,tbl))
+                               continue;
+                       if (isTable(t) && t->columns.set) 
+                       for (ncol= (t)->columns.set->h; ncol; ncol= ncol->next){
+                               sql_base *bc = ncol->data;
+                               sql_column *c= (sql_column *) ncol->data;
+                               BAT *bn = store_funcs.bind_col(tr, c, 0), *br;
+                               BAT *bsample;
+                               lng sz = BATcount(bn);
+
+                               if ( col && strcmp(bc->name,col))
+                                       continue;
+                               snprintf(dquery,8192,
+                                       "delete from sys.statistics where 
\"schema\" ='%s' and \"table\"='%s' and \"column\"='%s';",
+                                               b->name, bt->name, bc->name);
+                               if ( samplesize  >0){
+                                       bsample = BATsample(bn, (BUN) 25000);
+                               } else 
+                                       bsample = bn;
+                               br = BATselect(bsample, ATOMnil(bn->ttype), 
ATOMnil(bn->ttype));
+                               nils = BATcount(br);
+                               BBPunfix(br->batCacheid);
+                               if ( bn->tkey)
+                                       uniq = sz;
+                               else {
+                                       br = BATkunique(BATmirror(bsample));
+                                       uniq = BATcount(br);
+                                       BBPunfix(br->batCacheid);
+                               }
+                               if ( samplesize  > 0){
+                                       BBPunfix(bsample->batCacheid);
+                               } 
+                               sorted = BATtordered(bn);
+
+                               // Gather the min/max value for builtin types
+#define minmax(TYPE,FMT) \
+{\
+       TYPE *val=0;\
+       val= BATmax(bn,0);\
+       snprintf(maxval,8192,FMT,*val);\
+       GDKfree(val);\
+       val= BATmin(bn,0);\
+       snprintf(minval,8192,FMT,*val);\
+       GDKfree(val);\
+       break;\
+}
+                               switch(bn->ttype){
+                               case TYPE_sht: minmax(sht,"%d");
+                               case TYPE_int: minmax(int,"%d");
+                               case TYPE_lng: minmax(lng,LLFMT);
+                               case TYPE_flt: minmax(flt,"%f");
+                               case TYPE_dbl: minmax(dbl,"%f");
+                               default:
+                                       snprintf(maxval,8192,"nil");
+                                       snprintf(minval,8192,"nil");
+                               }
+                               snprintf(query,8192,
+                                       "insert into sys.statistics 
values('%s','%s','%s','%s',now(),"LLFMT","LLFMT","LLFMT",'%s','%s',%s);",
+                                               b->name, bt->name, bc->name, 
c->type.type->sqlname,sz,uniq,nils, minval,maxval, sorted?"true":"false");
+#ifdef DEBUG_SQL_STATISTICS
+                               mnstr_printf(cntxt->fdout,"%s\n",dquery);
+                               mnstr_printf(cntxt->fdout,"%s\n",query);
+#endif
+                               BBPunfix(bn->batCacheid);
+                               msg = 
SQLstatementIntern(cntxt,&dquery,"SQLanalyze",TRUE,FALSE);
+                               if ( msg){
+                                       GDKfree(dquery);
+                                       GDKfree(query);
+                                       GDKfree(maxval);
+                                       GDKfree(minval);
+                                       return msg;
+                               }
+                               msg = 
SQLstatementIntern(cntxt,&query,"SQLanalyze",TRUE,FALSE);
+                               if ( msg){
+                                       GDKfree(dquery);
+                                       GDKfree(query);
+                                       GDKfree(maxval);
+                                       GDKfree(minval);
+                                       return msg;
+                               }
+                       }
+
+                       if (isTable(t) && t->idxs.set)
+                               for (ncol= (t)->idxs.set->h; ncol; ncol= 
ncol->next){
+                                       sql_base *bc = ncol->data;
+                                       sql_idx *c= (sql_idx *) ncol->data;
+                                       if(c->type != no_idx){
+                                               BAT *bn = 
store_funcs.bind_idx(tr, c, 0);
+                                               lng sz= BATcount(bn);
+                                               sorted = BATtordered(bn);
+               
+                                               snprintf(query,8192,
+                                                       "insert into 
sys.statistics 
values('%s','%s','%s','oid',now(),"LLFMT","LLFMT","LLFMT",'%s','%s',%s);",
+                                                               b->name, 
bt->name, bc->name, sz,uniq,nils,minval,maxval,sorted?"true":"false");
+#ifdef DEBUG_SQL_STATISTICS
+                                               
mnstr_printf(cntxt->fdout,"%s\n",query);
+#endif
+                                               BBPunfix(bn->batCacheid);
+                                               msg = 
SQLstatementIntern(cntxt,&query,"SQLanalyze",TRUE,FALSE);
+                                               if ( msg){
+                                                       GDKfree(dquery);
+                                                       GDKfree(query);
+                                                       GDKfree(maxval);
+                                                       GDKfree(minval);
+                                                       return msg;
+                                               }
+                                       }
+                               }
+               
+               }
+       }
+       GDKfree(dquery);
+       GDKfree(query);
+       GDKfree(maxval);
+       GDKfree(minval);
+       return MAL_SUCCEED;
+}
diff --git a/sql/backends/monet5/sql_statistics.h 
b/sql/backends/monet5/sql_statistics.h
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/sql_statistics.h
@@ -0,0 +1,40 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ * 
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * The Original Code is the MonetDB Database System.
+ * 
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+/*  (co) M.L. Kersten */
+#ifndef _SQL_STATISTICS_DEF
+#define _SQL_STATISTICS_DEF
+
+/* #define DEBUG_SQL_STATISTICS */
+
+#include "sql.h"
+
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to