Changeset: 8a230af5936b for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8a230af5936b Modified Files: monetdb5/modules/kernel/bat5.c monetdb5/modules/kernel/bat5.h monetdb5/modules/kernel/bat5.mal monetdb5/optimizer/opt_pipes.c sql/backends/monet5/sql_rdf.c sql/backends/monet5/sql_rdf.h sql/backends/monet5/sql_rdf.mal sql/scripts/30_rdf.sql Branch: rdf Log Message:
Fix the issue caused by rdf_opt_pipe + Add function for generating uniform sample from a BAT diffs (truncated from 309 to 300 lines): diff --git a/monetdb5/modules/kernel/bat5.c b/monetdb5/modules/kernel/bat5.c --- a/monetdb5/modules/kernel/bat5.c +++ b/monetdb5/modules/kernel/bat5.c @@ -213,6 +213,62 @@ BKCdensebat(bat *ret, const wrd *size) } str +BKCdensebatSeq(bat *ret, const wrd *size, const oid *seq) +{ + BAT *bn; + wrd sz = *size; + oid seqbase = *seq; + + if (sz < 0) + sz = 0; + if (sz > (wrd) BUN_MAX) + sz = (wrd) BUN_MAX; + bn = BATdense(0, seqbase, (BUN) sz); + if (bn == NULL) + throw(MAL, "bat.densebat", GDK_EXCEPTION); + *ret = bn->batCacheid; + BBPkeepref(*ret); + return MAL_SUCCEED; +} + +str +BKCsamplebatUni(bat *ret, bat *bid, const flt *_sample){ + + BAT *bn = NULL, *b = NULL; + oid sampsize = 0; //size of sample bat + float step = 0.0; + oid i; + flt nextpos = 0; + BATiter bi; + flt sample = *_sample; + + if ((b = BATdescriptor(*bid)) == NULL) { + throw(MAL, "bat.getInfo", RUNTIME_OBJECT_MISSING); + } + + bi = bat_iterator(b); + sampsize = (oid)(sample * BATcount(b) / 100); + step = 100.0 / (float)sample; + + //printf("Sample size = "BUNFMT" and step = %f\n", sampsize, step); + + bn= BATnew(TYPE_void, TYPE_oid, sampsize , TRANSIENT); + + if (bn == NULL) + throw(MAL, "bat.BKCsamplebatUni", GDK_EXCEPTION); + + for (i = 0; i < sampsize; i++){ + BUNappend(bn, BUNtail(bi, (oid) nextpos), FALSE); + nextpos += step; + } + + *ret = bn->batCacheid; + BBPkeepref(*ret); + + return MAL_SUCCEED; +} + +str BKCmirror(bat *ret, const bat *bid) { BAT *b, *bn; diff --git a/monetdb5/modules/kernel/bat5.h b/monetdb5/modules/kernel/bat5.h --- a/monetdb5/modules/kernel/bat5.h +++ b/monetdb5/modules/kernel/bat5.h @@ -26,6 +26,8 @@ bat5_export char *BKCsetRole(void *r, co bat5_export str BKCnewBAT(bat *res, const int *tt, const BUN *cap, int role); bat5_export str BKCattach(bat *ret, const int *tt, const char * const *heapfile); bat5_export str BKCdensebat(bat *ret, const wrd *size); +bat5_export str BKCdensebatSeq(bat *ret, const wrd *size, const oid *seq); +bat5_export str BKCsamplebatUni(bat *ret, bat *bid, const flt *sample); bat5_export str BKCmirror(bat *ret, const bat *bid); bat5_export str BKCdelete(bat *r, const bat *bid, const oid *h); bat5_export str BKCdelete_multi(bat *r, const bat *bid, const bat *sid); diff --git a/monetdb5/modules/kernel/bat5.mal b/monetdb5/modules/kernel/bat5.mal --- a/monetdb5/modules/kernel/bat5.mal +++ b/monetdb5/modules/kernel/bat5.mal @@ -63,6 +63,14 @@ command densebat(sz:wrd) :bat[:oid,:oid] address BKCdensebat comment "Creates a new [void,void] BAT of size 'sz'."; +command densebatSeq(sz:wrd, seq:oid) :bat[:oid,:oid] +address BKCdensebatSeq +comment "[RDF] Creates a new [void,void] BAT of size 'sz'. with seqbase seq"; + +command samplebatUni(b:bat[:oid,:oid], sample:flt) :bat[:oid,:oid] +address BKCsamplebatUni +comment "[RDF] Creates a sample bat with uniform distribution of sample percent"; + command info ( b:bat[:oid,:any_1]) (:bat[:oid,:str], :bat[:oid,:str]) address BKCinfo comment "Produce a table containing information about a BAT in [attribute,value] format. diff --git a/monetdb5/optimizer/opt_pipes.c b/monetdb5/optimizer/opt_pipes.c --- a/monetdb5/optimizer/opt_pipes.c +++ b/monetdb5/optimizer/opt_pipes.c @@ -163,6 +163,7 @@ static struct PIPELINES { {"rdf_opt_pipe", "optimizer.inline();" "optimizer.remap();" + "optimizer.candidates();" "optimizer.costModel();" "optimizer.coercions();" "optimizer.evaluate();" @@ -170,8 +171,10 @@ static struct PIPELINES { "optimizer.pushselect();" "optimizer.mergetable();" "optimizer.deadcode();" + "optimizer.aliases();" + "optimizer.constants();" "optimizer.commonTerms();" - "optimizer.joinPath();" + "optimizer.projectionpath();" "optimizer.reorder();" "optimizer.deadcode();" "optimizer.reduce();" @@ -179,6 +182,7 @@ static struct PIPELINES { "optimizer.dataflow();" "optimizer.querylog();" "optimizer.multiplex();" + "optimizer.profiler();" "optimizer.generator();" "optimizer.garbageCollector();", "stable", NULL, NULL, 1}, diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c --- a/sql/backends/monet5/sql_rdf.c +++ b/sql/backends/monet5/sql_rdf.c @@ -1502,6 +1502,125 @@ SQLrdftimetoid(oid *ret, str *datetime){ #endif + +str +SQLrdf_convert_to_orig_oid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ + str msg; + mvc *m = NULL; + BAT *lmapBat = NULL, *rmapBat = NULL; + bat lmapBatId, rmapBatId; + str bnamelBat = "map_to_tknz_left"; + str bnamerBat = "map_to_tknz_right"; + BUN pos; + oid *origId; + oid *id = (oid *)getArgReference(stk,pci,1); + oid *ret = (oid *) getArgReference(stk, pci, 0); + + rethrow("sql.rdfidtostr", msg, getSQLContext(cntxt, mb, &m, NULL)); + + lmapBatId = BBPindex(bnamelBat); + rmapBatId = BBPindex(bnamerBat); + + if (lmapBatId == 0 || rmapBatId == 0){ + throw(SQL, "sql.SQLrdfidtostr", "The lmap/rmap Bats should be built already"); + } + + if ((lmapBat= BATdescriptor(lmapBatId)) == NULL) { + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + + if ((rmapBat= BATdescriptor(rmapBatId)) == NULL) { + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + + pos = BUNfnd(lmapBat,id); + if (pos == BUN_NONE) //this id is not converted to a new id + origId = id; + else + origId = (oid *) Tloc(rmapBat, pos); + + *ret = *origId; + + return msg; +} + +str +SQLrdf_convert_to_orig_oid_bat(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ + str msg; + mvc *m = NULL; + BAT *lmapBat = NULL, *rmapBat = NULL; + bat lmapBatId, rmapBatId; + str bnamelBat = "map_to_tknz_left"; + str bnamerBat = "map_to_tknz_right"; + BUN pos; + oid *origId; + BAT *srcBat = NULL, *desBat = NULL, *tmp = NULL; + BAT *o, *g; + BATiter srci; + BUN p, q; + bat *srcbid, *desbid; + oid *id; + srcbid = (bat *)getArgReference(stk,pci,1); + desbid = (bat *) getArgReference(stk, pci, 0); + + rethrow("sql.rdfidtostr", msg, getSQLContext(cntxt, mb, &m, NULL)); + + if ((srcBat = BATdescriptor(*srcbid)) == NULL){ + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + srci = bat_iterator(srcBat); + + desBat = BATnew(TYPE_void, TYPE_oid, BATcount(srcBat) + 1, TRANSIENT); + BATseqbase(desBat, 0); + + /* Init the BATs for looking up the URIs*/ + lmapBatId = BBPindex(bnamelBat); + rmapBatId = BBPindex(bnamerBat); + + if (lmapBatId == 0 || rmapBatId == 0){ + throw(SQL, "sqlbat.SQLrdfidtostr_bat", "The lmap/rmap Bats should be built already"); + } + + if ((lmapBat= BATdescriptor(lmapBatId)) == NULL) { + throw(MAL, "sqlbat.SQLrdfidtostr_bat", RUNTIME_OBJECT_MISSING); + } + + if ((rmapBat= BATdescriptor(rmapBatId)) == NULL) { + throw(MAL, "sqlbat.SQLrdfidtostr_bat", RUNTIME_OBJECT_MISSING); + } + + BATloop(srcBat, p, q){ + id = (oid *)BUNtail(srci, p); + + pos = BUNfnd(lmapBat,id); + if (pos == BUN_NONE) //this id is not converted to a new id + origId = id; + else + origId = (oid *) Tloc(rmapBat, pos); + + //Append to desBAT + BUNappend(desBat, origId, TRUE); + + } + + //Sort des BAT + tmp = desBat; + if (BATsort(&desBat, &o, &g, tmp, NULL, NULL, 0, 0) == GDK_FAIL){ + if (tmp != NULL) BBPreclaim(tmp); + throw(RDF, "SQLrdf_convert_to_orig_oid_bat", "Fail in sorting output BAT"); + } + + + + *desbid = desBat->batCacheid; + BBPkeepref(*desbid); + + BBPunfix(lmapBat->batCacheid); + BBPunfix(rmapBat->batCacheid); + + return msg; +} + str SQLrdfScan_old(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str msg; diff --git a/sql/backends/monet5/sql_rdf.h b/sql/backends/monet5/sql_rdf.h --- a/sql/backends/monet5/sql_rdf.h +++ b/sql/backends/monet5/sql_rdf.h @@ -49,6 +49,10 @@ sql5_export str SQLrdfidtostr(Client cnt sql5_export str SQLrdfidtostr_bat(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str SQLrdfstrtoid(oid *ret, str *s); + +sql5_export str SQLrdf_convert_to_orig_oid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); +sql5_export str SQLrdf_convert_to_orig_oid_bat(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); + sql5_export str SQLrdftimetoid(oid *ret, str *dt); @@ -83,6 +87,6 @@ extern int need_handling_exception; #define RDF_HANDLING_EXCEPTION_POSSIBLE_TBL_OPT 1 /* Use the set of possible table for the set of required props to limit the number of matching subj Id */ -#define PRINT_FOR_DEBUG 1 +#define PRINT_FOR_DEBUG 0 #endif /*_SQL_RDF_H */ diff --git a/sql/backends/monet5/sql_rdf.mal b/sql/backends/monet5/sql_rdf.mal --- a/sql/backends/monet5/sql_rdf.mal +++ b/sql/backends/monet5/sql_rdf.mal @@ -46,6 +46,14 @@ pattern batsql.rdfidtostr(v:bat[:oid]):b address SQLrdfidtostr_bat comment "Convert from oid to string"; +pattern rdf_convert_to_orig_oid(v:oid):oid +address SQLrdf_convert_to_orig_oid +comment "Convert subject oid to the orignal oid"; + +pattern batsql.rdf_convert_to_orig_oid(v:bat[:oid]):bat[:oid] +address SQLrdf_convert_to_orig_oid_bat +comment "Convert subject oid to the orignal oid"; + pattern rdfdeserialize() address SQLrdfdeserialize comment "Deserialize dump BATs to a SimpleCS set"; diff --git a/sql/scripts/30_rdf.sql b/sql/scripts/30_rdf.sql --- a/sql/scripts/30_rdf.sql +++ b/sql/scripts/30_rdf.sql @@ -65,6 +65,9 @@ create function rdf_idtostr(id oid) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list