Changeset: 53355c1147d4 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=53355c1147d4 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h sql/backends/monet5/sql.mx Branch: rdf Log Message:
Done with re-organize triple store by using cs tables (for regular data) and pso table (for irregular data). diffs (truncated from 313 to 300 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -1958,7 +1958,7 @@ PropStat* getPropStatisticsByTable(CSset return propStat; } -static + void printPropStat(PropStat* propStat){ int i, j; oid *pbt; @@ -3041,6 +3041,8 @@ void getTblidFromSoid(oid Soid, int *tbi *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID)); + *tbidx = *tbidx - 1; + //return freqCSid; } @@ -3107,6 +3109,10 @@ void initCStablesAndIdxMapping(CStableSt cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k); cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k); + cstablestat->pbat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + cstablestat->sbat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + cstablestat->obat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k); cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k); @@ -3166,7 +3172,9 @@ void freeCStableStat(CStableStat* cstabl } free(cstablestat->lstcstable[i].colBats); } - + BBPunfix(cstablestat->pbat->batCacheid); + BBPunfix(cstablestat->sbat->batCacheid); + BBPunfix(cstablestat->obat->batCacheid); free(cstablestat->lstbatid); free(cstablestat->lastInsertedS); free(cstablestat->lstcstable); @@ -3233,6 +3241,7 @@ void fillMissingvalues(BAT* curBat, oid } } + str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId){ BAT *sbat = NULL, *pbat = NULL, *obat = NULL; BATiter si,pi,oi; @@ -3257,6 +3266,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s (void) bun; + (void) lastSubjId; //BAT** setofBats = NULL; if ((sbat = BATdescriptor(*sbatid)) == NULL) { @@ -3282,7 +3292,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s lastP = BUN_NONE; lastS = BUN_NONE; - printf("Created cstablestat with %d tables \n", cstablestat->numTables); + printf("Reorganize the triple store by using %d CS tables \n", cstablestat->numTables); //setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables); @@ -3292,8 +3302,20 @@ str RDFdistTriplesToCSs(int *ret, bat *s obt = (oid *) BUNtloc(oi, p); + getTblidFromSoid(*sbt, &tblIdx, &tmpSoid); + + if (tblIdx == -1){ // This is for irregular triples, put them to pso table + BUNappend(cstablestat->pbat,pbt , TRUE); + BUNappend(cstablestat->sbat,sbt , TRUE); + BUNappend(cstablestat->obat,obt , TRUE); + + continue; + } + if (*pbt != lastP){ - fillMissingvalues(curBat, lastSubjId[tblIdx]); + if (lasttblIdx != -1) { + fillMissingvalues(curBat, lastSubjId[lasttblIdx]); + } //Get number of BATs for this p ppos = BUNfnd(BATmirror(propStat->pBat),pbt); @@ -3314,7 +3336,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s } else{ if (*sbt == lastS){ //multi-values prop - printf("Multi values prop \n"); + //printf("Multi values prop \n"); continue; } else{ @@ -3322,12 +3344,14 @@ str RDFdistTriplesToCSs(int *ret, bat *s } } - getTblidFromSoid(*sbt, &tblIdx, &tmpSoid); - tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){ - fillMissingvalues(curBat, lastSubjId[lasttblIdx]); + + if (lasttblIdx != -1) { + fillMissingvalues(curBat, lastSubjId[lasttblIdx]); + } + lastColIdx = tmpColIdx; lasttblIdx = tblIdx; } @@ -3338,7 +3362,6 @@ str RDFdistTriplesToCSs(int *ret, bat *s //TODO: Check last subjectId for this prop. If the subjectId is not continuous, insert NIL if (tmpSoid > (tmplastInsertedS + 1)){ for (k = tmplastInsertedS; k < tmpSoid-1; k++){ - printf(" Add nil value \n"); bun = oid_nil; BUNappend(curBat,&bun , TRUE); } @@ -3346,13 +3369,16 @@ str RDFdistTriplesToCSs(int *ret, bat *s BUNappend(curBat, obt, TRUE); - printf(BUNFMT": Table %d | column %d for prop " BUNFMT " | sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, - tmpColIdx, *pbt, tmpSoid, *obt); + //printf(BUNFMT": Table %d | column %d for prop " BUNFMT " | sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, + // tmpColIdx, *pbt, tmpSoid, *obt); + //Update last inserted S cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid; } - fillMissingvalues(curBat, lastSubjId[tblIdx]); + if (lasttblIdx != -1) { + fillMissingvalues(curBat, lastSubjId[lasttblIdx]); + } // Keep the batCacheId for (i = 0; i < cstablestat->numTables; i++){ @@ -3362,6 +3388,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s } *ret = 1; + + printf(" ... Done \n"); BBPunfix(sbat->batCacheid); BBPunfix(pbat->batCacheid); @@ -3384,7 +3412,7 @@ RDFreorganize(int *ret, CStableStat *cst BUN newId; oid *sbt; oid *lastSubjId; /* Store the last subject Id in each freqCS */ - oid tblIdx; + int tblIdx; oid lastS; oid l,r; bat oNewBatid, pNewBatid; @@ -3433,9 +3461,6 @@ RDFreorganize(int *ret, CStableStat *cst throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); } - BATprint(sbat); - //BATprint(pbat); - sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat)); if (sNewBat== NULL) { throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); @@ -3466,13 +3491,15 @@ RDFreorganize(int *ret, CStableStat *cst sbt = (oid *) BUNtloc(si, p); tblIdx = csTblIdxMapping[subjCSMap[*sbt]]; - if (tblIdx != BUN_NONE){ + if (tblIdx != -1){ if (lastS != *sbt){ //new subject lastS = *sbt; - + newId = lastSubjId[tblIdx] + 1; - newId |= (BUN)tblIdx << (sizeof(BUN)*8 - NBITS_FOR_CSID); + newId |= (BUN)(tblIdx + 1) << (sizeof(BUN)*8 - NBITS_FOR_CSID); + // subject belongs to CS0 will have + // the value for 'CS identifying field' of this subject: 1 l = *sbt; r = newId; @@ -3497,8 +3524,6 @@ RDFreorganize(int *ret, CStableStat *cst origobat = getOriginalOBat(obat); - //BATprint(origobat); - if (RDFpartialjoin(&oNewBatid, &lmap->batCacheid, &rmap->batCacheid, &origobat->batCacheid) == MAL_SUCCEED){ if ((oNewBat = BATdescriptor(oNewBatid)) == NULL) { throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); @@ -3516,24 +3541,19 @@ RDFreorganize(int *ret, CStableStat *cst else throw(RDF, "rdf.RDFreorganize", "Problem in using RDFpartialjoin for obat"); - //BATprint(oNewBat); printf("Done! \n"); - printf("sNewBat before sorting by P \n"); - BATprint(sNewBat); printf("Sort triple table according to P, S, O order ... "); if (triplesubsort(&pNewBat, &sNewBat, &oNewBat) != MAL_SUCCEED){ throw(RDF, "rdf.RDFreorganize", "Problem in sorting PSO"); } printf("Done \n"); - printf("pNewBat after sorting by P \n"); - BATprint(pNewBat); - printf("sNewBat after sorting by P \n"); - BATprint(sNewBat); + //BATprint(pNewBat); + //BATprint(sNewBat); propStat = getPropStatisticsByTable(freqCSset,mfreqIdxTblIdxMapping, &numdistinctMCS); - printPropStat(propStat); + //printPropStat(propStat); if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, &pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat,lastSubjId) != MAL_SUCCEED){ throw(RDF, "rdf.RDFreorganize", "Problem in distributing triples to BATs using CSs"); @@ -3542,6 +3562,7 @@ RDFreorganize(int *ret, CStableStat *cst freeCSset(freqCSset); free(subjCSMap); free(csTblIdxMapping); + free(lastSubjId); //freeCStableStat(cstablestat); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -191,6 +191,9 @@ typedef struct CStableStat { oid** lastInsertedS; //Last S for each column //sql_schema* schema; CStable* lstcstable; + BAT* pbat; + BAT* sbat; + BAT* obat; } CStableStat; @@ -203,4 +206,8 @@ RDFreorganize(int *ret, CStableStat *cst rdf_export void freeCStableStat(CStableStat *cstablestat); + +rdf_export void +printPropStat(PropStat *propstat); + #endif /* _RDFSCHEMA_H_ */ diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx --- a/sql/backends/monet5/sql.mx +++ b/sql/backends/monet5/sql.mx @@ -7599,6 +7599,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr char tmpcolname[100]; sql_subtype tpe; sql_table **cstables; + sql_table *psotbl; int i, j; str msg; @@ -7625,6 +7626,24 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr // Put to SQL tables sql_find_subtype(&tpe, "oid", 31, 0); + // Add irregular triples to pso tbale + psotbl = mvc_create_table(m, sch, "pso", tt_table, 0, + SQL_PERSIST, 0, 3); + mvc_create_column(m, psotbl, "p", &tpe); + mvc_create_column(m, psotbl, "s", &tpe); + mvc_create_column(m, psotbl, "o", &tpe); + + + store_funcs.append_col(m->session->tr, + mvc_bind_column(m, psotbl,"p" ), + cstablestat->pbat, TYPE_bat); + store_funcs.append_col(m->session->tr, + mvc_bind_column(m, psotbl,"s" ), + cstablestat->sbat, TYPE_bat); + store_funcs.append_col(m->session->tr, + mvc_bind_column(m, psotbl,"o" ), + cstablestat->obat, TYPE_bat); + // Add regular triple to cstables for (i = 0; i < cstablestat->numTables; i++){ sprintf(tmptbname, "cstable%d",i); cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0, @@ -7634,7 +7653,6 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr //TODO: Use propertyId from Propstat sprintf(tmpcolname, "col%d",j); mvc_create_column(m, cstables[i], tmpcolname, &tpe); - printf("Create column for %d for table %d \n", j, i); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list