Changeset: 53355c1147d4 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=53355c1147d4
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Finished reorganizing the triple store: CS tables are used for regular data 
and the pso table is used for irregular data.


diffs (truncated from 313 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1958,7 +1958,7 @@ PropStat* getPropStatisticsByTable(CSset
        return propStat; 
 }
 
-static
+
 void printPropStat(PropStat* propStat){
        int i, j; 
        oid     *pbt; 
@@ -3041,6 +3041,8 @@ void getTblidFromSoid(oid Soid, int *tbi
 
        *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID));
        
+       *tbidx = *tbidx - 1; 
+
        //return freqCSid; 
 }
 
@@ -3107,6 +3109,10 @@ void initCStablesAndIdxMapping(CStableSt
        cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k); 
        cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k); 
 
+       cstablestat->pbat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+       cstablestat->sbat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+       cstablestat->obat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+
        cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k);
 
        cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k); 
@@ -3166,7 +3172,9 @@ void freeCStableStat(CStableStat* cstabl
                }
                free(cstablestat->lstcstable[i].colBats);
        }
-
+       BBPunfix(cstablestat->pbat->batCacheid); 
+       BBPunfix(cstablestat->sbat->batCacheid); 
+       BBPunfix(cstablestat->obat->batCacheid); 
        free(cstablestat->lstbatid); 
        free(cstablestat->lastInsertedS); 
        free(cstablestat->lstcstable); 
@@ -3233,6 +3241,7 @@ void fillMissingvalues(BAT* curBat, oid 
        }
 }
 
+
 str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, 
PropStat* propStat, CStableStat *cstablestat, oid* lastSubjId){
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL; 
        BATiter si,pi,oi; 
@@ -3257,6 +3266,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
 
 
        (void) bun; 
+       (void) lastSubjId;
        //BAT** setofBats = NULL;
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
@@ -3282,7 +3292,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        lastP = BUN_NONE; 
        lastS = BUN_NONE; 
        
-       printf("Created cstablestat with %d tables \n", cstablestat->numTables);
+       printf("Reorganize the triple store by using %d CS tables \n", 
cstablestat->numTables);
 
        //setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables); 
 
@@ -3292,8 +3302,20 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                obt = (oid *) BUNtloc(oi, p);
 
 
+               getTblidFromSoid(*sbt, &tblIdx, &tmpSoid);      
+
+               if (tblIdx == -1){      // This is for irregular triples, put 
them to pso table
+                       BUNappend(cstablestat->pbat,pbt , TRUE);
+                       BUNappend(cstablestat->sbat,sbt , TRUE);
+                       BUNappend(cstablestat->obat,obt , TRUE);
+
+                       continue; 
+               }
+
                if (*pbt != lastP){
-                       fillMissingvalues(curBat, lastSubjId[tblIdx]); 
+                       if (lasttblIdx != -1) {
+                               fillMissingvalues(curBat, 
lastSubjId[lasttblIdx]); 
+                       }
                        
                        //Get number of BATs for this p
                        ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
@@ -3314,7 +3336,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                }
                else{
                        if (*sbt == lastS){     //multi-values prop
-                               printf("Multi values prop \n"); 
+                               //printf("Multi values prop \n"); 
                                continue;                               
                        }
                        else{
@@ -3322,12 +3344,14 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        }
                }
 
-               getTblidFromSoid(*sbt, &tblIdx, &tmpSoid);              
-
                tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; 
 
                if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
-                       fillMissingvalues(curBat, lastSubjId[lasttblIdx]);
+
+                       if (lasttblIdx != -1) {
+                               fillMissingvalues(curBat, 
lastSubjId[lasttblIdx]); 
+                       }
+
                        lastColIdx = tmpColIdx; 
                        lasttblIdx = tblIdx; 
                }
@@ -3338,7 +3362,6 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                //TODO: Check last subjectId for this prop. If the subjectId is 
not continuous, insert NIL
                if (tmpSoid > (tmplastInsertedS + 1)){  
                        for (k = tmplastInsertedS; k < tmpSoid-1; k++){
-                               printf("        Add nil value \n");
                                bun = oid_nil; 
                                BUNappend(curBat,&bun , TRUE);
                        }
@@ -3346,13 +3369,16 @@ str RDFdistTriplesToCSs(int *ret, bat *s
 
                BUNappend(curBat, obt, TRUE); 
 
-               printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | sub 
" BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
-                                                       tmpColIdx, *pbt, 
tmpSoid, *obt); 
+               //printf(BUNFMT": Table %d | column %d  for prop " BUNFMT " | 
sub " BUNFMT " | obj " BUNFMT "\n",p, tblIdx, 
+               //                                      tmpColIdx, *pbt, 
tmpSoid, *obt); 
+                                       
                //Update last inserted S
                cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
        }
 
-       fillMissingvalues(curBat, lastSubjId[tblIdx]); 
+       if (lasttblIdx != -1) {
+               fillMissingvalues(curBat, lastSubjId[lasttblIdx]); 
+       }
 
        // Keep the batCacheId
        for (i = 0; i < cstablestat->numTables; i++){
@@ -3362,6 +3388,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        }
 
        *ret = 1; 
+
+       printf(" ... Done \n");
        
        BBPunfix(sbat->batCacheid);
        BBPunfix(pbat->batCacheid);
@@ -3384,7 +3412,7 @@ RDFreorganize(int *ret, CStableStat *cst
        BUN             newId; 
        oid             *sbt; 
        oid             *lastSubjId;    /* Store the last subject Id in each 
freqCS */
-       oid             tblIdx; 
+       int             tblIdx; 
        oid             lastS;
        oid             l,r; 
        bat             oNewBatid, pNewBatid; 
@@ -3433,9 +3461,6 @@ RDFreorganize(int *ret, CStableStat *cst
                throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
        }
 
-       BATprint(sbat); 
-       //BATprint(pbat); 
-
        sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat));
        if (sNewBat== NULL) {
                throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -3466,13 +3491,15 @@ RDFreorganize(int *ret, CStableStat *cst
                sbt = (oid *) BUNtloc(si, p);
                tblIdx = csTblIdxMapping[subjCSMap[*sbt]];
 
-               if (tblIdx != BUN_NONE){
+               if (tblIdx != -1){
 
                        if (lastS != *sbt){     //new subject
                                lastS = *sbt; 
-
+                               
                                newId = lastSubjId[tblIdx] + 1;
-                               newId |= (BUN)tblIdx << (sizeof(BUN)*8 - 
NBITS_FOR_CSID);
+                               newId |= (BUN)(tblIdx + 1) << (sizeof(BUN)*8 - 
NBITS_FOR_CSID);
+                               // subject belongs to CS0 will  have 
+                               // the value for 'CS identifying field' of this 
subject: 1
 
                                l = *sbt; 
                                r = newId; 
@@ -3497,8 +3524,6 @@ RDFreorganize(int *ret, CStableStat *cst
        
        origobat = getOriginalOBat(obat); 
 
-       //BATprint(origobat);
-       
        if (RDFpartialjoin(&oNewBatid, &lmap->batCacheid, &rmap->batCacheid, 
&origobat->batCacheid) == MAL_SUCCEED){
                if ((oNewBat = BATdescriptor(oNewBatid)) == NULL) {
                        throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -3516,24 +3541,19 @@ RDFreorganize(int *ret, CStableStat *cst
        else
                throw(RDF, "rdf.RDFreorganize", "Problem in using 
RDFpartialjoin for obat");
 
-       //BATprint(oNewBat);
        printf("Done! \n");
-       printf("sNewBat before sorting by P \n"); 
-       BATprint(sNewBat);
        printf("Sort triple table according to P, S, O order ... ");
        if (triplesubsort(&pNewBat, &sNewBat, &oNewBat) != MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in sorting PSO");      
        }       
        printf("Done  \n");
 
-       printf("pNewBat after sorting by P \n"); 
-       BATprint(pNewBat);
-       printf("sNewBat after sorting by P \n"); 
-       BATprint(sNewBat);
+       //BATprint(pNewBat);
+       //BATprint(sNewBat);
 
        propStat = getPropStatisticsByTable(freqCSset,mfreqIdxTblIdxMapping, 
&numdistinctMCS); 
        
-       printPropStat(propStat); 
+       //printPropStat(propStat); 
 
        if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, 
&pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat,lastSubjId) 
!= MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in distributing 
triples to BATs using CSs");           
@@ -3542,6 +3562,7 @@ RDFreorganize(int *ret, CStableStat *cst
        freeCSset(freqCSset); 
        free(subjCSMap); 
        free(csTblIdxMapping);
+       
        free(lastSubjId);
        //freeCStableStat(cstablestat); 
 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -191,6 +191,9 @@ typedef struct CStableStat {
        oid**           lastInsertedS;  //Last S for each column
        //sql_schema*   schema;         
        CStable*        lstcstable; 
+       BAT*            pbat; 
+       BAT*            sbat; 
+       BAT*            obat; 
 } CStableStat; 
 
 
@@ -203,4 +206,8 @@ RDFreorganize(int *ret, CStableStat *cst
 rdf_export void
 freeCStableStat(CStableStat *cstablestat); 
 
+
+rdf_export void
+printPropStat(PropStat *propstat); 
+
 #endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -7599,6 +7599,7 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        char    tmpcolname[100]; 
        sql_subtype tpe; 
        sql_table       **cstables; 
+       sql_table       *psotbl;
        int     i, j; 
 
        str msg;
@@ -7625,6 +7626,24 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        // Put to SQL tables
        sql_find_subtype(&tpe, "oid", 31, 0);
 
+       // Add irregular triples to pso table
+       psotbl = mvc_create_table(m, sch, "pso", tt_table, 0,
+                                                          SQL_PERSIST, 0, 3);
+       mvc_create_column(m, psotbl, "p",  &tpe);
+       mvc_create_column(m, psotbl, "s",  &tpe);
+       mvc_create_column(m, psotbl, "o",  &tpe);
+
+
+       store_funcs.append_col(m->session->tr,
+                       mvc_bind_column(m, psotbl,"p" ), 
+                       cstablestat->pbat, TYPE_bat);
+       store_funcs.append_col(m->session->tr,
+                       mvc_bind_column(m, psotbl,"s" ), 
+                       cstablestat->sbat, TYPE_bat);
+       store_funcs.append_col(m->session->tr,
+                       mvc_bind_column(m, psotbl,"o" ), 
+                       cstablestat->obat, TYPE_bat);
+       // Add regular triple to cstables
        for (i = 0; i < cstablestat->numTables; i++){
                sprintf(tmptbname, "cstable%d",i);
                cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0,
@@ -7634,7 +7653,6 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
                        //TODO: Use propertyId from Propstat
                        sprintf(tmpcolname, "col%d",j);
                        mvc_create_column(m, cstables[i], tmpcolname,  &tpe);
-                       printf("Create column for %d for table %d \n", j, i);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to