Changeset: c85a5f2c0824 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c85a5f2c0824
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Update frequency and coverage of each column after moving each triple to PSO table


diffs (146 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -5946,7 +5946,9 @@ str initFullSampleData(CSSampleExtend *c
                        colIdx++;
                        csSampleEx[i].lstProp[colIdx] = 
csPropTypes[i].lstPropTypes[j].prop;
                        csSampleEx[i].lstPropSupport[colIdx] = 
csPropTypes[i].lstPropTypes[j].propFreq;
-                       
+
+                       if (csPropTypes[i].lstPropTypes[j].propFreq == 0) 
printf("[Verify] Empty Bat at table %d col %d  Prop "BUNFMT 
"\n",i,colIdx,csPropTypes[i].lstPropTypes[j].prop);
+               
                        //Mark whther this col is a MV col
                        csSampleEx[i].lstIsMVCol[colIdx] = 
csPropTypes[i].lstPropTypes[j].isMVProp;
                        
@@ -8888,7 +8890,34 @@ void getRealValue(ValPtr returnValue, oi
        }
 
 }
-
+static
+void updatePropTypeForRemovedTriple(CSPropTypes *csPropTypes, int* 
tmpTblIdxPropIdxMap, int tblIdx, oid *subjCSMap, int* csTblIdxMapping, oid sbt, 
oid pbt, oid *lastRemovedProp, oid* lastRemovedSubj, char isMultiToSingleProp){
+       int tmptblIdx, tmpPropIdx;
+
+       if (tblIdx == -1)
+               tmptblIdx = csTblIdxMapping[subjCSMap[sbt]];
+       else 
+               tmptblIdx = tblIdx;
+
+       tmpPropIdx = tmpTblIdxPropIdxMap[tmptblIdx];
+       //if (tmptblIdx == 3 && tmpPropIdx == 51) printf("Removing <p> <s> : " 
BUNFMT "  |   " BUNFMT "\n",pbt,sbt);
+       //Update PropTypes
+       if (isMultiToSingleProp){
+               csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--;
+               return; 
+       }
+
+       if (pbt != *lastRemovedProp || sbt != *lastRemovedSubj){
+               csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--;
+               csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propFreq--;
+
+               *lastRemovedProp = pbt;
+               *lastRemovedSubj = sbt; 
+       } 
+       else{   //Multivalue
+               csPropTypes[tmptblIdx].lstPropTypes[tmpPropIdx].propCover--;
+       }
+}
 
 //Macro for inserting to PSO
 #define insToPSO(pb, sb, ob, pbt, sbt, obt)    \
@@ -8905,7 +8934,7 @@ void getRealValue(ValPtr returnValue, oi
        }while (0)
 
 
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,  bat 
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat 
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj){
+str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,  bat 
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat 
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj, 
oid *subjCSMap, int* csTblIdxMapping){
        
        BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL, *lmap = 
NULL, *rmap = NULL; 
        BATiter si,pi,oi, mi; 
@@ -8961,7 +8990,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
        int     initHashBatgz = 0; 
        BUN     tmpFKRefBun = BUN_NONE; 
        char    isFKCol = 0; 
-       #endif
+       #endif  
+
+       oid     lastRemovedSubj = BUN_NONE; 
+       oid     lastRemovedProp = BUN_NONE; 
 
        (void) isLotsNullSubj;
 
@@ -9092,6 +9124,9 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                if (tblIdx == -1 && isLotsNullSubj[*sbt]){      
                        // A lots-of-null subject
                        insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
+                       
+                       //Update propTypes
+                       updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap, tblIdx, subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,0);
 
                        continue; 
                }
@@ -9112,6 +9147,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        if (objType != URI){ //Must be a dirty one --> put to 
pso
                                //printf("Dirty FK at tbl %d | propId " BUNFMT 
" \n", tblIdx, *pbt);
                                insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
+
+                               //Update propTypes
+                               updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap,tblIdx, subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,0);
+
                                continue; 
                        }
                        else{ //  
@@ -9119,6 +9158,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                                if (tmpOidTblIdx != 
csPropTypes[tblIdx].lstPropTypes[tmpPropIdx].refTblId){
                                        //printf("Dirty FK at tbl %d | propId " 
BUNFMT " \n", tblIdx, *pbt);
                                        
insToPSO(cstablestat->pbat,cstablestat->sbat, cstablestat->obat, pbt, sbt, obt);
+
+                                       //Update propTypes
+                                       
updatePropTypeForRemovedTriple(csPropTypes, tmpTblIdxPropIdxMap,tblIdx, 
subjCSMap, csTblIdxMapping, *sbt, *pbt, &lastRemovedProp, &lastRemovedSubj,0);
+
                                        continue; 
                                }
                        }
@@ -9391,6 +9434,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        else{   // This is an extra object value
                                insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
                                //printf(" Extra object value ==> To PSO \n");
+
+                               //Update propTypes
+                               updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,1);
+
                                continue; 
                        }
                }
@@ -9402,6 +9449,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                if (tmpTableType == PSOTBL){                    //For 
infrequent type ---> go to PSO
                        insToPSO(cstablestat->pbat,cstablestat->sbat, 
cstablestat->obat, pbt, sbt, obt);
                        //printf(" ==> To PSO \n");
+
+                       //Update propTypes
+                       updatePropTypeForRemovedTriple(csPropTypes, 
tmpTblIdxPropIdxMap, tblIdx,subjCSMap, csTblIdxMapping, *sbt, *pbt, 
&lastRemovedProp, &lastRemovedSubj,0);
+
                        continue; 
                }
 
@@ -9841,7 +9892,7 @@ RDFreorganize(int *ret, CStableStat *cst
        printf (" Prepare and create sub-sorted PSO took  %f seconds.\n", 
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
        tmpLastT = curT;                
        returnStr = RDFdistTriplesToCSs(ret, &sNewBat->batCacheid, 
&pNewBat->batCacheid, &oNewBat->batCacheid, mapbatid, 
-                       &lmap->batCacheid, &rmap->batCacheid, propStat, 
cstablestat, csPropTypes, lastSubjId, isLotsNullSubj);
+                       &lmap->batCacheid, &rmap->batCacheid, propStat, 
cstablestat, csPropTypes, lastSubjId, isLotsNullSubj, subjCSMap, 
csTblIdxMapping);
        printf("Return value from RDFdistTriplesToCSs is %s \n", returnStr);
        if (returnStr != MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in distributing 
triples to BATs using CSs");           
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -449,7 +449,7 @@ typedef struct CSSampleExtend{
 } CSSampleExtend;
 
 rdf_export str
-RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,bat 
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat 
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj);
+RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,bat 
*mbatid, bat *lmapbatid, bat *rmapbatid, PropStat* propStat, CStableStat 
*cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId, char *isLotsNullSubj, 
oid *subjCSMap, int* csTblIdxMapping);
 
 rdf_export str 
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, bat *ontbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, 
oid *maxCSoid, int *maxNumPwithDup, CSlabel** labels, CSrel 
**csRelBetweenMergeFreqSet);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to