Changeset: 5a6592348b31 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5a6592348b31
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Create tables corresponding to type-specific CS's.

Each base CS table is divided into a default-type table and a
non-default-type table.

These two tables are then combined into one view.

This has been checked with a test dataset.


diffs (truncated from 790 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -558,6 +558,48 @@ str printCSrelWithMaxSet(CSset *freqCSse
 }
 
 
+/*
+ * Mark the most frequent sub-CS of each CS as its default, then flag every
+ * subject whose (CS, sub-CS) pair is such a default.
+ *   subcsset        num sub-CS sets; also indexed by the ids in subjCSMap
+ *   sbat            BAT whose tail values are used as subject oids
+ *   subjSubCSMap    subject oid -> index into that CS's subCSs[]
+ *   subjCSMap       subject oid -> index into subcsset[]
+ *   subjdefaultMap  out: set to 1 for default-typed subjects, else untouched
+ */
+static
+void setdefaultSubCSs(SubCSSet *subcsset, int num, BAT *sbat, oid *subjSubCSMap,
+               oid *subjCSMap, char *subjdefaultMap){
+       int     i, j, tmpmaxfreq, defaultidx;
+       BUN     p, q;
+       BATiter si;
+       oid     *sbt, csId, subId;
+
+       for (i = 0; i < num; i++){
+               if (subcsset[i].numSubCS == 0) continue;
+               tmpmaxfreq = 0;
+               defaultidx = -1;
+               /* Strict '>' keeps the first sub-CS when frequencies tie */
+               for (j = 0; j < subcsset[i].numSubCS; j++){
+                       if (subcsset[i].freq[j] > tmpmaxfreq){
+                               tmpmaxfreq = subcsset[i].freq[j];
+                               defaultidx = j;
+                       }
+               }
+               /* No positive frequency found: skip instead of writing subCSs[-1] */
+               if (defaultidx >= 0)
+                       subcsset[i].subCSs[defaultidx].isdefault = 1;
+       }
+
+       si = bat_iterator(sbat);
+       BATloop(sbat, p, q){
+               sbt = (oid *) BUNtloc(si, p);
+               csId = subjCSMap[*sbt];
+               subId = subjSubCSMap[*sbt];
+               if (subcsset[csId].subCSs[subId].isdefault == 1)
+                       subjdefaultMap[*sbt] = 1;
+       }
+}
 
 static 
 void printSubCSInformation(SubCSSet *subcsset, BAT* freqBat, int num, char 
isWriteTofile, int freqThreshold){
@@ -602,7 +644,7 @@ void printSubCSInformation(SubCSSet *sub
                for (i = 0; i < num; i++){
                        if (subcsset[i].numSubCS != 0){ 
                                freq  = (int *) Tloc(freqBat, i);
-                               fprintf(fout, "CS " BUNFMT ": ", 
subcsset[i].csId);
+                               fprintf(fout, "CS " BUNFMT " (Freq: %d) : ", 
subcsset[i].csId, *freq);
                                        
                                if (*freq > freqThreshold){
                                        fprintf(foutfreq, BUNFMT "  ", 
subcsset[i].csId);
@@ -610,7 +652,11 @@ void printSubCSInformation(SubCSSet *sub
                                }
                                numSubCSFilter = 0;
                                for (j = 0; j < subcsset[i].numSubCS; j++){
-                                       fprintf(fout, BUNFMT " (%d) ", 
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);     
+                                       if (subcsset[i].subCSs[j].isdefault == 
1)
+                                               fprintf(fout, "(default) 
"BUNFMT " (%d) ", subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]); 
+                                       else
+                                               fprintf(fout, BUNFMT " (%d) ", 
subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]);     
+                                       
                                        
                                        // Check frequent subCS which appears 
in > 1% 
                                        if (*freq <  subcsset[i].freq[j]*10){
@@ -640,6 +686,7 @@ SubCS* creatSubCS(oid subCSId, int numP,
        subcs->subCSId = subCSId;
        subcs->numSubTypes = numP; 
        subcs->sign = subCSsign; 
+       subcs->isdefault = 0; 
        return subcs; 
 }
 
@@ -656,7 +703,7 @@ SubCSSet* createaSubCSSet(oid csId){
 }
 
 static 
-SubCSSet* initCS_SubCSMap(oid numSubCSSet){
+SubCSSet* initCS_SubCSSets(oid numSubCSSet){
        oid i; 
        SubCSSet *subcssets = (SubCSSet*) malloc(sizeof(SubCSSet) * 
numSubCSSet); 
        SubCSSet *subcsset;
@@ -741,7 +788,7 @@ void addSubCStoSet(SubCSSet *subcsSet, S
 }
 
 static 
-oid addSubCS(char *buff, int numP, int csId, SubCSSet* csSubCSMap){
+oid addSubCS(char *buff, int numP, int csId, SubCSSet* csSubCSSet){
        SubCSSet *subcsset;
        oid subCSsign; 
        char isFound; 
@@ -749,7 +796,7 @@ oid addSubCS(char *buff, int numP, int c
        SubCS *subCS; 
 
 
-       subcsset = &(csSubCSMap[csId]);
+       subcsset = &(csSubCSSet[csId]);
 
        // Check the duplication
        subCSsign = RDF_hash_Tyleslist(buff, numP);
@@ -2499,7 +2546,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
 
 static 
 str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,  
-               oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSMap, CSrel 
*csrelSet, BUN maxSoid, int maxNumPwithDup){
+               oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSSet, CSrel 
*csrelSet, BUN maxSoid, int maxNumPwithDup){
 
        BUN             p, q; 
        oid             *sbt = 0, *obt, *pbt;
@@ -2529,10 +2576,10 @@ str RDFrelationships(int *ret, BAT *sbat
                sbt = (oid *) BUNtloc(si, p);           
                if (*sbt != curS){
                        if (p != 0){    /* Not the first S */
-                               returnSubCSid = addSubCS(buffTypes, 
numPwithDup, subjCSMap[curS], csSubCSMap);
+                               returnSubCSid = addSubCS(buffTypes, 
numPwithDup, subjCSMap[curS], csSubCSSet);
 
                                //Get the subCSId
-                               subjSubCSMap[*sbt] = returnSubCSid; 
+                               subjSubCSMap[curS] = returnSubCSid; 
 
                        }
                        curS = *sbt; 
@@ -2574,7 +2621,7 @@ str RDFrelationships(int *ret, BAT *sbat
        }
        
        /* Check for the last CS */
-       returnSubCSid = addSubCS(buffTypes, numPwithDup, subjCSMap[*sbt], 
csSubCSMap);
+       returnSubCSid = addSubCS(buffTypes, numPwithDup, subjCSMap[*sbt], 
csSubCSSet);
        subjSubCSMap[*sbt] = returnSubCSid; 
 
        free (buffTypes); 
@@ -2745,13 +2792,14 @@ int     ontmetadataCount = 0;
 
 /* Extract CS from SPO triples table */
 str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid 
*maxCSoid){
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid 
*maxCSoid, char **subjdefaultMap){
 
        BAT             *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL; 
        BATiter         si, pi, oi;     /*iterator for BAT of s,p,o columns in 
spo table */
 
        CSBats          *csBats; 
        oid             *subjSubCSMap;  /* Store the corresponding CS sub Id 
for each subject */
+
        BUN             *maxSoid;       
        int             maxNumProp = 0;
        int             maxNumPwithDup = 0; 
@@ -2760,7 +2808,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        CSrel           *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
        CSrel           *csrelBetweenMaxFreqSet; 
        CSmergeRel      *csRelBetweenMergeFreqSet;
-       SubCSSet        *csSubCSMap; 
+       SubCSSet        *csSubCSSet; 
 
        int*            csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be 
removed in the future .... */
 
@@ -2811,8 +2859,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        *subjCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1)); 
        subjSubCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1)); 
+       *subjdefaultMap = (char *) malloc (sizeof(char) * ((*maxSoid) + 1));
        
        initArray(*subjCSMap, (*maxSoid) + 1, BUN_NONE);
+       initCharArray(*subjdefaultMap,(*maxSoid) + 1, 0); 
 
 
        //Phase 1: Assign an ID for each CS
@@ -2840,14 +2890,16 @@ RDFextractCSwithTypes(int *ret, bat *sba
        csrelSet = initCSrelset(*maxCSoid + 1);
 
 
-       csSubCSMap = initCS_SubCSMap(*maxCSoid +1); 
-
-       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap, 
csSubCSMap, csrelSet, *maxSoid, maxNumPwithDup);
+       csSubCSSet = initCS_SubCSSets(*maxCSoid +1); 
+
+       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap, 
csSubCSSet, csrelSet, *maxSoid, maxNumPwithDup);
 
 
        printCSrelSet(csrelSet,csFreqMap, csBats->freqBat, *maxCSoid + 1, 1, 
*freqThreshold);  
 
-       printSubCSInformation(csSubCSMap, csBats->freqBat, *maxCSoid + 1, 1, 
*freqThreshold); 
+       setdefaultSubCSs(csSubCSSet,*maxCSoid + 1, sbat, subjSubCSMap, 
*subjCSMap, *subjdefaultMap);
+
+       printSubCSInformation(csSubCSSet, csBats->freqBat, *maxCSoid + 1, 1, 
*freqThreshold); 
 
        printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
 
@@ -2911,7 +2963,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        free (superCSFreqCSMap);
        free (superCSMergeMaxCSMap); 
 
-       freeCS_SubCSMapSet(csSubCSMap, *maxCSoid + 1); 
+       freeCS_SubCSMapSet(csSubCSSet, *maxCSoid + 1); 
 
        free(csIdFreqIdxMap); 
        free(csRelBetweenMergeFreqSet);
@@ -3033,14 +3085,26 @@ BAT* getOriginalOBat(BAT *obat){
        return origobat; 
 }
 
+/*
+ * In case of using type-specific cs table, we use one more bit at the 
+ * position sizeof(BUN)*8 - NBITS_FOR_CSID - 1 for specifying whether 
+ * a subject has the default data types for its properties or not. 
+ * Thus, the way to calculate the table idx and base idx is changed
+ * */
 static 
-void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid){
+void getTblidFromSoid(oid Soid, int *tbidx, oid *baseSoid, char *isdefault){
        //int   freqCSid;       
+       *isdefault = 0; 
        
-       *tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID))  &  ((1 << 
(NBITS_FOR_CSID-1)) - 1)) ;       
-
-       *baseSoid = Soid - ((oid) (*tbidx) << (sizeof(BUN)*8 - NBITS_FOR_CSID));
+       *tbidx = (int) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID))  &  ((1 << 
(NBITS_FOR_CSID-1)) - 1)) ;
+
        
+#if CSTYPE_TABLE == 1
+       *isdefault = (char) ((Soid >> (sizeof(BUN)*8 - NBITS_FOR_CSID -1))  &  
1 ) ;    
+#endif 
+
+       *baseSoid = Soid - ((oid) (*tbidx * 2 + *isdefault) << (sizeof(BUN)*8 - 
NBITS_FOR_CSID -1));
+
        *tbidx = *tbidx - 1; 
 
        //return freqCSid; 
@@ -3114,8 +3178,11 @@ void initCStablesAndIdxMapping(CStableSt
        cstablestat->obat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
 
        cstablestat->lastInsertedS = (oid**) malloc(sizeof(oid*) * k);
-
        cstablestat->lstcstable = (CStable*) malloc(sizeof(CStable) * k); 
+       #if CSTYPE_TABLE == 1
+       cstablestat->lastInsertedSEx = (oid**) malloc(sizeof(oid*) * k);
+       cstablestat->lstcstableEx = (CStable*) malloc(sizeof(CStable) * k);
+       #endif
 
        k = 0; 
        for (i = 0; i < freqCSset->numCSadded; i++){
@@ -3126,10 +3193,19 @@ void initCStablesAndIdxMapping(CStableSt
                        cstablestat->lastInsertedS[k] = (oid*) 
malloc(sizeof(oid) * tmpNumProp); 
                        cstablestat->lstcstable[k].numCol = tmpNumProp;
                        cstablestat->lstcstable[k].colBats = 
(BAT**)malloc(sizeof(BAT*) * tmpNumProp); 
-                       
+                       #if CSTYPE_TABLE == 1
+                       cstablestat->lastInsertedSEx[k] = (oid*) 
malloc(sizeof(oid) * tmpNumProp); 
+                       cstablestat->lstcstableEx[k].numCol = tmpNumProp;
+                       cstablestat->lstcstableEx[k].colBats = 
(BAT**)malloc(sizeof(BAT*) * tmpNumProp); 
+                       #endif
+
                        for(j = 0; j < tmpNumProp; j++){
                                cstablestat->lstcstable[k].colBats[j] = 
BATnew(TYPE_void, TYPE_oid, smallbatsz);
                                //TODO: use exact aount for each BAT
+                               #if CSTYPE_TABLE == 1
+                               cstablestat->lstcstableEx[k].colBats[j] = 
BATnew(TYPE_void, TYPE_oid, smallbatsz);
+                               #endif
+
                        }
 
                        k++; 
@@ -3167,10 +3243,19 @@ void freeCStableStat(CStableStat* cstabl
        for (i = 0; i < cstablestat->numTables; i++){
                free(cstablestat->lstbatid[i]); 
                free(cstablestat->lastInsertedS[i]); 
+               #if CSTYPE_TABLE == 1
+               free(cstablestat->lastInsertedSEx[i]);  
+               #endif
                for (j = 0; j < cstablestat->numPropPerTable[i];j++){
                        
BBPunfix(cstablestat->lstcstable[i].colBats[j]->batCacheid); 
+                       #if CSTYPE_TABLE == 1
+                       
BBPunfix(cstablestat->lstcstableEx[i].colBats[j]->batCacheid); 
+                       #endif
                }
                free(cstablestat->lstcstable[i].colBats);
+               #if CSTYPE_TABLE == 1
+               free(cstablestat->lstcstableEx[i].colBats);
+               #endif
        }
        BBPunfix(cstablestat->pbat->batCacheid); 
        BBPunfix(cstablestat->sbat->batCacheid); 
@@ -3178,6 +3263,10 @@ void freeCStableStat(CStableStat* cstabl
        free(cstablestat->lstbatid); 
        free(cstablestat->lastInsertedS); 
        free(cstablestat->lstcstable); 
+       #if CSTYPE_TABLE == 1
+       free(cstablestat->lastInsertedSEx); 
+       free(cstablestat->lstcstableEx);
+       #endif
        free(cstablestat->numPropPerTable);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to