Changeset: 78cc3766e723 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=78cc3766e723
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Fix the bug while probing the FK relationship in a large dataset.

- Pay attention to the last value returned by HASHloop: it is BUN_NONE.


diffs (269 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -80,6 +80,91 @@ void addCStoSet(CSset *csSet, CS item)
 }
 
 static 
+CSrel* creataCSrel(oid csoid){
+       CSrel *csrel = malloc(sizeof(CSrel));
+       csrel->origCSoid = csoid; 
+       csrel->lstRefCSoid = (oid*) malloc(sizeof(oid) * INIT_NUM_CSREL);
+       csrel->lstCnt = (int*) malloc(sizeof(int) * INIT_NUM_CSREL);            
+       csrel->numRef = 0;
+       csrel->numAllocation = INIT_NUM_CSREL;
+
+       return csrel; 
+}
+
+/*
+ * Build an array of numCSrel CSrel records. Entry i describes the CS with
+ * oid i and starts without any reference.
+ *
+ * Returns the array, or NULL if an allocation fails (everything allocated
+ * so far is released). Valid indexes are 0 .. numCSrel-1.
+ * NOTE(review): callers that index the result by CS oid must pass a count
+ * that is larger than the biggest oid they use -- confirm at the call site.
+ */
+static 
+CSrel* initCSrelset(oid numCSrel){
+       oid i; 
+       oid j; 
+       CSrel *csrelSet = malloc(sizeof(*csrelSet) * numCSrel); 
+       CSrel *csrel; 
+
+       if (csrelSet == NULL)
+               return NULL;
+
+       for (i = 0; i < numCSrel; i++){
+               csrel = creataCSrel(i); 
+               if (csrel == NULL){
+                       /* Undo the entries that were already set up */
+                       for (j = 0; j < i; j++){
+                               free(csrelSet[j].lstRefCSoid);
+                               free(csrelSet[j].lstCnt);
+                       }
+                       free(csrelSet);
+                       return NULL;
+               }
+               csrelSet[i] = *csrel;
+               /* The struct (and thus its array pointers) now lives in
+                * csrelSet[i]; only the temporary shell is released. */
+               free(csrel);
+       }
+       return csrelSet;
+}
+
+/*
+ * Print, one line per CS, the references stored in the first num entries
+ * of csrelSet. Entries without any reference are not printed. Each line
+ * lists the originating CS followed by every referenced CS and, in
+ * parentheses, the counter stored for that reference.
+ */
+static 
+void printCSrelSet(CSrel *csrelSet, int num){
+       int csIdx; 
+       int refIdx; 
+       CSrel *rel; 
+
+       for (csIdx = 0; csIdx < num; csIdx++){
+               rel = &csrelSet[csIdx];
+
+               /* Only print CS with FK */
+               if (rel->numRef == 0)
+                       continue;
+
+               printf("Relationship i: ");
+               printf("CS " BUNFMT " --> ", rel->origCSoid);
+               for (refIdx = 0; refIdx < rel->numRef; refIdx++){
+                       printf(BUNFMT " (%d) ", rel->lstRefCSoid[refIdx], rel->lstCnt[refIdx]);
+               }
+               printf("\n");
+       }
+}
+
+/*
+ * Record one reference from CS origCSoid to CS refCSoid in csrel.
+ *
+ * If refCSoid is already listed, its counter is incremented. Otherwise it
+ * is appended with a counter of 1; when both arrays are full they are first
+ * grown by INIT_NUM_CSREL slots. csrel must be the record that belongs to
+ * origCSoid (checked with assert).
+ *
+ * If growing the arrays fails, an error is written to stderr and the new
+ * reference is dropped; the references recorded so far stay intact.
+ */
+static 
+void addReltoCSRel(oid origCSoid, oid refCSoid, CSrel *csrel)
+{
+       void *tmp; 
+       int newAllocation; 
+       int i; 
+
+       assert (origCSoid == csrel->origCSoid);
+       (void) origCSoid;       /* only read by the assert above */
+
+       /* Existing reference: just bump its counter */
+       for (i = 0; i < csrel->numRef; i++){
+               if (refCSoid == csrel->lstRefCSoid[i]){
+                       csrel->lstCnt[i]++; 
+                       return; 
+               }
+       }
+
+       /* New reference: make room first when the arrays are full */
+       if (csrel->numRef == csrel->numAllocation) 
+       { 
+               newAllocation = csrel->numAllocation + INIT_NUM_CSREL; 
+
+               tmp = realloc(csrel->lstRefCSoid, (newAllocation * sizeof(oid)));
+               if (!tmp){
+                       fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                       return; 
+               }
+               /* Store it right away: the old block may have been moved,
+                * even if the second realloc below fails. */
+               csrel->lstRefCSoid = (oid*)tmp;
+
+               tmp = realloc(csrel->lstCnt, (newAllocation * sizeof(int)));
+               if (!tmp){
+                       fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                       return; 
+               }
+               csrel->lstCnt = (int*)tmp; 
+
+               /* Publish the new capacity only after both arrays have grown */
+               csrel->numAllocation = newAllocation; 
+       }
+
+       csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+       csrel->lstCnt[csrel->numRef] = 1; 
+       csrel->numRef++;
+}
+
+static 
 void freeCSset(CSset *csSet){
        int i;
        for(i = 0; i < csSet->numCSadded; i ++){
@@ -171,7 +256,7 @@ char checkCSduplication(BAT* hsKeyBat, B
        BATiter bi = bat_iterator(BATmirror(hsKeyBat));
                        
        HASHloop(bi, hsKeyBat->T->hash, pos, (ptr) &cskey){
-               printf("  pos: " BUNFMT, pos);
+               //printf("  pos: " BUNFMT, pos);
 
                offset = (oid *) Tloc(pOffsetBat, pos); 
                if ((pos + 1) < pOffsetBat->batCount){
@@ -204,16 +289,19 @@ char checkCSduplication(BAT* hsKeyBat, B
 
                        //Everything match
                        if (isDuplication == 1){
+                               //printf("Everything match!!!!!");
                                *csId = pos; 
                                return 1; 
                        }
                }
+               
+               
+       }
+       
 
-       }
+       *csId = pos;  // = BUN_NONE
 
-       *csId = pos;
-
-       return 1;
+       return 0;
 }
 
 /*
@@ -300,9 +388,10 @@ oid putaCStoHash(CSBats *csBats, oid sub
        if (bun == BUN_NONE) {
                addNewCS(csBats, &csKey, key, csoid, num);
                csId = *csoid; 
+               //assert(csId != BUN_NONE);
        }
        else{
-               printf("Same HashKey: ");       
+               //printf("Same HashKey: ");     
                /* Check whether it is really an duplication (same hashvalue 
but different list of */
                isDuplicate = checkCSduplication(csBats->hsKeyBat, 
csBats->pOffsetBat, csBats->fullPBat, csKey, key, num, &csId);
 
@@ -314,7 +403,7 @@ oid putaCStoHash(CSBats *csBats, oid sub
 
                }
                else{
-                       printf(" Duplication (existed CS) at csId = " BUNFMT 
"\n", csId);       
+                       //printf(" Duplication (existed CS) at csId = " BUNFMT 
"\n", csId);     
 
                        // Update freqCS value
                        freq = (int *)Tloc(csBats->freqBat, csId);
@@ -330,6 +419,12 @@ oid putaCStoHash(CSBats *csBats, oid sub
                }
        }
 
+       if (csId == BUN_NONE){
+               printf("Not acceptable cdId " BUNFMT " \n", csId);
+       }
+
+       //assert(csId != BUN_NONE);
+
        return csId;
 }
 
@@ -620,7 +715,7 @@ void freeCSBats(CSBats *csBats){
 
 
 static 
-str RDFassignCSId(int *ret, BAT *sbat, BATiter si, BATiter pi, CSset 
*freqCSset, int *freqThreshold, CSBats* csBats, oid *subjCSMap){
+str RDFassignCSId(int *ret, BAT *sbat, BATiter si, BATiter pi, CSset 
*freqCSset, int *freqThreshold, CSBats* csBats, oid *subjCSMap, oid *maxCSoid){
 
        BUN     p, q; 
        oid     *sbt, *pbt; 
@@ -650,6 +745,9 @@ str RDFassignCSId(int *ret, BAT *sbat, B
 
                                if (numP > maxNumProp) 
                                        maxNumProp = numP; 
+                               if (returnCSid > *maxCSoid)
+                                       *maxCSoid = returnCSid; 
+                                
                        }
                        curS = *sbt; 
                        curP = 0;
@@ -679,6 +777,9 @@ str RDFassignCSId(int *ret, BAT *sbat, B
        if (numP > maxNumProp) 
                maxNumProp = numP; 
                
+       if (returnCSid > *maxCSoid)
+               *maxCSoid = returnCSid; 
+
        free (buff); 
 
        *ret = 1; 
@@ -688,7 +789,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
 
 static 
 str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi, 
CSset *freqCSset, 
-               int *freqThreshold, CSBats* csBats, oid *subjCSMap, BUN 
maxSoid){
+               int *freqThreshold, CSBats* csBats, oid *subjCSMap, BUN 
maxSoid, BUN maxCSoid){
 
        BUN     p, q; 
        oid     *sbt, *pbt, *obt; 
@@ -701,6 +802,9 @@ str RDFrelationships(int *ret, BAT *sbat
        int     maxNumProp = 0; 
        oid     objType;
        oid     returnCSid; 
+       CSrel*  csrelSet;
+
+       csrelSet = initCSrelset(maxCSoid);
        
        buff = (oid *) malloc (sizeof(oid) * INIT_PROPERTY_NUM);
 
@@ -744,7 +848,8 @@ str RDFrelationships(int *ret, BAT *sbat
                /* Look at sbat*/
                if (objType == URI){
                        if (*obt <= maxSoid && subjCSMap[*obt] != BUN_NONE){
-                               printf(" CS " BUNFMT " refer to CS " BUNFMT " 
\n",*sbt, subjCSMap[*obt]);
+                               ////printf(" Subject " BUNFMT " refer to CS " 
BUNFMT " \n",*sbt, subjCSMap[*obt]);
+                               addReltoCSRel(subjCSMap[*sbt], subjCSMap[*obt], 
&csrelSet[subjCSMap[*sbt]]);
                        }
                }
        }
@@ -759,6 +864,8 @@ str RDFrelationships(int *ret, BAT *sbat
                
        free (buff); 
 
+       printCSrelSet(csrelSet,maxCSoid);  
+
        *ret = 1; 
 
        return MAL_SUCCEED; 
@@ -775,6 +882,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        CSBats  *csBats; 
        oid     *subjCSMap;     /* Store the correspoinding CS Id for each 
subject */
        BUN     *maxSoid;       
+       oid     maxCSoid = 0; 
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
                throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
@@ -805,10 +913,12 @@ RDFextractCSwithTypes(int *ret, bat *sba
        initArray(subjCSMap, (*maxSoid), BUN_NONE);
 
        //Phase 1: Assign an ID for each CS
-       RDFassignCSId(ret, sbat, si, pi, freqCSset, freqThreshold, csBats, 
subjCSMap);
+       RDFassignCSId(ret, sbat, si, pi, freqCSset, freqThreshold, csBats, 
subjCSMap, &maxCSoid);
+
+       printf("Max CS oid: " BUNFMT "\n", maxCSoid);
 
        //Phase 2: Check the relationship       
-       RDFrelationships(ret, sbat, si, pi, oi, freqCSset, freqThreshold, 
csBats, subjCSMap, *maxSoid);
+       RDFrelationships(ret, sbat, si, pi, oi, freqCSset, freqThreshold, 
csBats, subjCSMap, *maxSoid, maxCSoid);
 
 
        printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -57,4 +57,13 @@ typedef struct CSset{
        int numAllocation;
 } CSset; 
 
+/* Number of reference slots a CSrel is created with; also the number of
+ * slots added each time its arrays have to grow. */
+#define INIT_NUM_CSREL 4
+/* The references going out from one CS to other CSs. lstRefCSoid and
+ * lstCnt are parallel arrays: lstCnt[i] belongs to lstRefCSoid[i]. */
+typedef struct CSrel{  
+       // oid of the CS the references start from
+       oid  origCSoid; 
+       // oids of the referenced CSs (numRef entries in use)
+       oid* lstRefCSoid;               
+       int* lstCnt;    // Count per reference
+       // Number of entries currently used in both arrays
+       int  numRef; 
+       // Number of entries both arrays currently have room for
+       int  numAllocation; 
+} CSrel;
+
 #endif /* _RDFSCHEMA_H_ */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to