Changeset: 8894a9524217 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8894a9524217
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Gather all the BATs storing CS (characteristic set) information into a single CSBats struct


diffs (262 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -42,6 +42,7 @@ static void copyOidSet(oid* dest, oid* o
        }
 }
 
+/*
 static void printArray(oid* inputArr, int num){
        int i; 
        printf("Print array \n");
@@ -50,7 +51,7 @@ static void printArray(oid* inputArr, in
        }
        printf("End of array \n ");
 }
-
+*/
 
 static void initArray(oid* inputArr, int num, oid defaultValue){
        int i; 
@@ -158,7 +159,7 @@ void appendArrayToBat(BAT *b, BUN* inArr
 }
 
 static 
-void checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, oid* key, int 
numK){
+char checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, oid* key, int 
numK){
        oid *offset; 
        oid *offset2; 
        int numP; 
@@ -180,22 +181,19 @@ void checkCSduplication(BAT* pOffsetBat,
 
        // Check each value
        if (numK != numP) {
-               printf("No duplication \n");
-               return; 
+               return 0; 
        }
        else{
                existvalue = (oid *)Tloc(fullPBat, *offset);    
                for (i = 0; i < numP; i++){
                        //if (key[i] != (int)*existvalue++) {
                        if (key[i] != existvalue[i]) {
-                               printf("No duplication \n");
-                               return;
+                               return 0;
                        }       
                }
        }
        
-       printf("There is duplication \n");
-       return;
+       return 1;
 } 
 /*
  * Put a CS to the hashmap. 
@@ -205,7 +203,7 @@ void checkCSduplication(BAT* pOffsetBat,
  *
  * */
 static 
-oid putaCStoHash(BAT* hsKeyBat, BAT* pOffsetBat, BAT* fullPBat, oid subjId, 
oid* key, int num, 
+oid putaCStoHash(CSBats *csBats, oid subjId, oid* key, int num, 
                oid *csoid, char isStoreFreqCS, int freqThreshold, CSset 
**freqCSset){
        BUN     csKey; 
        int     freq = 0; 
@@ -213,31 +211,38 @@ oid putaCStoHash(BAT* hsKeyBat, BAT* pOf
        BUN     bun; 
        BUN     offset; 
        oid     csId;           /* Id of the characteristic set */
+       char    isDuplicate = 0; 
 
        csKey = RDF_hash_oidlist(key, num);
-       bun = BUNfnd(BATmirror(hsKeyBat),(ptr) &csKey);
+       bun = BUNfnd(BATmirror(csBats->hsKeyBat),(ptr) &csKey);
        if (bun == BUN_NONE) {
-               if (hsKeyBat->T->hash && BATcount(hsKeyBat) > 4 * 
hsKeyBat->T->hash->mask) {
-                       HASHdestroy(hsKeyBat);
-                       BAThash(BATmirror(hsKeyBat), 2*BATcount(hsKeyBat));
+               if (csBats->hsKeyBat->T->hash && BATcount(csBats->hsKeyBat) > 4 
* csBats->hsKeyBat->T->hash->mask) {
+                       HASHdestroy(csBats->hsKeyBat);
+                       BAThash(BATmirror(csBats->hsKeyBat), 
2*BATcount(csBats->hsKeyBat));
                }
-               hsKeyBat = BUNappend(hsKeyBat, (ptr) &csKey, TRUE);
+
+               csBats->hsKeyBat = BUNappend(csBats->hsKeyBat, (ptr) &csKey, 
TRUE);
 
                
                csId = *csoid;
                (*csoid)++;
                
-               offset = BUNlast(fullPBat);
+               offset = BUNlast(csBats->fullPBat);
                /* Add list of p to fullPBat and pOffsetBat*/
-               BUNappend(pOffsetBat, &offset , TRUE);
-               appendArrayToBat(fullPBat, key, num);
+               BUNappend(csBats->pOffsetBat, &offset , TRUE);
+               appendArrayToBat(csBats->fullPBat, key, num);
 
        }
        else{
-               printf("This CS exists \n");    
+               printf("Same HashKey: ");       
                csId = bun; 
                /* Check whether it is really an duplication (same hashvalue 
but different list of */
-               checkCSduplication(pOffsetBat, fullPBat, bun, key, num );
+               isDuplicate = checkCSduplication(csBats->pOffsetBat, 
csBats->fullPBat, bun, key, num );
+
+               if (isDuplicate == 0) 
+                       printf(" No duplication (new CS) \n");  
+               else
+                       printf(" Duplication (existed CS) \n"); 
 
                if (isStoreFreqCS == 1){        /* Store the frequent CS to the 
CSset*/
                        //printf("FreqCS: Support = %d, Threshold %d  \n ", 
freq, freqThreshold);
@@ -485,6 +490,52 @@ str getReferCS(BAT *sbat, BAT *pbat, oid
 }
 */
 
+static 
+CSBats* initCSBats(void){
+
+       CSBats *csBats = (CSBats *) malloc(sizeof(CSBats));
+       csBats->hsKeyBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+
+       BATseqbase(csBats->hsKeyBat, 0);
+       
+       if (csBats->hsKeyBat == NULL) {
+               return NULL; 
+       }
+       csBats->hsValueBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
+
+       if (csBats->hsValueBat == NULL) {
+               return NULL; 
+       }
+       csBats->pOffsetBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+       
+       if (csBats->pOffsetBat == NULL) {
+               return NULL; 
+       }
+       csBats->fullPBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+       
+       if (csBats->fullPBat == NULL) {
+               return NULL; 
+       }
+       csBats->freqBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
+       
+       if (csBats->freqBat == NULL) {
+               return NULL; 
+       }
+
+       return csBats; 
+}
+static 
+void freeCSBats(CSBats *csBats){
+       BBPreclaim(csBats->hsKeyBat); 
+       BBPreclaim(csBats->hsValueBat); 
+       BBPreclaim(csBats->freqBat); 
+       BBPreclaim(csBats->pOffsetBat); 
+       BBPreclaim(csBats->fullPBat); 
+
+       free(csBats);
+}
+
+
 /* Extract CS from SPO triples table */
 str
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int 
*freqThreshold){
@@ -502,11 +553,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        CSset   *freqCSset;     /* Set of frequent CSs */
        oid     objType;
 
-       BAT     *hsKeyBat; 
-       //BAT   *hsValueBat;
-       BAT     *pOffsetBat;    /* BAT storing the offset for set of 
properties, refer to fullPBat */
-       BAT     *fullPBat;      /* Stores all set of properties */
-
+       CSBats  *csBats; 
        oid     *subjCSMap;     /* Store the correspoinding CS Id for each 
subject */
        BUN     *maxSoid;       
        oid     returnCSid; 
@@ -536,15 +583,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
        pi = bat_iterator(pbat); 
        oi = bat_iterator(obat);
 
-       hsKeyBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
-       //hsValueBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
-       pOffsetBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
-       fullPBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
 
-       if (hsKeyBat == NULL) {
-               throw(MAL, "rdf.RDFextractCSwithTypes", "Error in BAT 
creation");
-       }
-       BATseqbase(hsKeyBat, 0);
+       csBats = initCSBats();
+
 
        freqCSset = initCSset();
 
@@ -557,7 +598,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
                sbt = (oid *) BUNtloc(si, p);           
                if (*sbt != curS){
                        if (p != 0){    /* Not the first S */
-                               returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, 
fullPBat, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); 
+                               returnCSid = putaCStoHash(csBats, curS, buff, 
numP, &CSoid, 1, *freqThreshold, &freqCSset); 
 
                                subjCSMap[curS] = returnCSid;                   
        
 
@@ -595,7 +636,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        }
        
        /*put the last CS */
-       returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, curS, buff, 
numP, &CSoid, 1, *freqThreshold, &freqCSset ); 
+       returnCSid = putaCStoHash(csBats, curS, buff, numP, &CSoid, 1, 
*freqThreshold, &freqCSset ); 
        
        subjCSMap[curS] = returnCSid;                           
 
@@ -615,24 +656,16 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        //getStatisticCSsBySupports(csMap, 5000, 1, 0);
 
-       printf("pOffsetBat ------- ");
-       BATprint(pOffsetBat);
-
-       printf("fullBat ------- ");
-       BATprint(fullPBat);
-
-       printArray(subjCSMap,(int) *maxSoid); 
 
        BBPreclaim(sbat); 
        BBPreclaim(pbat); 
-
-       BBPreclaim(hsKeyBat); 
-       BBPreclaim(pOffsetBat); 
-       BBPreclaim(fullPBat); 
+       BBPreclaim(obat);
        
        free (buff); 
        free (subjCSMap); 
 
+       freeCSBats(csBats);
+               
        freeCSset(freqCSset); 
 
 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -33,9 +33,12 @@ rdf_export str
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int 
*freqThreshold);
 
 typedef struct {
-       BAT*    keyBat; 
-       BAT*    valueBat; 
-} hsBats; 
+       BAT*    hsKeyBat; 
+       BAT*    hsValueBat; 
+       BAT*    freqBat;    /* Store the frequency of each Characteristic set 
*/        
+       BAT*    pOffsetBat; /* BAT storing the offset for set of properties, 
refer to fullPBat */
+       BAT*    fullPBat;   /* Stores all set of properties */  
+} CSBats;      // BATs for storing all information about CSs
 
 typedef struct CS
 {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to