Changeset: 8894a9524217 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8894a9524217
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Gather all the Bats storing CS information in one CSBats diffs (262 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -42,6 +42,7 @@ static void copyOidSet(oid* dest, oid* o } } +/* static void printArray(oid* inputArr, int num){ int i; printf("Print array \n"); @@ -50,7 +51,7 @@ static void printArray(oid* inputArr, in } printf("End of array \n "); } - +*/ static void initArray(oid* inputArr, int num, oid defaultValue){ int i; @@ -158,7 +159,7 @@ void appendArrayToBat(BAT *b, BUN* inArr } static -void checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, oid* key, int numK){ +char checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, oid* key, int numK){ oid *offset; oid *offset2; int numP; @@ -180,22 +181,19 @@ void checkCSduplication(BAT* pOffsetBat, // Check each value if (numK != numP) { - printf("No duplication \n"); - return; + return 0; } else{ existvalue = (oid *)Tloc(fullPBat, *offset); for (i = 0; i < numP; i++){ //if (key[i] != (int)*existvalue++) { if (key[i] != existvalue[i]) { - printf("No duplication \n"); - return; + return 0; } } } - printf("There is duplication \n"); - return; + return 1; } /* * Put a CS to the hashmap. 
@@ -205,7 +203,7 @@ void checkCSduplication(BAT* pOffsetBat, * * */ static -oid putaCStoHash(BAT* hsKeyBat, BAT* pOffsetBat, BAT* fullPBat, oid subjId, oid* key, int num, +oid putaCStoHash(CSBats *csBats, oid subjId, oid* key, int num, oid *csoid, char isStoreFreqCS, int freqThreshold, CSset **freqCSset){ BUN csKey; int freq = 0; @@ -213,31 +211,38 @@ oid putaCStoHash(BAT* hsKeyBat, BAT* pOf BUN bun; BUN offset; oid csId; /* Id of the characteristic set */ + char isDuplicate = 0; csKey = RDF_hash_oidlist(key, num); - bun = BUNfnd(BATmirror(hsKeyBat),(ptr) &csKey); + bun = BUNfnd(BATmirror(csBats->hsKeyBat),(ptr) &csKey); if (bun == BUN_NONE) { - if (hsKeyBat->T->hash && BATcount(hsKeyBat) > 4 * hsKeyBat->T->hash->mask) { - HASHdestroy(hsKeyBat); - BAThash(BATmirror(hsKeyBat), 2*BATcount(hsKeyBat)); + if (csBats->hsKeyBat->T->hash && BATcount(csBats->hsKeyBat) > 4 * csBats->hsKeyBat->T->hash->mask) { + HASHdestroy(csBats->hsKeyBat); + BAThash(BATmirror(csBats->hsKeyBat), 2*BATcount(csBats->hsKeyBat)); } - hsKeyBat = BUNappend(hsKeyBat, (ptr) &csKey, TRUE); + + csBats->hsKeyBat = BUNappend(csBats->hsKeyBat, (ptr) &csKey, TRUE); csId = *csoid; (*csoid)++; - offset = BUNlast(fullPBat); + offset = BUNlast(csBats->fullPBat); /* Add list of p to fullPBat and pOffsetBat*/ - BUNappend(pOffsetBat, &offset , TRUE); - appendArrayToBat(fullPBat, key, num); + BUNappend(csBats->pOffsetBat, &offset , TRUE); + appendArrayToBat(csBats->fullPBat, key, num); } else{ - printf("This CS exists \n"); + printf("Same HashKey: "); csId = bun; /* Check whether it is really an duplication (same hashvalue but different list of */ - checkCSduplication(pOffsetBat, fullPBat, bun, key, num ); + isDuplicate = checkCSduplication(csBats->pOffsetBat, csBats->fullPBat, bun, key, num ); + + if (isDuplicate == 0) + printf(" No duplication (new CS) \n"); + else + printf(" Duplication (existed CS) \n"); if (isStoreFreqCS == 1){ /* Store the frequent CS to the CSset*/ //printf("FreqCS: Support = %d, 
Threshold %d \n ", freq, freqThreshold); @@ -485,6 +490,52 @@ str getReferCS(BAT *sbat, BAT *pbat, oid } */ +static +CSBats* initCSBats(void){ + + CSBats *csBats = (CSBats *) malloc(sizeof(CSBats)); + csBats->hsKeyBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + + BATseqbase(csBats->hsKeyBat, 0); + + if (csBats->hsKeyBat == NULL) { + return NULL; + } + csBats->hsValueBat = BATnew(TYPE_void, TYPE_int, smallbatsz); + + if (csBats->hsValueBat == NULL) { + return NULL; + } + csBats->pOffsetBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + + if (csBats->pOffsetBat == NULL) { + return NULL; + } + csBats->fullPBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); + + if (csBats->fullPBat == NULL) { + return NULL; + } + csBats->freqBat = BATnew(TYPE_void, TYPE_int, smallbatsz); + + if (csBats->freqBat == NULL) { + return NULL; + } + + return csBats; +} +static +void freeCSBats(CSBats *csBats){ + BBPreclaim(csBats->hsKeyBat); + BBPreclaim(csBats->hsValueBat); + BBPreclaim(csBats->freqBat); + BBPreclaim(csBats->pOffsetBat); + BBPreclaim(csBats->fullPBat); + + free(csBats); +} + + /* Extract CS from SPO triples table */ str RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int *freqThreshold){ @@ -502,11 +553,7 @@ RDFextractCSwithTypes(int *ret, bat *sba CSset *freqCSset; /* Set of frequent CSs */ oid objType; - BAT *hsKeyBat; - //BAT *hsValueBat; - BAT *pOffsetBat; /* BAT storing the offset for set of properties, refer to fullPBat */ - BAT *fullPBat; /* Stores all set of properties */ - + CSBats *csBats; oid *subjCSMap; /* Store the correspoinding CS Id for each subject */ BUN *maxSoid; oid returnCSid; @@ -536,15 +583,9 @@ RDFextractCSwithTypes(int *ret, bat *sba pi = bat_iterator(pbat); oi = bat_iterator(obat); - hsKeyBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); - //hsValueBat = BATnew(TYPE_void, TYPE_int, smallbatsz); - pOffsetBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); - fullPBat = BATnew(TYPE_void, TYPE_oid, smallbatsz); - if (hsKeyBat == NULL) { - 
throw(MAL, "rdf.RDFextractCSwithTypes", "Error in BAT creation"); - } - BATseqbase(hsKeyBat, 0); + csBats = initCSBats(); + freqCSset = initCSset(); @@ -557,7 +598,7 @@ RDFextractCSwithTypes(int *ret, bat *sba sbt = (oid *) BUNtloc(si, p); if (*sbt != curS){ if (p != 0){ /* Not the first S */ - returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); + returnCSid = putaCStoHash(csBats, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); subjCSMap[curS] = returnCSid; @@ -595,7 +636,7 @@ RDFextractCSwithTypes(int *ret, bat *sba } /*put the last CS */ - returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset ); + returnCSid = putaCStoHash(csBats, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset ); subjCSMap[curS] = returnCSid; @@ -615,24 +656,16 @@ RDFextractCSwithTypes(int *ret, bat *sba //getStatisticCSsBySupports(csMap, 5000, 1, 0); - printf("pOffsetBat ------- "); - BATprint(pOffsetBat); - - printf("fullBat ------- "); - BATprint(fullPBat); - - printArray(subjCSMap,(int) *maxSoid); BBPreclaim(sbat); BBPreclaim(pbat); - - BBPreclaim(hsKeyBat); - BBPreclaim(pOffsetBat); - BBPreclaim(fullPBat); + BBPreclaim(obat); free (buff); free (subjCSMap); + freeCSBats(csBats); + freeCSset(freqCSset); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -33,9 +33,12 @@ rdf_export str RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int *freqThreshold); typedef struct { - BAT* keyBat; - BAT* valueBat; -} hsBats; + BAT* hsKeyBat; + BAT* hsValueBat; + BAT* freqBat; /* Store the frequency of each Characteristic set */ + BAT* pOffsetBat; /* BAT storing the offset for set of properties, refer to fullPBat */ + BAT* fullPBat; /* Stores all set of properties */ +} CSBats; // BATs for storing all information about CSs typedef struct CS 
{
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list