Changeset: 0c3d286ac863 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0c3d286ac863
Modified Files:
	monetdb5/extras/rdf/hashmap/hashmap.c
	monetdb5/extras/rdf/hashmap/hashmap.h
	monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Modify the function for storing predicates into a hash (putPtoHash). diffs (truncated from 314 to 300 lines): diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c --- a/monetdb5/extras/rdf/hashmap/hashmap.c +++ b/monetdb5/extras/rdf/hashmap/hashmap.c @@ -1,6 +1,5 @@ -/* - * Generic map implementation. - */ +/* HashMap for the characteristic sets (CSs') in RDF */ + #include <hashmap.h> #include <stdlib.h> @@ -62,6 +61,37 @@ static char intsetcmp(int* key1, int* ke return 0; } +/* Return 1 if sorted arr2[] is a subset of sorted arr1[] + * arr1 has m members, arr2 has n members + * */ + +static int isSubset(int* arr1, int* arr2, int m, int n) +{ + int i = 0, j = 0; + + if(m < n) + return 0; + + while( i < n && j < m ) + { + if( arr1[j] < arr2[i] ) + j++; + else if( arr1[j] == arr2[i] ) + { + j++; + i++; + } + else if( arr1[j] > arr2[i] ) + return 0; + } + + if( i < n ) + return 0; + else + return 1; +} + + /* * Return the integer of the location in data * to store the point to the item, or MAP_FULL. 
@@ -163,6 +193,39 @@ int hashmap_put(map_t in, int* key, int return MAP_OK; } + +/* + * Add a pointer to the hashmap with some key + * This function is ONLY used for the case of + * the predicate in RDF triple + */ +int hashmap_put_forP(map_t in, int* key, int num, any_t value, int support){ + int index; + hashmap_map* m; + + /* Cast the hashmap */ + m = (hashmap_map *) in; + + /* Find a place to put our value */ + index = hashmap_hash(in, key, num); + while(index == MAP_FULL){ + if (hashmap_rehash(in) == MAP_OMEM) { + return MAP_OMEM; + } + index = hashmap_hash(in, key, num); + } + + /* Set the data */ + m->data[index].data = value; + m->data[index].key = key; + m->data[index].num = num; + m->data[index].in_use = 1; + m->data[index].freq = support; + m->size++; + + return MAP_OK; +} + /* * Get your pointer out of the hashmap with a key */ @@ -180,7 +243,7 @@ int hashmap_get(map_t in, int* key, int /* Linear probing, if necessary */ for(i = 0; i<MAX_CHAIN_LENGTH; i++){ - int in_use = m->data[curr].in_use; + char in_use = m->data[curr].in_use; if (in_use == 1){ if ((m->data[curr].num == num) && (intsetcmp(m->data[curr].key,key,num)==0)){ *arg = (m->data[curr].data); @@ -201,6 +264,42 @@ int hashmap_get(map_t in, int* key, int } +/* + * Get your pointer out of the hashmap with a key + * This function is ONLY used for the case of + * the predicate in RDF triple + */ +int hashmap_get_forP(map_t in, int* key, any_t *arg){ + int curr; + int i; + hashmap_map* m; + + /* Cast the hashmap */ + m = (hashmap_map *) in; + + /* Find data location */ + curr = hashmap_hash_int(m, key, 1); + + /* Linear probing, if necessary */ + for(i = 0; i<MAX_CHAIN_LENGTH; i++){ + + int in_use = m->data[curr].in_use; + if (in_use == 1){ + if (*m->data[curr].key == *key){ + *arg = (m->data[curr].data); + return MAP_OK; + } + } + curr = (curr + 1) % m->table_size; + } + + *arg = NULL; + + /* Not found */ + return MAP_MISSING; +} + + /* * Iterate the function parameter over each element in the 
hashmap. The @@ -367,7 +466,7 @@ int hashmap_remove(map_t in, int* key, i /* Linear probing, if necessary */ for(i = 0; i<MAX_CHAIN_LENGTH; i++){ - int in_use = m->data[curr].in_use; + char in_use = m->data[curr].in_use; if (in_use == 1){ if ((m->data[curr].num == num) && (intsetcmp(m->data[curr].key,key,num)==0)){ /* Blank out the fields */ diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h b/monetdb5/extras/rdf/hashmap/hashmap.h --- a/monetdb5/extras/rdf/hashmap/hashmap.h +++ b/monetdb5/extras/rdf/hashmap/hashmap.h @@ -1,13 +1,9 @@ -/* - * Generic hashmap manipulation functions - * - * Originally by Elliot C Back - http://elliottback.com/wp/hashmap-implementation-in-c/ - * - * Modified by Pete Warden to fix a serious performance problem, support strings as keys - * and removed thread synchronization - http://petewarden.typepad.com - * - * Modified by Minh-Duc Pham to support key as a set of integer values - * Added functions for collecting data statistic +/* HashMap for the characteristic sets (CSs') in RDF */ + +/* Originally by Elliot C Back - http://elliottback.com/wp/hashmap-implementation-in-c/ + * Modified by Minh-Duc Pham to support key as a set of integer values + * + various functions for collecting data statistic + * and for specific data analysis of RDF triples * */ @@ -43,7 +39,7 @@ typedef struct _hashmap_element{ int* key; int num; int freq; - int in_use; + char in_use; any_t data; } hashmap_element; @@ -112,10 +108,26 @@ extern int hashmap_statistic_groupcs_by_ extern int hashmap_put(map_t in, int* key, int num, any_t value); /* + * Add a pointer to the hashmap with some key + * This function is ONLY used for the case of + * the predicate in RDF triple + */ +extern int hashmap_put_forP(map_t in, int* key, int num, any_t value, int support); + +/* * Get an element from the hashmap. Return MAP_OK or MAP_MISSING. 
*/ extern int hashmap_get(map_t in, int* key, int num, any_t *arg, char isUpdateFreq, int *retfreq); + +/* + * Get your pointer out of the hashmap with a key + * This function is ONLY used for the case of + * the predicate in RDF triple + */ + +extern int hashmap_get_forP(map_t in, int* key, any_t *arg); + /* * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING. */ diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -73,22 +73,21 @@ static void putCStoHash(map_t csmap, int } -static void putPtoHash(map_t pmap, int value, oid *poid){ +static void putPtoHash(map_t pmap, int value, oid *poid, int support){ oid *getPoid; oid *putPoid; int err; int* pkey; - int freq; pkey = (int*) malloc(sizeof(int)); *pkey = value; - if (hashmap_get(pmap, pkey, 1,(void**)(&getPoid),1, &freq) != MAP_OK){ + if (hashmap_get_forP(pmap, pkey,(void**)(&getPoid)) != MAP_OK){ putPoid = malloc(sizeof(oid)); *putPoid = *poid; - err = hashmap_put(pmap, pkey, 1, putPoid); + err = hashmap_put_forP(pmap, pkey, 1, putPoid, support); assert(err == MAP_OK); (*poid)++; @@ -125,7 +124,7 @@ static void getStatisticCSsBySize(map_t printf(" --- Number of CS per size (Max = %d)--- \n", maximumNumP); for (i = 1; i <= maximumNumP; i++){ - printf("%d : %d \n", i, statCS[i]); + printf("%d : %d \n", i, statCS[i]); } free(statCS); @@ -149,10 +148,10 @@ static void getStatisticCSsBySupports(ma /* Output the result */ - if (isWriteToFile == 0){ + if (isWriteToFile == 0){ printf(" --- Number of CS per support (Max = %d)--- \n", maxSupport); for (i = 1; i <= maxSupport; i++){ - printf("%d : %d \n", i, statCS[i]); + printf("%d : %d \n", i, statCS[i]); } } else { @@ -173,6 +172,8 @@ static void getStatisticCSsBySupports(ma free(statCS); } + + /* Extract CS from SPO triples table */ str RDFextractCS(int *ret, bat *sbatid, bat *pbatid){ @@ -214,8 +215,6 @@ RDFextractCS(int *ret, bat *sbatid, bat if (numP 
> maxNumProp) maxNumProp = numP; - - } curS = *bt; curP = 0; @@ -229,12 +228,12 @@ RDFextractCS(int *ret, bat *sbatid, bat exit(-1); } - if (curP != *pbt){ + if (curP != *pbt){ /* Multi values property */ buff[numP] = *pbt; numP++; curP = *pbt; } - //printf("Travel sbat at %d value: %d , for pbat: %d \n", (int) p, (int) *bt, (int) *pbt); + //printf("Travel sbat at %d value: %d , for pbat: %d \n", (int) p, (int) *bt, (int) *pbt); } /*put the last CS */ @@ -293,7 +292,8 @@ RDFextractPfromPSO(int *ret, bat *sbatid bt = (oid *) BUNtloc(pi, p); if (*bt != curP){ if (p != 0){ /* Not the first S */ - putPtoHash(pMap, *bt, &Poid); + putPtoHash(pMap, *bt, &Poid, supportP); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list