Changeset: 0c3d286ac863 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0c3d286ac863
Modified Files:
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/hashmap/hashmap.h
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Modify the function that stores predicates in a hash map (putPtoHash) so that it also records each predicate's support.


diffs (truncated from 314 to 300 lines):

diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c 
b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -1,6 +1,5 @@
-/*
- * Generic map implementation.
- */
+/* HashMap for the characteristic sets (CSs') in RDF */
+
 #include <hashmap.h>
 
 #include <stdlib.h>
@@ -62,6 +61,37 @@ static char intsetcmp(int* key1, int* ke
        return 0; 
 }
 
+/* Return 1 if sorted arr2[] (n members) is a subset of sorted arr1[] (m members),
+ * else 0. Both arrays are walked once; the comparisons rely on ascending order.
+ * */
+
+static int isSubset(int* arr1, int* arr2, int m, int n)
+{
+       int i = 0, j = 0; /* i indexes arr2, j indexes arr1 */
+        
+       if(m < n) /* arr2 has more members than arr1: cannot be a subset */
+               return 0;
+                
+       while( i < n && j < m )
+       {
+               if( arr1[j] < arr2[i] )
+                       j++; /* arr1 element is smaller: skip it */
+               else if( arr1[j] == arr2[i] )
+               {
+                       j++; /* match: advance in both arrays */
+                       i++;
+               }
+               else if( arr1[j] > arr2[i] )
+                       return 0; /* arr2[i] does not occur in arr1 */
+       }
+               
+       if( i < n ) /* arr1 was exhausted before all of arr2 was matched */
+               return 0;
+       else
+               return 1;
+} 
+
+
 /*
  * Return the integer of the location in data
  * to store the point to the item, or MAP_FULL.
@@ -163,6 +193,39 @@ int hashmap_put(map_t in, int* key, int 
        return MAP_OK;
 }
 
+
+/*
+ * Add a pointer (value) to the hashmap under key/num and store support as the
+ * entry's freq. Returns MAP_OK, or MAP_OMEM when hashmap_rehash reports MAP_OMEM.
+ * This function is ONLY used for the predicates of RDF triples.
+ */
+int hashmap_put_forP(map_t in, int* key, int num, any_t value, int support){
+       int index;
+       hashmap_map* m;
+
+       /* Cast the hashmap */
+       m = (hashmap_map *) in;
+
+       /* Find a place to put our value */
+       index = hashmap_hash(in, key, num);
+       while(index == MAP_FULL){ /* no slot available: rehash, then hash again */
+               if (hashmap_rehash(in) == MAP_OMEM) {
+                       return MAP_OMEM;
+               }
+               index = hashmap_hash(in, key, num);
+       }
+
+       /* Set the data */
+       m->data[index].data = value;
+       m->data[index].key = key; /* the caller's pointer is stored; the integers are not copied */
+       m->data[index].num = num; 
+       m->data[index].in_use = 1;
+       m->data[index].freq = support; /* freq is set to the caller-supplied support */
+       m->size++; 
+
+       return MAP_OK;
+}
+
 /*
  * Get your pointer out of the hashmap with a key
  */
@@ -180,7 +243,7 @@ int hashmap_get(map_t in, int* key, int 
        /* Linear probing, if necessary */
        for(i = 0; i<MAX_CHAIN_LENGTH; i++){
 
-               int in_use = m->data[curr].in_use;
+               char in_use = m->data[curr].in_use;
                if (in_use == 1){
                        if ((m->data[curr].num == num) && 
(intsetcmp(m->data[curr].key,key,num)==0)){
                                *arg = (m->data[curr].data);
@@ -201,6 +264,42 @@ int hashmap_get(map_t in, int* key, int 
 }
 
 
+/*
+ * Look up the entry whose first key integer equals *key and store its data
+ * pointer in *arg. Returns MAP_OK if found within MAX_CHAIN_LENGTH probes,
+ * else sets *arg to NULL and returns MAP_MISSING. ONLY used for RDF predicates.
+ */
+int hashmap_get_forP(map_t in, int* key, any_t *arg){
+       int curr;
+       int i;
+       hashmap_map* m;
+
+       /* Cast the hashmap */
+       m = (hashmap_map *) in;
+
+       /* Find data location */
+       curr = hashmap_hash_int(m, key, 1); /* the key is hashed with num fixed to 1 */
+
+       /* Linear probing, if necessary */
+       for(i = 0; i<MAX_CHAIN_LENGTH; i++){
+
+               int in_use = m->data[curr].in_use;
+               if (in_use == 1){
+                       if (*m->data[curr].key == *key){ /* only the first integer of each key is compared */
+                               *arg = (m->data[curr].data);
+                               return MAP_OK;
+                       }
+               }
+               curr = (curr + 1) % m->table_size; /* next slot, wrapping at the end of the table */
+       }
+
+       *arg = NULL;
+
+       /* Not found */
+       return MAP_MISSING;
+}
+
+
 
 /*
  * Iterate the function parameter over each element in the hashmap.  The
@@ -367,7 +466,7 @@ int hashmap_remove(map_t in, int* key, i
        /* Linear probing, if necessary */
        for(i = 0; i<MAX_CHAIN_LENGTH; i++){
 
-        int in_use = m->data[curr].in_use;
+        char in_use = m->data[curr].in_use;
         if (in_use == 1){
            if ((m->data[curr].num == num) && 
(intsetcmp(m->data[curr].key,key,num)==0)){
                 /* Blank out the fields */
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h 
b/monetdb5/extras/rdf/hashmap/hashmap.h
--- a/monetdb5/extras/rdf/hashmap/hashmap.h
+++ b/monetdb5/extras/rdf/hashmap/hashmap.h
@@ -1,13 +1,9 @@
-/*
- * Generic hashmap manipulation functions
- *
- * Originally by Elliot C Back - 
http://elliottback.com/wp/hashmap-implementation-in-c/
- *
- * Modified by Pete Warden to fix a serious performance problem, support 
strings as keys
- * and removed thread synchronization - http://petewarden.typepad.com
- *
- * Modified by Minh-Duc Pham to support key as a set of integer values
- * Added functions for collecting data statistic
+/* HashMap for the characteristic sets (CSs') in RDF */
+
+/* Originally by Elliot C Back - 
http://elliottback.com/wp/hashmap-implementation-in-c/
+ * Modified by Minh-Duc Pham to support keys that are sets of integer values,
+ * plus various functions for collecting data statistics
+ * and for specific data analysis of RDF triples 
  *
  */
 
@@ -43,7 +39,7 @@ typedef struct _hashmap_element{
        int* key;
        int num; 
        int freq;       
-       int in_use;
+       char in_use;
        any_t data;
 } hashmap_element;
 
@@ -112,10 +108,26 @@ extern int hashmap_statistic_groupcs_by_
 extern int hashmap_put(map_t in, int* key, int num,  any_t value);
 
 /*
+ * Add a pointer to the hashmap under the given key and record its support.
+ * This function is ONLY used for the predicates
+ * of RDF triples.
+ */
+extern int hashmap_put_forP(map_t in, int* key, int num, any_t value, int 
support);
+
+/*
  * Get an element from the hashmap. Return MAP_OK or MAP_MISSING.
  */
 extern int hashmap_get(map_t in, int* key, int num, any_t *arg, char 
isUpdateFreq, int *retfreq);
 
+
+/*
+ * Get a pointer out of the hashmap by its key.
+ * This function is ONLY used for the predicates
+ * of RDF triples.
+ */
+
+extern int hashmap_get_forP(map_t in, int* key, any_t *arg);
+
 /*
  * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING.
  */
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -73,22 +73,21 @@ static void putCStoHash(map_t csmap, int
 }
 
 
-static void putPtoHash(map_t pmap, int value, oid *poid){
+static void putPtoHash(map_t pmap, int value, oid *poid, int support){
        oid     *getPoid; 
        oid     *putPoid; 
        int     err; 
        int*    pkey; 
-       int     freq; 
 
        pkey = (int*) malloc(sizeof(int));
 
        *pkey = value; 
 
-       if (hashmap_get(pmap, pkey, 1,(void**)(&getPoid),1, &freq) != MAP_OK){
+       if (hashmap_get_forP(pmap, pkey,(void**)(&getPoid)) != MAP_OK){
                putPoid = malloc(sizeof(oid)); 
                *putPoid = *poid; 
 
-               err = hashmap_put(pmap, pkey, 1, putPoid);      
+               err = hashmap_put_forP(pmap, pkey, 1, putPoid, support);        
                assert(err == MAP_OK); 
                                
                (*poid)++; 
@@ -125,7 +124,7 @@ static void getStatisticCSsBySize(map_t 
        
        printf(" --- Number of CS per size (Max = %d)--- \n", maximumNumP);
        for (i = 1; i <= maximumNumP; i++){
-               printf("%d  :  %d \n", i, statCS[i]); 
+               printf("%d : %d \n", i, statCS[i]); 
        } 
 
        free(statCS); 
@@ -149,10 +148,10 @@ static void getStatisticCSsBySupports(ma
 
        /* Output the result */
        
-       if (isWriteToFile  == 0){
+       if (isWriteToFile == 0){
                printf(" --- Number of CS per support (Max = %d)--- \n", 
maxSupport);
                for (i = 1; i <= maxSupport; i++){
-                       printf("%d  :  %d \n", i, statCS[i]); 
+                       printf("%d : %d \n", i, statCS[i]); 
                } 
        }
        else {
@@ -173,6 +172,8 @@ static void getStatisticCSsBySupports(ma
        free(statCS); 
 }
 
+
+
 /* Extract CS from SPO triples table */
 str
 RDFextractCS(int *ret, bat *sbatid, bat *pbatid){
@@ -214,8 +215,6 @@ RDFextractCS(int *ret, bat *sbatid, bat 
                                
                                if (numP > maxNumProp) 
                                        maxNumProp = numP; 
-                                       
-
                        }
                        curS = *bt; 
                        curP = 0;
@@ -229,12 +228,12 @@ RDFextractCS(int *ret, bat *sbatid, bat 
                        exit(-1);
                }
                
-               if (curP != *pbt){
+               if (curP != *pbt){      /* Multi values property */             
                        buff[numP] = *pbt; 
                        numP++; 
                        curP = *pbt; 
                }
-               //printf("Travel sbat at %d  value: %d , for pbat: %d \n", 
(int) p, (int) *bt, (int) *pbt);
+               //printf("Travel sbat at %d value: %d , for pbat: %d \n", (int) 
p, (int) *bt, (int) *pbt);
        }
        
        /*put the last CS */
@@ -293,7 +292,8 @@ RDFextractPfromPSO(int *ret, bat *sbatid
                bt = (oid *) BUNtloc(pi, p);            
                if (*bt != curP){
                        if (p != 0){    /* Not the first S */
-                               putPtoHash(pMap, *bt, &Poid); 
+                               putPtoHash(pMap, *bt, &Poid, supportP); 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to