Changeset: 50a2cd0713ec for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=50a2cd0713ec
Modified Files:
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/hashmap/hashmap.h
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        monetdb5/extras/rdf/rdfschema.mal
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Check the foreign key relationship from RDF triples.


diffs (truncated from 402 to 300 lines):

diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c 
b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -358,6 +358,34 @@ int hashmap_statistic_CSbysupport(map_t 
 }
 
 
+/* Print key and freq of every in-use element; returns MAP_MISSING if the map is empty, else MAP_OK */
+int hashmap_print(map_t in){
+
+       int i,j ;
+       
+
+       /* Cast the opaque map_t handle to the concrete hashmap type */
+       hashmap_map* m = (hashmap_map*) in;
+       
+       /* On empty hashmap, return immediately without printing anything */
+       if (hashmap_length(m) <= 0)
+               return MAP_MISSING;     
+
+       /* Scan every slot of the table and skip the ones that are not in use */
+       for(i = 0; i< m->table_size; i++)
+               if(m->data[i].in_use != 0) {
+                       /* Print the num integers of the key, then the frequency */
+                       printf("Key: ");
+                       for (j = 0; j <  m->data[i].num; j++){
+                               printf(" %d ", m->data[i].key[j]);
+                       }
+                       printf("\n");
+                       printf("Freq: %d \n",  m->data[i].freq);
+                       /* m->data[i].data is not printed */
+               }
+    return MAP_OK;
+}
+
 /*
  * Collect the number of CSs cummulatively for support values ranging 
  * from 1 to maxfreqthreshold
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h 
b/monetdb5/extras/rdf/hashmap/hashmap.h
--- a/monetdb5/extras/rdf/hashmap/hashmap.h
+++ b/monetdb5/extras/rdf/hashmap/hashmap.h
@@ -37,7 +37,7 @@ typedef any_t map_t;
 /* We need to keep keys and values */
 typedef struct _hashmap_element{
        int* key;
-       int num; 
+       int num;                /* Number of integers for the key */
        int freq;       
        char in_use;
        any_t data;
@@ -139,6 +139,13 @@ extern int hashmap_remove(map_t in, int*
  */
 extern int hashmap_get_one(map_t in, any_t *arg, int remove);
 
+/* 
+ * Print the key and the frequency of every in-use element of the hashmap.
+ * Returns MAP_MISSING if the hashmap is empty, MAP_OK otherwise.
+ * */
+extern int hashmap_print(map_t in);
+
+
 /*
  * Free the hashmap
  */
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -52,9 +52,9 @@ rdf_export str
 RDFleftfetchjoin_sorted(int *result, int* lid, int *rid);
 
 typedef enum {
+       URI,
        DATETIME, 
        NUMERIC, 
-       URI, 
        STRING
 } ObjectType; 
 
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -114,26 +114,26 @@ CS* creatCS(int subId, int numP, int* bu
  * If yes, add that frequent CS to the freqCSset. 
  *
  * */
-static void putaCStoHash(map_t csmap, int* buff, int num, oid *csoid, char 
isStoreFreqCS, int freqThreshold, CSset **freqCSset){
+static void putaCStoHash(map_t csmap, int* key, int num, oid *csoid, char 
isStoreFreqCS, int freqThreshold, CSset **freqCSset){
        oid     *getCSoid; 
        oid     *putCSoid; 
        int     err; 
-       int*    cs; 
+       int*    csKey; 
        int     freq = 0; 
        CS      *freqCS; 
 
-       cs = (int*) malloc(sizeof(int) * num);
-       if (cs==NULL){
+       csKey = (int*) malloc(sizeof(int) * num);
+       if (csKey==NULL){
                printf("Malloc failed. at %d", num);
                exit(-1); 
        }
 
-       copyIntSet(cs, buff, num); 
-       if (hashmap_get(csmap, cs, num,(void**)(&getCSoid),1, &freq) != MAP_OK){
+       copyIntSet(csKey, key, num); 
+       if (hashmap_get(csmap, csKey, num,(void**)(&getCSoid),1, &freq) != 
MAP_OK){
                putCSoid = malloc(sizeof(oid)); 
                *putCSoid = *csoid; 
 
-               err = hashmap_put(csmap, cs, num, 1,  putCSoid);        
+               err = hashmap_put(csmap, csKey, num, 1,  putCSoid);     
                assert(err == MAP_OK); 
 
                (*csoid)++; 
@@ -142,11 +142,11 @@ static void putaCStoHash(map_t csmap, in
                if (isStoreFreqCS == 1){        /* Store the frequent CS to the 
CSset*/
                        //printf("FreqCS: Support = %d, Threshold %d  \n ", 
freq, freqThreshold);
                        if (freq == freqThreshold){
-                               freqCS = creatCS(*getCSoid, num, buff);         
+                               freqCS = creatCS(*getCSoid, num, key);          
                                addCStoSet(*freqCSset, *freqCS);
                        }
                }
-               free(cs); 
+               free(csKey); 
        }
 
 }
@@ -234,7 +234,7 @@ void getMaximumFreqCSs(CSset *freqCSset)
 
 
 
-static void putPtoHash(map_t pmap, int value, oid *poid, int support){
+static void putPtoHash(map_t pmap, int key, oid *poid, int support){
        oid     *getPoid; 
        oid     *putPoid; 
        int     err; 
@@ -242,7 +242,7 @@ static void putPtoHash(map_t pmap, int v
 
        pkey = (int*) malloc(sizeof(int));
 
-       *pkey = value; 
+       *pkey = key; 
 
        if (hashmap_get_forP(pmap, pkey,(void**)(&getPoid)) != MAP_OK){
                putPoid = malloc(sizeof(oid)); 
@@ -351,6 +351,7 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        int     maxNumProp = 0; 
        CSset   *freqCSset;     /* Set of frequent CSs */
 
+
        buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
        
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
@@ -431,10 +432,179 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        return MAL_SUCCEED; 
 }
 
+/*
+ * Get the referred CS (for now only prints the matching subject/property oids).
+ * Input: sbat and pbat of the triples, and obt, the oid of a URI object.
+ * Returns MAL_SUCCEED, or the error thrown when BATsubselect returns NULL.
+ * */
+static 
+str getReferCS(BAT *sbat, BAT *pbat, oid *obt){
+
+       /* For detecting foreign key relationships */
+       BAT     *tmpbat;        /* Holds the result of searching objectURI in 
sbat */
+       BATiter ti; 
+       oid     *tbt;
+       BUN     pt, qt; 
+       oid     *s_t, *p_t;     
+       //int   *tmpbuff;
+
+       //tmpbuff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
+
+       /* BATsubselect(inputbat, <dont know yet>, lowValue, highValue, 
isIncludeLowValue, isIncludeHigh, <anti>); low and high are both obt here */
+       printf("Checking for object " BUNFMT "\n", *obt);
+       tmpbat = BATsubselect(sbat, NULL, obt, obt, 1, 1, 0); 
+       /* The tail of tmpbat contains the head oids of the matching sbat elements */
+       if (tmpbat != NULL){
+               printf("Matching: " BUNFMT "\n", BATcount(tmpbat));
+               BATprint(tmpbat); 
+                                               
+               if (BATcount(tmpbat) > 0){
+                       ti = bat_iterator(tmpbat);
+                       BATloop(tmpbat, pt, qt){
+                               tbt = (oid *) BUNtail(ti, pt);
+                               s_t = (oid *) Tloc(sbat, *tbt);
+                               p_t = (oid *) Tloc(pbat, *tbt); 
+                               printf("s_t: " BUNFMT "\n", (*s_t));
+                               printf("p_t: " BUNFMT "\n", (*p_t));
+                                               /* Check which CS is referred (nothing is done here yet) */
+
+                       }
+               }
+       }
+       else
+               throw(MAL, "rdf.RDFextractCSwithTypes", "Null Bat returned for 
BATsubselect");                  
+
+                       
+                       
+       /* tmpbat was only needed temporarily; drop it before returning */
+       if (tmpbat)
+               BBPunfix(tmpbat->batCacheid);
+
+       return MAL_SUCCEED;
+}
+
+/* Extract CS from SPO triples table */
+str
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int 
*freqThreshold){
+       BUN     p, q; 
+       BAT     *sbat = NULL, *pbat = NULL, *obat = NULL; 
+       BATiter si, pi, oi;     /*iterator for BAT of s,p,o columns in spo 
table */
+       oid     *sbt, *pbt, *obt; 
+       oid     curS;           /* current Subject oid */
+       oid     curP;           /* current Property oid */
+       oid     CSoid = 0;      /* Characteristic set oid */
+       int     numP;           /* Number of properties for current S */
+       map_t   csMap;          
+       int*    buff;    
+       int     INIT_PROPERTY_NUM = 5000; 
+       int     maxNumProp = 0; 
+       CSset   *freqCSset;     /* Set of frequent CSs */
+       oid     objType;
+       
+
+       
+       buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
+
+       if ((sbat = BATdescriptor(*sbatid)) == NULL) {
+               throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
+       }
+       if (!(sbat->tsorted)){
+                throw(MAL, "rdf.RDFextractCSwithTypes", "sbat is not sorted");
+       }
+
+       if ((pbat = BATdescriptor(*pbatid)) == NULL) {
+               throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
+       }
+       if ((obat = BATdescriptor(*obatid)) == NULL) {
+               throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
+       }
+       
+       si = bat_iterator(sbat); 
+       pi = bat_iterator(pbat); 
+       oi = bat_iterator(obat);
+
+       /* Init a hashmap */
+       csMap = hashmap_new(); 
+       freqCSset = initCSset();
+
+       numP = 0;
+       curP = 0; 
+
+       printf("freqThreshold = %d \n", *freqThreshold);        
+       BATloop(sbat, p, q){
+               sbt = (oid *) BUNtloc(si, p);           
+               if (*sbt != curS){
+                       if (p != 0){    /* Not the first S */
+                               putaCStoHash(csMap, buff, numP, &CSoid, 1, 
*freqThreshold, &freqCSset); 
+                               
+                               if (numP > maxNumProp) 
+                                       maxNumProp = numP; 
+                       }
+                       curS = *sbt; 
+                       curP = 0;
+                       numP = 0;
+               }
+                               
+               pbt = (oid *) BUNtloc(pi, p); 
+
+               if (numP > INIT_PROPERTY_NUM){
+                       throw(MAL, "rdf.RDFextractCS", "# of properties is 
greater than INIT_PROPERTY_NUM");
+                       exit(-1);
+               }
+               
+               if (curP != *pbt){      /* Multi values property */             
+                       buff[numP] = *pbt; 
+                       numP++; 
+                       curP = *pbt; 
+               }
+               
+               obt = (oid *) BUNtloc(oi, p); 
+               /* Check type of object */
+               objType = ((*obt) >> (sizeof(BUN)*8 - 3))  &  3 ;       /* Get 
two bits 63th, 62nd from object oid */
+               
+               //printf("object type: " BUNFMT "\n", objType); 
+
+               /* Look at sbat*/
+               if (objType == URI){
+
+                       getReferCS(sbat, pbat, obt);            
+               }
+       }
+       
+       /*put the last CS */
+       putaCStoHash(csMap, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset 
); 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to