Changeset: f087221703f5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f087221703f5
Modified Files:
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Implement function for detecting maximum CSs from set of CSs


diffs (203 lines):

diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c 
b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -61,37 +61,7 @@ static char intsetcmp(int* key1, int* ke
        return 0; 
 }
 
-/* Return 1 if sorted arr2[] is a subset of sorted arr1[] 
- * arr1 has m members, arr2 has n members
- * */
 
-/*
-static int isSubset(int* arr1, int* arr2, int m, int n)
-{
-       int i = 0, j = 0;
-        
-       if(m < n)
-               return 0;
-                
-       while( i < n && j < m )
-       {
-               if( arr1[j] < arr2[i] )
-                       j++;
-               else if( arr1[j] == arr2[i] )
-               {
-                       j++;
-                       i++;
-               }
-               else if( arr1[j] > arr2[i] )
-                       return 0;
-       }
-               
-       if( i < n )
-               return 0;
-       else
-               return 1;
-} 
-*/
 
 /*
  * Return the integer of the location in data
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -103,6 +103,7 @@ CS* creatCS(int subId, int numP, int* bu
        cs->subIdx = subId;
        cs->numProp = numP; 
        cs->numAllocation = numP; 
+       cs->isSubset = 0; /*By default, this CS is not known to be a subset of 
any other CS*/
        return cs; 
 }
 
@@ -139,6 +140,7 @@ static void putaCStoHash(map_t csmap, in
        }
        else{
                if (isStoreFreqCS == 1){        /* Store the frequent CS to the 
CSset*/
+                       printf("FreqCS: Support = %d, Threshold %d  \n ", freq, 
freqThreshold);
                        if (freq == freqThreshold){
                                freqCS = creatCS(*getCSoid, num, buff);         
                                addCStoSet(freqCSset, *freqCS);
@@ -149,6 +151,80 @@ static void putaCStoHash(map_t csmap, in
 
 }
 
+/* Return 1 if sorted arr2[] is a subset of sorted arr1[] 
+ * arr1 has m members, arr2 has n members
+ * */
+
+static int isSubset(int* arr1, int* arr2, int m, int n)
+{
+       int i = 0, j = 0;
+        
+       if(m < n)
+               return 0;
+                
+       while( i < n && j < m )
+       {
+               if( arr1[j] < arr2[i] )
+                       j++;
+               else if( arr1[j] == arr2[i] )
+               {
+                       j++;
+                       i++;
+               }
+               else if( arr1[j] > arr2[i] )
+                       return 0;
+       }
+               
+       if( i < n )
+               return 0;
+       else
+               return 1;
+}
+
+static 
+void printCS(CS cs){
+       int i; 
+       printf("CS %d: ", cs.subIdx);
+       for (i = 0; i < cs.numProp; i++){
+               printf(" %d  ", cs.lstProp[i]);
+       }
+       printf("\n");
+}
+
+/*
+ * Get the maximum frequent CSs from a CSset
+ * A maximum frequent CS is a CS that is not contained in any other CS
+ * */
+static 
+void getMaximumFreqCSs(CSset *freqCSset){
+
+       int numCS = freqCSset->numCSadded; 
+       int i, j; 
+
+       printf("Maximum frequent CSs: \n");
+
+       for (i = 0; i < numCS; i++){
+               if (freqCSset->items[i].isSubset == 1) continue;
+               for (j = (i+1); j < numCS; j++){
+                       if (isSubset(freqCSset->items[i].lstProp, 
freqCSset->items[j].lstProp,  
+                                       
freqCSset->items[i].numProp,freqCSset->items[j].numProp) == 1) { 
+                               /* CSj is a subset of CSi */
+                               freqCSset->items[j].isSubset = 1; 
+                       }
+                       else if (isSubset(freqCSset->items[j].lstProp, 
freqCSset->items[i].lstProp,  
+                                       
freqCSset->items[j].numProp,freqCSset->items[i].numProp) == 1) { 
+                               /* CSi is a subset of CSj */
+                               freqCSset->items[i].isSubset = 1; 
+                               break; 
+                       }
+                       
+               } 
+               /* By the end, if this CS is not a subset of any other CS */
+               if (freqCSset->items[i].isSubset == 0) printCS( 
freqCSset->items[i]); 
+       }
+}
+
+
 
 
 static void putPtoHash(map_t pmap, int value, oid *poid, int support){
@@ -252,7 +328,7 @@ static void getStatisticCSsBySupports(ma
 
 /* Extract CS from SPO triples table */
 str
-RDFextractCS(int *ret, bat *sbatid, bat *pbatid, int freqThreshold){
+RDFextractCS(int *ret, bat *sbatid, bat *pbatid, int *freqThreshold){
        BUN     p, q; 
        BAT     *sbat = NULL, *pbat = NULL; 
        BATiter si, pi;         /*iterator for BAT of s,p columns in spo table 
*/
@@ -286,11 +362,12 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        numP = 0;
        curP = 0; 
 
+       printf("freqThreshold = %d \n", *freqThreshold);        
        BATloop(sbat, p, q){
                bt = (oid *) BUNtloc(si, p);            
                if (*bt != curS){
                        if (p != 0){    /* Not the first S */
-                               putaCStoHash(csMap, buff, numP, &CSoid, 1, 
freqThreshold, freqCSset); 
+                               putaCStoHash(csMap, buff, numP, &CSoid, 1, 
*freqThreshold, freqCSset); 
                                
                                if (numP > maxNumProp) 
                                        maxNumProp = numP; 
@@ -316,7 +393,7 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        }
        
        /*put the last CS */
-       putaCStoHash(csMap, buff, numP, &CSoid, 1, freqThreshold, freqCSset ); 
+       putaCStoHash(csMap, buff, numP, &CSoid, 1, *freqThreshold, freqCSset ); 
 
        if (numP > maxNumProp) 
                maxNumProp = numP; 
@@ -325,6 +402,9 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
 
        /*get the statistic */
+
+       getMaximumFreqCSs(freqCSset); 
+
        getTopFreqCSs(csMap,20);
 
        getStatisticCSsBySize(csMap,maxNumProp); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -24,7 +24,7 @@ rdf_export str
 RDFSchemaExplore(int *ret, str *tbname, str *clname);
 
 rdf_export str
-RDFextractCS(int *ret, bat *sbatid, bat *pbatid, int freqThreshold); 
+RDFextractCS(int *ret, bat *sbatid, bat *pbatid, int *freqThreshold); 
 
 rdf_export str
 RDFextractPfromPSO(int *ret, bat *pbatid, bat *sbatid); 
@@ -35,6 +35,7 @@ typedef struct CS
        int*    lstProp;        //List of properties' Ids
        int     numProp;
        int     numAllocation;
+       char    isSubset; 
 } CS;
 
 #define INIT_NUM_CS 100
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to