Changeset: e3b5ff87c6d9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e3b5ff87c6d9
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Filter the FK relationships in the RDF schema.

Only keep an FK relationship if its number of appearances exceeds 1% of the
number of appearances (frequency) of the original CS.


diffs (178 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -296,42 +296,62 @@ void printCSrelSet(CSrel *csrelSet, char
  * */
 
 static 
-void printCSrelWithMaxSet(oid* csSuperCSMap, CSrel *csrelWithMaxSet, CSrel 
*csrelBetweenMaxSet, CSrel *csrelSet, char *csFreqMap, BAT* freqBat, int num, 
int freqThreshold){
+void printCSrelWithMaxSet(oid* csSuperCSMap, CSrel *csrelToMaxSet, CSrel 
*csrelFromMaxSet, CSrel *csrelBetweenMaxSet, CSrel *csrelSet, char *csFreqMap, 
BAT* freqBat, int num, int freqThreshold){
 
        int     i; 
        int     j; 
        int     *freq; 
-       FILE    *fout, *fout2; 
-       char    filename[100], filename2[100];
+       FILE    *fout, *fout1, *fout1filter, *fout2; 
+       char    filename[100], filename1[100], filename2[100];
        char    tmpStr[50];
        oid     maxCSoid; 
 
 
-       strcpy(filename, "csRelatioinshipWithMaxFreqCS");
+
+
+       // Merge the relationships to create csrelToMaxSet, csrelFromMaxSet
+       for (i = 0; i < num; i++){
+               maxCSoid = csSuperCSMap[csrelSet[i].origCSoid];
+               if (csrelSet[i].numRef != 0){
+                       for (j = 0; j < csrelSet[i].numRef; j++){               
+                               if (csSuperCSMap[csrelSet[i].lstRefCSoid[j]] != 
BUN_NONE){
+                                       
addReltoCSRelWithFreq(csrelSet[i].origCSoid, 
csSuperCSMap[csrelSet[i].lstRefCSoid[j]], csrelSet[i].lstCnt[j], 
&csrelToMaxSet[i]);
+                               }
+                       }
+
+
+                       // Add to csrelFromMaxSet
+                       // For a referenced CS that is frequent, use its 
maxCSoid
+                       // Else, use its csoid
+                       if (maxCSoid != BUN_NONE){
+                               for (j = 0; j < csrelSet[i].numRef; j++){       
        
+                                       if 
(csSuperCSMap[csrelSet[i].lstRefCSoid[j]] != BUN_NONE){
+                                               addReltoCSRelWithFreq(maxCSoid, 
csSuperCSMap[csrelSet[i].lstRefCSoid[j]], csrelSet[i].lstCnt[j], 
&csrelFromMaxSet[maxCSoid]);
+                                       }
+                                       else{
+                                               addReltoCSRelWithFreq(maxCSoid, 
csrelSet[i].lstRefCSoid[j], csrelSet[i].lstCnt[j], &csrelFromMaxSet[maxCSoid]);
+                                       }
+                               }
+                       }
+               }
+       }
+
+       // Write csrelToMaxSet to File
+       
+       strcpy(filename, "csRelatioinshipToMaxFreqCS");
        sprintf(tmpStr, "%d", freqThreshold);
        strcat(filename, tmpStr);
        strcat(filename, ".txt");
 
        fout = fopen(filename,"wt"); 
 
-       // Merge the relationships to create csrelWithMaxSet
        for (i = 0; i < num; i++){
-               if (csrelSet[i].numRef != 0){
-                       for (j = 0; j < csrelSet[i].numRef; j++){               
-                               if (csSuperCSMap[csrelSet[i].lstRefCSoid[j]] != 
BUN_NONE){
-                                       
addReltoCSRelWithFreq(csrelSet[i].origCSoid, 
csSuperCSMap[csrelSet[i].lstRefCSoid[j]], csrelSet[i].lstCnt[j], 
&csrelWithMaxSet[i]);
-                               }
-                       }
-               }
-       }
-
-       for (i = 0; i < num; i++){
-               if (csrelWithMaxSet[i].numRef != 0){    //Only print CS with FK
+               if (csrelToMaxSet[i].numRef != 0){      //Only print CS with FK
                        fprintf(fout, "Relationship %d: ", i);
                        freq  = (int *) Tloc(freqBat, i);
-                       fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq: %d) --> 
", csrelWithMaxSet[i].origCSoid, *freq, csFreqMap[i]);
-                       for (j = 0; j < csrelWithMaxSet[i].numRef; j++){
-                               fprintf(fout, BUNFMT " (%d) ", 
csrelWithMaxSet[i].lstRefCSoid[j],csrelWithMaxSet[i].lstCnt[j]); 
+                       fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq: %d) --> 
", csrelToMaxSet[i].origCSoid, *freq, csFreqMap[i]);
+                       for (j = 0; j < csrelToMaxSet[i].numRef; j++){
+                               fprintf(fout, BUNFMT " (%d) ", 
csrelToMaxSet[i].lstRefCSoid[j],csrelToMaxSet[i].lstCnt[j]);     
                        }       
                        fprintf(fout, "\n");
                }
@@ -339,6 +359,40 @@ void printCSrelWithMaxSet(oid* csSuperCS
 
        fclose(fout);
 
+       // Write csrelFromMaxSet to File
+       
+       strcpy(filename1, "csRelatioinshipFromMaxFreqCS");
+       sprintf(tmpStr, "%d", freqThreshold);
+       strcat(filename1, tmpStr);
+       strcat(filename1, ".txt");
+
+       fout1 = fopen(filename1,"wt"); 
+       strcat(filename1, ".filter");
+       fout1filter = fopen(filename1,"wt");
+
+       for (i = 0; i < num; i++){
+               if (csrelFromMaxSet[i].numRef != 0){    //Only print CS with FK
+                       fprintf(fout1, "Relationship %d: ", i);
+                       freq  = (int *) Tloc(freqBat, i);
+                       fprintf(fout1, "CS " BUNFMT " (Freq: %d, isFreq: %d) 
--> ", csrelFromMaxSet[i].origCSoid, *freq, csFreqMap[i]);
+                       fprintf(fout1filter, "CS " BUNFMT " (Freq: %d, isFreq: 
%d) --> ", csrelFromMaxSet[i].origCSoid, *freq, csFreqMap[i]);           
+
+                       for (j = 0; j < csrelFromMaxSet[i].numRef; j++){
+                               fprintf(fout1, BUNFMT " (%d) ", 
csrelFromMaxSet[i].lstRefCSoid[j],csrelFromMaxSet[i].lstCnt[j]);        
+
+                               // Only put into the filer output file, the 
reference with appears in > 1 % of original CS
+                               if (*freq < csrelFromMaxSet[i].lstCnt[j]*100){
+                                       fprintf(fout1filter, BUNFMT " (%d) ", 
csrelFromMaxSet[i].lstRefCSoid[j],csrelFromMaxSet[i].lstCnt[j]);
+                               }
+                       }       
+                       fprintf(fout1, "\n");
+                       fprintf(fout1filter, "\n");
+
+               }
+       }
+
+       fclose(fout1);
+       fclose(fout1filter);
 
        /*------------------------*/
 
@@ -349,13 +403,13 @@ void printCSrelWithMaxSet(oid* csSuperCS
 
        fout2 = fopen(filename2,"wt"); 
 
-       // Merge the csrelWithMaxSet --> csrelBetweenMaxSet
+       // Merge the csrelToMaxSet --> csrelBetweenMaxSet
        for (i = 0; i < num; i++){
-               maxCSoid = csSuperCSMap[csrelWithMaxSet[i].origCSoid];
-               if (csrelWithMaxSet[i].numRef != 0 && maxCSoid != BUN_NONE){
-                       for (j = 0; j < csrelWithMaxSet[i].numRef; j++){        
        
-                               
assert(csSuperCSMap[csrelWithMaxSet[i].lstRefCSoid[j]] == 
csrelWithMaxSet[i].lstRefCSoid[j]);
-                               addReltoCSRelWithFreq(maxCSoid, 
csSuperCSMap[csrelWithMaxSet[i].lstRefCSoid[j]], csrelWithMaxSet[i].lstCnt[j], 
&csrelBetweenMaxSet[maxCSoid]);
+               maxCSoid = csSuperCSMap[csrelToMaxSet[i].origCSoid];
+               if (csrelToMaxSet[i].numRef != 0 && maxCSoid != BUN_NONE){
+                       for (j = 0; j < csrelToMaxSet[i].numRef; j++){          
+                               
assert(csSuperCSMap[csrelToMaxSet[i].lstRefCSoid[j]] == 
csrelToMaxSet[i].lstRefCSoid[j]);
+                               addReltoCSRelWithFreq(maxCSoid, 
csSuperCSMap[csrelToMaxSet[i].lstRefCSoid[j]], csrelToMaxSet[i].lstCnt[j], 
&csrelBetweenMaxSet[maxCSoid]);
                        }
                }
        }
@@ -1327,7 +1381,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        int             maxNumPwithDup = 0; 
        char            *csFreqMap; 
        CSrel           *csrelSet;
-       CSrel           *csrelWithMaxFreqSet;
+       CSrel           *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
        CSrel           *csrelBetweenMaxFreqSet; 
        SubCSSet        *csSubCSMap; 
        oid             *csSuperCSMap;  
@@ -1407,10 +1461,11 @@ RDFextractCSwithTypes(int *ret, bat *sba
        getMaximumFreqCSs(freqCSset, csSuperCSMap, maxCSoid + 1); 
 
 
-       csrelWithMaxFreqSet = initCSrelset(maxCSoid + 1);       // CS --> 
Reference MaxCSs
+       csrelToMaxFreqSet = initCSrelset(maxCSoid + 1); // CS --> Reference 
MaxCSs
+       csrelFromMaxFreqSet = initCSrelset(maxCSoid + 1);       // CS --> 
Reference MaxCSs
        csrelBetweenMaxFreqSet = initCSrelset(maxCSoid + 1);    // MaxCS --> 
Reference MaxCSs
 
-       printCSrelWithMaxSet(csSuperCSMap, csrelWithMaxFreqSet, 
csrelBetweenMaxFreqSet, csrelSet,csFreqMap, csBats->freqBat, maxCSoid + 1, 
*freqThreshold);  
+       printCSrelWithMaxSet(csSuperCSMap, csrelToMaxFreqSet, 
csrelFromMaxFreqSet, csrelBetweenMaxFreqSet, csrelSet,csFreqMap, 
csBats->freqBat, maxCSoid + 1, *freqThreshold);  
 
 
        //getStatisticCSsBySize(csMap,maxNumProp); 
@@ -1429,7 +1484,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        freeCS_SubCSMapSet(csSubCSMap, maxCSoid + 1); 
 
        freeCSrelSet(csrelSet, maxCSoid + 1); 
-       freeCSrelSet(csrelWithMaxFreqSet, maxCSoid + 1); 
+       freeCSrelSet(csrelToMaxFreqSet, maxCSoid + 1); 
        freeCSrelSet(csrelBetweenMaxFreqSet, maxCSoid + 1);  
 
        freeCSBats(csBats);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to