Changeset: 59246b0623d0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=59246b0623d0
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Use a percentage of the total CS frequency as the threshold for detecting
dimension tables


diffs (60 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -432,10 +432,17 @@ void updateFreqCStype(CSset *freqCSset, 
 
        int     i; 
        int     numDimensionCS = 0; 
-
+       int     totalSupport = 0;       /* Total CS frequency */
+       float   threshold  = 0.0; 
+       
+       for (i = 0; i < num; i++){      
+               totalSupport += freqCSset->items[i].support; 
+       }
+       threshold = (float)totalSupport * IR_DIMENSION_THRESHOLD_PERCENTAGE; 
+       printf("Total support %d --> Threshold for dimension table is: %f \n", 
totalSupport, threshold);
        for (i = 0; i < num; i++){
                if (refCount[i] < freqCSset->items[i].support) continue; 
-               if (curIRScores[i] < IR_DIMENSION_THRESHOLD) continue; 
+               if (curIRScores[i] < threshold) continue; 
 
                freqCSset->items[i].type = DIMENSIONCS;
                //printf("A dimension CS with IR score = %f \n", 
curIRScores[i]);
@@ -6571,7 +6578,10 @@ RDFreorganize(int *ret, CStableStat *cst
 
        // print labels
        printf("Start exporting labels \n"); 
+       
+       #if EXPORT_LABEL
        exportLabels(labels, freqCSset, csRelMergeFreqSet, *freqThreshold);
+       #endif
 
 
        curT = clock(); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -125,9 +125,10 @@ typedef struct PropStat {
 /* ---- For detecting dimension table */
 #define        NUM_ITERATION_FOR_IR    3       /* Number of iteration for 
indirect referrences to a CS (table) */
 
-#define IR_DIMENSION_THRESHOLD 100000  /* Score of indirect references that 
the CS can be considered as a dimension CS 
-                                          Number of IR references should be 
several times larger than the CS frequency
-                                       */
+#define IR_DIMENSION_THRESHOLD_PERCENTAGE      0.02    /* Minimum score of indirect references (IR) at which a CS can be considered a dimension CS, expressed as a fraction of the total CS frequency:
+                                                          threshold = IR_DIMENSION_THRESHOLD_PERCENTAGE * totalFrequency.
+                                                          The number of IR references should be several times larger than the CS frequency.
+                                                       */
 #define NOT_MERGE_DIMENSIONCS  1
 
 #define FILTER_INFREQ_FK_FOR_IR        1               /* We filter out all 
the dirty references from a CS */
@@ -211,6 +212,8 @@ typedef struct SubCSSet{
 #define        MIN_FK_FREQUENCY        0.1     // The frequency of a FK should 
be > MIN_FK_FREQUENCY * The frequency of a mergedCS (or the number of tuples in 
one table)      
 #define MIN_FK_PROPCOVERAGE    0.9     // The FK needs to happen in 
MIN_FK_PROPCOVERAGE of all instances of the particular property
 
+#define EXPORT_LABEL           0       /* Non-zero enables the exportLabels() call; 0 disables it. TODO: the original note ("Only disable the ...") was left unfinished */
+
 typedef struct CSset{
        CS* items;
        int numOrigFreqCS; 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to