Changeset: 59246b0623d0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=59246b0623d0
Modified Files:
    monetdb5/extras/rdf/rdfschema.c
    monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Use percentage of the total CS's frequency as the threshold for detecting dimension tables diffs (60 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -432,10 +432,17 @@ void updateFreqCStype(CSset *freqCSset, int i; int numDimensionCS = 0; - + int totalSupport = 0; /* Total CS frequency */ + float threshold = 0.0; + + for (i = 0; i < num; i++){ + totalSupport += freqCSset->items[i].support; + } + threshold = (float)totalSupport * IR_DIMENSION_THRESHOLD_PERCENTAGE; + printf("Total support %d --> Threshold for dimension table is: %f \n", totalSupport, threshold); for (i = 0; i < num; i++){ if (refCount[i] < freqCSset->items[i].support) continue; - if (curIRScores[i] < IR_DIMENSION_THRESHOLD) continue; + if (curIRScores[i] < threshold) continue; freqCSset->items[i].type = DIMENSIONCS; //printf("A dimension CS with IR score = %f \n", curIRScores[i]); @@ -6571,7 +6578,10 @@ RDFreorganize(int *ret, CStableStat *cst // print labels printf("Start exporting labels \n"); + + #if EXPORT_LABEL exportLabels(labels, freqCSset, csRelMergeFreqSet, *freqThreshold); + #endif curT = clock(); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -125,9 +125,10 @@ typedef struct PropStat { /* ---- For detecting dimension table */ #define NUM_ITERATION_FOR_IR 3 /* Number of iteration for indirect referrences to a CS (table) */ -#define IR_DIMENSION_THRESHOLD 100000 /* Score of indirect references that the CS can be considered as a dimension CS - Number of IR references should be several times larger than the CS frequency - */ +#define IR_DIMENSION_THRESHOLD_PERCENTAGE 0.02 /* Score of indirect references that the CS can be considered as a dimension CS + IR_DIMENSION_THRESHOLD_PERCENTAGE * totalFrequency + Number of IR references should be several times larger than the 
CS frequency + */ #define NOT_MERGE_DIMENSIONCS 1 #define FILTER_INFREQ_FK_FOR_IR 1 /* We filter out all the dirty references from a CS */ @@ -211,6 +212,8 @@ typedef struct SubCSSet{ #define MIN_FK_FREQUENCY 0.1 // The frequency of a FK should be > MIN_FK_FREQUENCY * The frequency of a mergedCS (or the number of tuples in one table) #define MIN_FK_PROPCOVERAGE 0.9 // The FK needs to happen in MIN_FK_PROPCOVERAGE of all instances of the particular property +#define EXPORT_LABEL 0 /* Export labels: TODO: Only disable the */ + typedef struct CSset{ CS* items; int numOrigFreqCS; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list