Changeset: 8407c931a2e1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8407c931a2e1
Modified Files:
    monetdb5/extras/rdf/rdfparams.c
    monetdb5/extras/rdf/rdfparams.h
    monetdb5/extras/rdf/rdfschema.c
    monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Compute average precision diffs (122 lines): diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c --- a/monetdb5/extras/rdf/rdfparams.c +++ b/monetdb5/extras/rdf/rdfparams.c @@ -29,6 +29,8 @@ int dimensionFactor; float ontologySimThreshold; +int upperboundNumTables; +float generalityThreshold; void createDefaultParamsFile(void){ @@ -37,7 +39,8 @@ void createDefaultParamsFile(void){ paramFile = fopen("params.ini", "wt"); fprintf(paramFile, "dimensionFactor 3\n"); - fprintf(paramFile, "ontologySimThreshold 0.8\n"); + fprintf(paramFile, "ontologySimThreshold 0.75\n"); + fprintf(paramFile, "upperboundNumTables 1000"); fclose(paramFile); } @@ -64,7 +67,21 @@ void readParamsInput(void){ ontologySimThreshold = atof(value); printf("ontologySimThreshold = %f\n",ontologySimThreshold); } + else if (strcmp(variable, "upperboundNumTables") == 0){ + upperboundNumTables = atoi(value); + printf("upperboundNumTables = %d\n", upperboundNumTables); + } } } + + if (upperboundNumTables != 0){ + generalityThreshold = (float) 1 / (float)upperboundNumTables; + printf("generalityThreshold = %f\n",generalityThreshold); + } + else{ //default + generalityThreshold = 0.001; + } + + } diff --git a/monetdb5/extras/rdf/rdfparams.h b/monetdb5/extras/rdf/rdfparams.h --- a/monetdb5/extras/rdf/rdfparams.h +++ b/monetdb5/extras/rdf/rdfparams.h @@ -33,6 +33,8 @@ extern int dimensionFactor; extern float ontologySimThreshold; +extern int upperboundNumTables; +extern float generalityThreshold; rdf_export void createDefaultParamsFile(void); diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -3088,7 +3088,7 @@ void updateParentIdxAll(CSset *freqCSset #if USE_LABEL_FINDING_MAXCS /* - * * Return 1 if there is semantic evidence against merging the two CS's, this is the case iff the two CS's have a hierarchy and their common ancestor is too generic (support above 
IMPORTANCE_THRESHOLD). + * * Return 1 if there is semantic evidence against merging the two CS's, this is the case iff the two CS's have a hierarchy and their common ancestor is too generic (support above generalityThreshold). * */ static char isEvidenceAgainstMerging(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree) { @@ -3132,7 +3132,7 @@ char isEvidenceAgainstMerging(int freqId level++; } - if (tmpNode->percentage >= IMPORTANCE_THRESHOLD) { + if (tmpNode->percentage >= generalityThreshold) { // have common ancestor but it is too generic --> there is semantic evidence against merging the two CS's return 1; } else { @@ -4494,7 +4494,7 @@ char isSemanticSimilar(int freqId1, int */ - if (tmpNode->percentage < IMPORTANCE_THRESHOLD) { + if (tmpNode->percentage < generalityThreshold) { //printf("Merge two CS's %d (Label: "BUNFMT") and %d (Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: %f)\n", // freqId1, labels[freqId1].name, freqId2, labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage); oid classOid; @@ -8972,6 +8972,7 @@ void computeMetricsQ(CSset *freqCSset){ int tblIdx = -1; CS cs; int totalCov = 0; + float totalPrecision = 0.0; float Q = 0.0; int i; int curNumMergeCS = countNumberMergeCS(freqCSset); @@ -8990,11 +8991,14 @@ void computeMetricsQ(CSset *freqCSset){ weight[tblIdx] = (float) cs.coverage * ( fillRatio[tblIdx] + refRatio[tblIdx]); //weight[tblIdx] = (float) cs.coverage * ( fillRatio[tblIdx]); //If do not consider reference ratio totalCov += cs.coverage; + totalPrecision += fillRatio[tblIdx]; Q += weight[tblIdx]; } } printf("Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q,totalCov, curNumMergeCS); + printf("Average precision = %f\n",(float)totalPrecision/curNumMergeCS); + //printf("Average precision = %f\n",(float)totalPrecision/totalCov); Q = Q/((float)totalCov * curNumMergeCS); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- 
a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -235,7 +235,8 @@ typedef struct SubCSSet{ #define INIT_NUM_CS 1000 #define SIM_THRESHOLD 0.6 #define SIM_TFIDF_THRESHOLD 0.75 -#define IMPORTANCE_THRESHOLD 0.001 //This is used when merging CS's by common ancestor +//#define IMPORTANCE_THRESHOLD 0.001 //This is used when merging CS's by common ancestor + // Replace by generalityThreshold = 1/(upperboundNumTables) #define COMMON_ANCESTOR_LOWEST_SPECIFIC_LEVEL 2 //#define MIN_PERCETAGE_S5 5 // Merge all CS refered by more than 1/MIN_PERCETAGE_S6 percent of a CS via one property _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list