Changeset: e26375e5f8db for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e26375e5f8db
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Modify the function for detecting the good parameter diffs (84 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -8911,6 +8911,7 @@ Pscore computeMetricsQ(CSset *freqCSset) float Q = 0.0; int i; + int numExpFinalTbl = 0; //Expected number of table after removing small table Pscore pscore; int curNumMergeCS = countNumberMergeCS(freqCSset); @@ -8937,6 +8938,7 @@ Pscore computeMetricsQ(CSset *freqCSset) Q += weight[tblIdx]; } + if (isCSTable(freqCSset->items[i], 1)) numExpFinalTbl++; } printf("Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q,totalCov, curNumMergeCS); printf("Average precision = %f\n",(float)totalPrecision/curNumMergeCS); @@ -8952,6 +8954,7 @@ Pscore computeMetricsQ(CSset *freqCSset) pscore.Qscore = Q; //pscore.Cscore = pscore.nTable = curNumMergeCS; + pscore.nFinalTable = numExpFinalTbl; free(fillRatio); free(refRatio); @@ -9468,23 +9471,37 @@ CSset* copyCSset(CSset *srcCSset){ static void setFinalsimTfidfThreshold(Pscore *pscores, int numRun){ int i; - - printf("#SimThreshold #avgPrecision #OvrallPrecision #numTable \n"); + float cumgap; + float totalgap; + + + printf("SimThreshold|avgPrecision|OvrallPrecision|numTable|FinalTable\n"); for ( i = 0; i < numRun; i++){ - printf("%f %f %f %d\n",0.5 + i * 0.05,pscores[i].avgPrec, pscores[i].overallPrec, pscores[i].nTable); - } - + printf("%f|%f|%f|%d|%d\n",0.5 + i * 0.05,pscores[i].avgPrec, pscores[i].overallPrec, pscores[i].nTable,pscores[i].nFinalTable); + } + + totalgap = pscores[numRun-1].overallPrec - pscores[0].overallPrec; for ( i = 0; i < numRun; i++){ + /* + float curgap; float trendRatio = 1.0; //Find the turning point if (i > 0 && i < (numRun - 1)){ + curgap = pscores[i].overallPrec - pscores[i-1].overallPrec; + cumgap = pscores[numRun].overallPrec - pscores[i].overallPrec; trendRatio = (float)(pscores[i].overallPrec - 
pscores[i-1].overallPrec)/(pscores[i+1].overallPrec - pscores[i].overallPrec); printf("Turning %f \n",trendRatio); - if (trendRatio > 2) { + if (trendRatio > 2 && curgap > cumgap && pscores[i].nFinalTable < upperboundNumTables) { simTfidfThreshold = 0.5 + i * 0.05; break; } } + */ + cumgap = pscores[i].overallPrec - pscores[0].overallPrec; + if (cumgap > 0.9 * totalgap){ + simTfidfThreshold = 0.5 + i * 0.05; + break; + } } } diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -444,7 +444,8 @@ typedef struct Pscore{ //Performance sc float overallPrec; //overall precision float Qscore; //metric score Q float Cscore; //metric score C - int nTable; //number of tables + int nTable; //number of tables + int nFinalTable; //Expected number of final table after removing e.g., small size table } Pscore; #define NUM_SAMPLETABLE 20 _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list