Changeset: 4bfab5b73cf2 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4bfab5b73cf2 Modified Files: monetdb5/extras/rdf/rdflabels.c monetdb5/extras/rdf/rdflabels.h Branch: rdf Log Message:
Identify the availability of a good type value. We consider a good type value to be a value that appears in more than 95% of a CS, i.e., one that is really frequent. diffs (51 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -2142,6 +2142,7 @@ void getTableName(CSlabel* label, int cs int choosenFreq = 0; int bestOntCandIdx = -1; + int isGoodTypeExist = 0; (void) ontmetaBat; // --- TYPE --- @@ -2176,8 +2177,10 @@ void getTableName(CSlabel* label, int cs } } */ - + if (typeAttributesHistogram[csIdx][i][0].percent < TYPE_FREQ_THRESHOLD) continue; // sorted + if (typeAttributesHistogram[csIdx][i][0].percent > GOOD_TYPE_FREQ_THRESHOLD) isGoodTypeExist = 1; + tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount + 1)); if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); @@ -2325,13 +2328,13 @@ void getTableName(CSlabel* label, int cs } // If the name found previously (based on the type values) is not - // an ontology-based value (e.g., simply a string), we will choose the ontology name for - // the CS's name. + // an ontology-based value (e.g., simply a string), and not a really good (so frequent) type value + // we will choose the ontology name for the CS's name. 
// chose the best ontology candidate based on number of matched props as label // TODO: Improve this score a bit, by choosing the higher tfidf score, than number of matched prop - if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){ + if (choosenOntologyTypeValue == BUN_NONE && isGoodTypeExist == 0 && resultCount[csIdx] >= 1){ label->name = result[csIdx][bestOntCandIdx]; nameFound = 1; #if INFO_WHERE_NAME_FROM diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h --- a/monetdb5/extras/rdf/rdflabels.h +++ b/monetdb5/extras/rdf/rdflabels.h @@ -93,6 +93,7 @@ enum { #define FK_FREQ_THRESHOLD 25 // X % of the targeted subjects have to be in this table #define TYPE_FREQ_THRESHOLD 80 // X % of the type values have to be this value +#define GOOD_TYPE_FREQ_THRESHOLD 95 // If a type appears really frequent in that CS, it should be choosen //#define ONTOLOGY_FREQ_THRESHOLD 0.4 // similarity threshold for tfidf simularity for ontology classes #define ONTOLOGY_FREQ_THRESHOLD 0.8 // similarity threshold for tfidf simularity for ontology classes _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list