Changeset: 4bfab5b73cf2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4bfab5b73cf2
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
Branch: rdf
Log Message:

Identify the availability of a good type value.

We consider a good type value to be one that appears in more than 95% of a
CS, i.e., a really frequent value. When such a value exists, the ontology
name is not chosen over it as the CS's name.


diffs (51 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -2142,6 +2142,7 @@ void getTableName(CSlabel* label, int cs
        int             choosenFreq = 0;
 
        int             bestOntCandIdx = -1;
+       int             isGoodTypeExist = 0; 
 
        (void) ontmetaBat;
        // --- TYPE ---
@@ -2176,8 +2177,10 @@ void getTableName(CSlabel* label, int cs
                        }
                }
                */
-
+               
                if (typeAttributesHistogram[csIdx][i][0].percent < 
TYPE_FREQ_THRESHOLD) continue; // sorted
+               if (typeAttributesHistogram[csIdx][i][0].percent > 
GOOD_TYPE_FREQ_THRESHOLD) isGoodTypeExist = 1;
+
                tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount 
+ 1));
                if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
 
@@ -2325,13 +2328,13 @@ void getTableName(CSlabel* label, int cs
        }
        
        // If the name found previously (based on the type values) is not 
-       // an ontology-based value (e.g., simply a string), we will choose the 
ontology name for 
-       // the CS's name. 
+       // an ontology-based value (e.g., simply a string), and not a really 
good (so frequent) type value 
+       // we will choose the ontology name for the CS's name. 
        
        // chose the best ontology candidate based on number of matched props 
as label 
        // TODO: Improve this score a bit, by choosing the higher tfidf score, 
than number of matched prop
        
-       if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){
+       if (choosenOntologyTypeValue == BUN_NONE && isGoodTypeExist == 0 && 
resultCount[csIdx] >= 1){
                label->name = result[csIdx][bestOntCandIdx];
                nameFound = 1;
                #if INFO_WHERE_NAME_FROM
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -93,6 +93,7 @@ enum {
 
 #define FK_FREQ_THRESHOLD 25           // X % of the targeted subjects have to 
be in this table
 #define TYPE_FREQ_THRESHOLD 80         // X % of the type values have to be 
this value
+#define GOOD_TYPE_FREQ_THRESHOLD 95    // If a type appears really frequent in 
that CS, it should be choosen
 //#define ONTOLOGY_FREQ_THRESHOLD 0.4  // similarity threshold for tfidf 
simularity for ontology classes
 #define ONTOLOGY_FREQ_THRESHOLD 0.8    // similarity threshold for tfidf 
simularity for ontology classes
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to