Changeset: 6b057271bba9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6b057271bba9
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

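Allow using S2 for all merged CSs (the check in isSemanticSimilar that rejected any CS beyond the original frequent CSs is disabled).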

Increase the threshold for TF-IDF similarity (SIM_TFIDF_THRESHOLD: 0.55 -> 0.75).


diffs (65 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3799,9 +3799,11 @@ char isSemanticSimilar(int freqId1, int 
        }
 
        */
-
+       
+       if (0){
        if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1))
                return 0;
+       }
 
        for (i = 0; i < minCount; i++){
                if (labels[freqId1].hierarchy[hCount1-1-i] != 
labels[freqId2].hierarchy[hCount2-1-i])
@@ -3821,6 +3823,7 @@ char isSemanticSimilar(int freqId1, int 
                        }
                        level++;
                }
+               
                /*
                printf("The common ancestor of freqCS %d ("BUNFMT") and freqCS 
%d ("BUNFMT") is: "BUNFMT" --- %f \n", freqId1, labels[freqId1].name, freqId2, 
labels[freqId2].name, tmpNode->uri, tmpNode->percentage);
 
@@ -3830,8 +3833,8 @@ char isSemanticSimilar(int freqId1, int 
                */
 
                if (tmpNode->percentage < IMPORTANCE_THRESHOLD) {
-                       printf("Merge two CS's %d (Label: "BUNFMT") and %d 
(Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: 
%f)\n",
-                                       freqId1, labels[freqId1].name, freqId2, 
labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
+                       //printf("Merge two CS's %d (Label: "BUNFMT") and %d 
(Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: 
%f)\n",
+                       //              freqId1, labels[freqId1].name, freqId2, 
labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage);
 
                        (*ancestor) = tmpNode->uri;
                        return 1;
@@ -4070,7 +4073,7 @@ void mergeCSByS2(CSset *freqCSset, CSlab
                        #if     NOT_MERGE_DIMENSIONCS
                        if (freqCSset->items[freqId2].type == DIMENSIONCS) 
continue; 
                        #endif
-
+                       
                        if (isLabelComparable == 1 && 
isSemanticSimilar(freqId1, freqId2, (*labels), 
ontoUsageTree,freqCSset->numOrigFreqCS, &name) == 1){
                                //printf("Same labels between freqCS %d and 
freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore);
                                doMerge(freqCSset, S2, freqId1, freqId2, 
mergecsId, labels, ontmetadata, ontmetadataCount, name);
@@ -4137,6 +4140,7 @@ void mergeCSByS4(CSset *freqCSset, CSlab
                        #else   
                        if (simscore > SIM_THRESHOLD) {
                        #endif          
+                               //printf("Merge %d and %d with simscore = %f 
\n",freqId1, freqId2,simscore);
                                doMerge(freqCSset, S4, freqId1, freqId2, 
mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE);
                        }
                }
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -235,7 +235,8 @@ typedef struct SubCSSet{
 //#define INIT_NUM_CS 9999 // workaround
 #define INIT_NUM_CS 1000 // workaround
 #define SIM_THRESHOLD 0.6
-#define SIM_TFIDF_THRESHOLD 0.55
+//#define SIM_TFIDF_THRESHOLD 0.55
+#define SIM_TFIDF_THRESHOLD 0.75
 #define IMPORTANCE_THRESHOLD 0.01
 #define MIN_PERCETAGE_S5 5     // Merge all CS refered by more than 
1/MIN_PERCETAGE_S6 percent of a CS via one property
 #define MIN_FROMTABLE_SIZE_S5 100  // The minimum size of the "from" table in 
S6. Meaning that 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to