Changeset: 953fd23b3505 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=953fd23b3505
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Use labels for finding maxCS and fix problems with using labels in the merging process.


diffs (151 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -359,7 +359,7 @@ tripleHandler(void* user_data, const rap
                
                } else if (triple->object->type == RAPTOR_TERM_TYPE_LITERAL) {
                        unsigned char* objStr;
-                       ObjectType objType;
+                       ObjectType objType = STRING;
                        objStr = raptor_term_to_string(triple->object);
                        objType = getObjectType(objStr, &realNumValue);
 
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1853,19 +1853,31 @@ void getMaximumFreqCSs(CSset *freqCSset,
        int     tmpParentIdx; 
        int*    coverage; 
        int*    freq; 
+       char    isLabelComparable = 0;
 
        (void) labels; 
+       (void) isLabelComparable;
 
        printf("Retrieving maximum frequent CSs: \n");
 
        for (i = 0; i < numFreqCS; i++){
                if (freqCSset->items[i].parentFreqIdx != -1) continue;
+               isLabelComparable = 0;
+               if (strcmp(labels[i].name, "DUMMY") != 0) isLabelComparable = 1;
+
                for (j = (i+1); j < numFreqCS; j++){
                        if (freqCSset->items[j].numProp > 
freqCSset->items[i].numProp){
                                if (isSubset(freqCSset->items[j].lstProp, 
freqCSset->items[i].lstProp,  
                                                
freqCSset->items[j].numProp,freqCSset->items[i].numProp) == 1) { 
                                        /* CSj is a superset of CSi */
+                                       #if USE_LABEL_FINDING_MAXCS
+                                       if (isLabelComparable == 1 && 
strcmp(labels[i].name, labels[j].name) == 0) {
+                                               
freqCSset->items[i].parentFreqIdx = j;
+                                               break;
+                                       }
+                                       #else   
                                        freqCSset->items[i].parentFreqIdx = j; 
+                                       #endif
                                        break; 
                                }
                        }
@@ -1873,7 +1885,13 @@ void getMaximumFreqCSs(CSset *freqCSset,
                                if (isSubset(freqCSset->items[i].lstProp, 
freqCSset->items[j].lstProp,  
                                                
freqCSset->items[i].numProp,freqCSset->items[j].numProp) == 1) { 
                                        /* CSj is a subset of CSi */
+                                       #if USE_LABEL_FINDING_MAXCS
+                                       if (isLabelComparable == 1 && 
strcmp(labels[i].name, labels[j].name) == 0) {
+                                               
freqCSset->items[j].parentFreqIdx = i;
+                                       }
+                                       #else
                                        freqCSset->items[j].parentFreqIdx = i; 
+                                       #endif
                                }               
                        
                        }
@@ -2227,8 +2245,11 @@ void mergeMaximumFreqCSsAll(CSset *freqC
 
        PropStat        *propStat;      /* Store statistics about properties */
        int             nummergedCSs = 0;
+       char            isLabelComparable = 0; 
+       char            isSameLabel = 0; 
        
        (void) labels;
+       (void) isLabelComparable;
 
        for (i = 0; i < freqCSset->numCSadded; i++){
                if (freqCSset->items[i].parentFreqIdx == -1){
@@ -2248,30 +2269,38 @@ void mergeMaximumFreqCSsAll(CSset *freqC
 
        for (i = 0; i < numMaxCSs; i++){
                freqId1 = superCSFreqCSMap[i];
+               //printf("Label of %d CS is %s \n", freqId1, 
labels[freqId1].name);
+               isLabelComparable = 0; 
+               if (strcmp(labels[freqId1].name,"DUMMY") != 0) 
isLabelComparable = 1; 
+
                cs1 = (CS*) &(freqCSset->items[freqId1]);
                for (j = (i+1); j < numMaxCSs; j++){
                        freqId2 = superCSFreqCSMap[j];
                        cs2 = (CS*) &(freqCSset->items[freqId2]);
-                       
-                       if(USINGTFIDF == 0){
-                               simscore = similarityScore(cs1->lstProp, 
cs2->lstProp,
-                                       cs1->numProp,cs2->numProp,&numCombineP);
-
-                               //printf("simscore Jaccard = %f \n", simscore);
-                       }
-                       else{
-                               simscore = similarityScoreTFIDF(cs1->lstProp, 
cs2->lstProp,
-                                       cs1->numProp,cs2->numProp,&numCombineP, 
propStat);
-                               //printf("         Cosine = %f \n", simscore);
-                               
-                       }
-                       
+                       isSameLabel = 0; 
+
                        #if     USE_LABEL_FOR_MERGING
-                       if (strcmp(labels[freqId1].name, labels[freqId2].name) 
== 0){
-                               //printf("Same labels between freqCS %d and 
freqCS %d \n", freqId1, freqId2);
+                       if (isLabelComparable == 1 && 
strcmp(labels[freqId1].name, labels[freqId2].name) == 0){
+                               //printf("Same labels between freqCS %d and 
freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore);
+                               isSameLabel = 1;
                                simscore = 1; 
                        }
                        #endif
+
+                       if (isSameLabel == 0){
+                               if(USINGTFIDF == 0){
+                                       simscore = 
similarityScore(cs1->lstProp, cs2->lstProp,
+                                               
cs1->numProp,cs2->numProp,&numCombineP);
+
+                                       //printf("simscore Jaccard = %f \n", 
simscore);
+                               }
+                               else{
+                                       simscore = 
similarityScoreTFIDF(cs1->lstProp, cs2->lstProp,
+                                               
cs1->numProp,cs2->numProp,&numCombineP, propStat);
+                                       //printf("         Cosine = %f \n", 
simscore);
+                                       
+                               }
+                       }
                        
                        //simscore = 0.0;
                        #if     USINGTFIDF      
@@ -3163,6 +3192,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
        // Create label per freqCS
        csIdFreqIdxMap = (int *) malloc (sizeof(int) * (*maxCSoid + 1));
        initcsIdFreqIdxMap(csIdFreqIdxMap, *maxCSoid + 1, -1, freqCSset);
+       printf("Using ontologies with %d ontattributesCount and %d 
ontmetadataCount \n",ontattributesCount,ontmetadataCount);
+
        labels = createLabels(freqCSset, csrelSet, *maxCSoid + 1, sbat, si, pi, 
oi, *subjCSMap, mbat, csIdFreqIdxMap, *freqThreshold, ontattributes, 
ontattributesCount, ontmetadata, ontmetadataCount);
 
 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -88,7 +88,9 @@ typedef struct PropStat {
 
 #define FULL_PROP_STAT 1       // Only use for showing the statistic on all 
properties / all CSs. (Default should be 0)
 
-#define USE_LABEL_FOR_MERGING  1       // Use the labels received from 
labeling process for finding maxCS and mergeCS
+
+#define USE_LABEL_FINDING_MAXCS        1       // Use the labels received from 
labeling process for finding maxCS 
+#define USE_LABEL_FOR_MERGING  1       // Use the labels received from 
labeling process for finding mergeCS
 
 typedef struct CS
 {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to