Changeset: 303ec4914a45 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=303ec4914a45
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Clean the code


diffs (truncated from 432 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3682,7 +3682,6 @@ void generatecsRelSum(CSrel csRel, int f
 
 }
 
-#if USE_LABEL_FOR_MERGING
 static
 LabelStat* initLabelStat(void){
        LabelStat *labelStat = (LabelStat*) malloc(sizeof(LabelStat)); 
@@ -3701,12 +3700,10 @@ LabelStat* initLabelStat(void){
 
        return labelStat; 
 }
-#endif
 
 /*
  * 
  * */
-#if USE_LABEL_FOR_MERGING
 #if USE_ALTERNATIVE_NAME 
 static
 oid getMostSuitableName(CSlabel *labels, int freqIdx, int candIdx){
@@ -3741,7 +3738,6 @@ oid getMostSuitableName(CSlabel *labels,
 
 }
 #endif
-#endif
 
 #if DETECT_INCORRECT_TYPE_SUBJECT
 
@@ -3902,7 +3898,6 @@ void buildLabelStatForFinalMergeCS(Label
 
 #endif
 
-#if USE_LABEL_FOR_MERGING
 static
 void buildLabelStat(LabelStat *labelStat, CSlabel *labels, CSset *freqCSset, int k){
        int     i,j; 
@@ -3989,9 +3984,7 @@ void buildLabelStat(LabelStat *labelStat
        }
 
 }
-#endif
-
-#if USE_LABEL_FOR_MERGING
+
 static 
 void freeLabelStat(LabelStat *labelStat){
        int i; 
@@ -4005,7 +3998,6 @@ void freeLabelStat(LabelStat *labelStat)
        BBPreclaim(labelStat->labelBat);
        free(labelStat);
 }
-#endif
 
 static
 char isSignificationPrecisionDrop(CS *cs1, CS *cs2){
@@ -4026,13 +4018,21 @@ char isSignificationPrecisionDrop(CS *cs
 
        estimatedFillRatio = (float) newFill / (float) (newSupport * numCombineP);
 
-       if ((minFillRatio / estimatedFillRatio) > 2) return 1; 
+       if ((minFillRatio / estimatedFillRatio) > 5) return 1; 
 
        return 0;
-       
-
-}
-
+}
+
+static
+char isNoCommonProp(CS *cs1, CS *cs2){
+       int numCombineP = 0;
+       
+       getNumCombinedP(cs1->lstProp, cs2->lstProp, cs1->numProp, cs2->numProp, &numCombineP);
+       
+       if (numCombineP == (cs1->numProp + cs2->numProp)) return 1; 
+
+       return 0;
+}
 static 
 void doMerge(CSset *freqCSset, int ruleNum, int freqId1, int freqId2, oid *mergecsId, CSlabel** labels, oid** ontmetadata, int ontmetadataCount, oid name, int isType, int isOntology, int isFK){
        CS      *mergecs; 
@@ -4043,11 +4043,18 @@ void doMerge(CSset *freqCSset, int ruleN
 
        cs1 = &(freqCSset->items[freqId1]);
        cs2 = &(freqCSset->items[freqId2]);
-
-       if (isSignificationPrecisionDrop(cs1, cs2)){
-               printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2);
-               return;
-       }       
+       
+
+       if (0){
+               if (isSignificationPrecisionDrop(cs1, cs2)){
+                       printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2);
+                       return;
+               }
+               if (isNoCommonProp(cs1, cs2)){
+                       printf("FreqCS %d and %d have no prop in common--> no merging\n", freqId1, freqId2);
+                       return;
+               }
+       }
 
        //Check whether these CS's belong to any mergeCS
        if (cs1->parentFreqIdx == -1 && cs2->parentFreqIdx == -1){      /* New merge */
@@ -4094,7 +4101,6 @@ void doMerge(CSset *freqCSset, int ruleN
 
 
 
-#if USE_LABEL_FOR_MERGING
 static
 str mergeFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount,bat *mapbatid){
        int             i, j; 
@@ -4184,101 +4190,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
                        }
                        #else
 
-                       #if MERGING_CONSIDER_NAMEORIGINALITY    
-                       //For ontology name
-                       tmpCount = 0; 
-                       for (k = 0; k < labelStat->lstCount[i]; k++){
-                               freqId1 = labelStat->freqIdList[i][k];
-                               if ((*labels)[freqId1].isOntology == 1) {
-                                       cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                                       if (cs1->type == DIMENSIONCS) continue;
-                                       #endif
-                                       tmpCount++;
-                                       break; 
-                               }
-                       }
-                       for (j = k+1; j < labelStat->lstCount[i]; j++){
-                               freqId2 = labelStat->freqIdList[i][j];
-                               cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                               if (cs2->type == DIMENSIONCS) 
-                                       continue; 
-                               #endif
-                               if ((*labels)[freqId2].isOntology == 1){
-                                       //printf("Merge FreqCS %d and FreqCS %d 
by Ontology name \n", freqId1, freqId2);
-                                       doMerge(freqCSset, S1, freqId1, 
freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 1, 0); // 
isOntology
-                                       //printf("Number of added cs in freqCS: 
%d \n", freqCSset->numCSadded); 
-                                       tmpCount++;
-                               }
-                       }
-                       #if OUTPUT_FREQID_PER_LABEL
-                       fprintf(fout, " %d freqCS merged as having same name by 
Ontology. MergedCS has %d prop. \n", tmpCount, 
freqCSset->items[freqCSset->numCSadded -1].numProp);
-                       #endif
-
-                       //For Type
-                       tmpCount = 0;
-                       for (k = 0; k < labelStat->lstCount[i]; k++){
-                               freqId1 = labelStat->freqIdList[i][k];
-                               if ((*labels)[freqId1].isType == 1) {
-                                       cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                                       if (cs1->type == DIMENSIONCS) continue;
-                                       #endif
-                                       tmpCount++;
-                                       break; 
-                               }
-                       }
-                       for (j = k+1; j < labelStat->lstCount[i]; j++){
-                               freqId2 = labelStat->freqIdList[i][j];
-                               cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                               if (cs2->type == DIMENSIONCS) continue; 
-                               #endif
-                               if ((*labels)[freqId2].isType == 1){
-                                       //printf("Merge FreqCS %d and FreqCS %d 
by Type name \n", freqId1, freqId2);
-                                       doMerge(freqCSset, S1, freqId1, 
freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 1, 0, 0); // 
isType
-                                       //printf("Number of added cs in freqCS: 
%d \n", freqCSset->numCSadded);                                 
-                                       tmpCount++;
-                               }
-                       }
-                       #if OUTPUT_FREQID_PER_LABEL
-                       fprintf(fout, " %d freqCS merged as having same name by 
TYPE. MergedCS has %d prop. \n", tmpCount, 
freqCSset->items[freqCSset->numCSadded -1].numProp);
-                       #endif
-
-                       //For FK
-                       tmpCount = 0;
-                       for (k = 0; k < labelStat->lstCount[i]; k++){
-                               freqId1 = labelStat->freqIdList[i][k];
-                               if ((*labels)[freqId1].isFK == 1) {
-                                       cs1 = &(freqCSset->items[freqId1]);
-                                       #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                                       if (cs1->type == DIMENSIONCS) continue;
-                                       #endif
-                                       tmpCount++;
-                                       break; 
-                               }
-                       }
-                       for (j = k+1; j < labelStat->lstCount[i]; j++){
-                               freqId2 = labelStat->freqIdList[i][j];
-                               cs2 = &(freqCSset->items[freqId2]);
-                               #if     NOT_MERGE_DIMENSIONCS_IN_S1
-                               if (cs2->type == DIMENSIONCS) continue; 
-                               #endif
-                               if ((*labels)[freqId2].isFK == 1){
-                                       //printf("Merge FreqCS %d and FreqCS %d 
by FK name \n", freqId1, freqId2);
-                                       doMerge(freqCSset, S1, freqId1, 
freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 0, 1); // 
isFK
-                                       //printf("Number of added cs in freqCS: 
%d \n", freqCSset->numCSadded);                                         
-                                       tmpCount++;
-                               }
-                       }
-
-                       #if OUTPUT_FREQID_PER_LABEL
-                       fprintf(fout, " %d freqCS merged as having same name by 
FK. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded 
-1].numProp);
-                       #endif
-
-                       #else   //MERGING_CONSIDER_NAMEORIGINALITY == 0
-
                        tmpCount = 0;
                        for (k = 0; k < labelStat->lstCount[i]; k++){
                                freqId1 = labelStat->freqIdList[i][k];
@@ -4313,8 +4224,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
                        fprintf(fout, " %d freqCS merged as having same name (by Ontology, Type, FK). MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp);
                        #endif
                        
-                       #endif
-
                        #endif /* USE_MULTIWAY_MERGING */
 
                        #if OUTPUT_FREQID_PER_LABEL
@@ -4350,7 +4259,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS
 
        return MAL_SUCCEED; 
 }
-#endif
 
 static
 void mergeFreqCSByS5(CSrel *csrelMergeFreqSet, CSset *freqCSset, CSlabel** labels, oid* mergeCSFreqCSMap, int curNumMergeCS, oid *mergecsId, oid** ontmetadata, int ontmetadataCount){
@@ -4482,7 +4390,6 @@ void mergeFreqCSByS5(CSrel *csrelMergeFr
 }
 
 
-#if USE_LABEL_FOR_MERGING
 static
 char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree, int numOrigFreqCS, oid *ancestor, BAT *ontmetaBat, OntClass *ontclassSet){      /*Rule S1 S2 S3*/
        int i, j; 
@@ -4492,49 +4399,11 @@ char isSemanticSimilar(int freqId1, int 
        int level; 
        OntoUsageNode *tmpNode; 
                
-       /*
-       int k1, k2; 
-       if (labels[freqId1].name == labels[freqId2].name)
-               return 1;
-       else{ 
-               k1 =  (labels[freqId1].candidatesCount < 
TOPK)?labels[freqId1].candidatesCount:TOPK;
-               k2 =  (labels[freqId2].candidatesCount < 
TOPK)?labels[freqId2].candidatesCount:TOPK;    
-
-               for (i = 0; i < k1; i++){
-                       for (j = 0; j < k2; j++){
-                               if (labels[freqId1].candidates[i] == 
labels[freqId2].candidates[j])
-                               {
-                                       (*ancestor) = 
labels[freqId1].candidates[i];
-                                       return 1; 
-                               }
-                       }
-               }
-       }
-       */
-       
        // Check for the most common ancestor
        hCount1 = labels[freqId1].hierarchyCount;
        hCount2 = labels[freqId2].hierarchyCount;
        minCount = (hCount1 > hCount2)?hCount2:hCount1;
        
-       /*
-       if (minCount > 0){
-       printf("minCount = %d \n", minCount);
-       printf("Finding common ancestor for %d and %d \n", freqId1, freqId2 );
-       printf("FreqCS1: ");
-       for (i = 0; i < hCount1; i++){
-               printf(" " BUNFMT, labels[freqId1].hierarchy[hCount1-1-i]);
-       }
-       printf(" \n ");
-       printf("FreqCS2: ");
-       for (i = 0; i < hCount2; i++){
-               printf(" " BUNFMT, labels[freqId2].hierarchy[hCount2-1-i]);
-       }
-       printf(" \n ");
-       }
-       */
-
-       
        if (0){
        if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1))
                return 0;
@@ -4560,15 +4429,6 @@ char isSemanticSimilar(int freqId1, int 
                }
                
                
-               /*
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to