Changeset: 303ec4914a45 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=303ec4914a45
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Clean the code diffs (truncated from 432 to 300 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -3682,7 +3682,6 @@ void generatecsRelSum(CSrel csRel, int f } -#if USE_LABEL_FOR_MERGING static LabelStat* initLabelStat(void){ LabelStat *labelStat = (LabelStat*) malloc(sizeof(LabelStat)); @@ -3701,12 +3700,10 @@ LabelStat* initLabelStat(void){ return labelStat; } -#endif /* * * */ -#if USE_LABEL_FOR_MERGING #if USE_ALTERNATIVE_NAME static oid getMostSuitableName(CSlabel *labels, int freqIdx, int candIdx){ @@ -3741,7 +3738,6 @@ oid getMostSuitableName(CSlabel *labels, } #endif -#endif #if DETECT_INCORRECT_TYPE_SUBJECT @@ -3902,7 +3898,6 @@ void buildLabelStatForFinalMergeCS(Label #endif -#if USE_LABEL_FOR_MERGING static void buildLabelStat(LabelStat *labelStat, CSlabel *labels, CSset *freqCSset, int k){ int i,j; @@ -3989,9 +3984,7 @@ void buildLabelStat(LabelStat *labelStat } } -#endif - -#if USE_LABEL_FOR_MERGING + static void freeLabelStat(LabelStat *labelStat){ int i; @@ -4005,7 +3998,6 @@ void freeLabelStat(LabelStat *labelStat) BBPreclaim(labelStat->labelBat); free(labelStat); } -#endif static char isSignificationPrecisionDrop(CS *cs1, CS *cs2){ @@ -4026,13 +4018,21 @@ char isSignificationPrecisionDrop(CS *cs estimatedFillRatio = (float) newFill / (float) (newSupport * numCombineP); - if ((minFillRatio / estimatedFillRatio) > 2) return 1; + if ((minFillRatio / estimatedFillRatio) > 5) return 1; return 0; - - -} - +} + +static +char isNoCommonProp(CS *cs1, CS *cs2){ + int numCombineP = 0; + + getNumCombinedP(cs1->lstProp, cs2->lstProp, cs1->numProp, cs2->numProp, &numCombineP); + + if (numCombineP == (cs1->numProp + cs2->numProp)) return 1; + + return 0; +} static void doMerge(CSset *freqCSset, int ruleNum, int freqId1, int freqId2, oid *mergecsId, CSlabel** labels, oid** ontmetadata, int ontmetadataCount, oid name, int isType, int isOntology, int 
isFK){ CS *mergecs; @@ -4043,11 +4043,18 @@ void doMerge(CSset *freqCSset, int ruleN cs1 = &(freqCSset->items[freqId1]); cs2 = &(freqCSset->items[freqId2]); - - if (isSignificationPrecisionDrop(cs1, cs2)){ - printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2); - return; - } + + + if (0){ + if (isSignificationPrecisionDrop(cs1, cs2)){ + printf("Merging freqCS %d and %d may significantly drop precision\n", freqId1, freqId2); + return; + } + if (isNoCommonProp(cs1, cs2)){ + printf("FreqCS %d and %d have no prop in common--> no merging\n", freqId1, freqId2); + return; + } + } //Check whether these CS's belong to any mergeCS if (cs1->parentFreqIdx == -1 && cs2->parentFreqIdx == -1){ /* New merge */ @@ -4094,7 +4101,6 @@ void doMerge(CSset *freqCSset, int ruleN -#if USE_LABEL_FOR_MERGING static str mergeFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount,bat *mapbatid){ int i, j; @@ -4184,101 +4190,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS } #else - #if MERGING_CONSIDER_NAMEORIGINALITY - //For ontology name - tmpCount = 0; - for (k = 0; k < labelStat->lstCount[i]; k++){ - freqId1 = labelStat->freqIdList[i][k]; - if ((*labels)[freqId1].isOntology == 1) { - cs1 = &(freqCSset->items[freqId1]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs1->type == DIMENSIONCS) continue; - #endif - tmpCount++; - break; - } - } - for (j = k+1; j < labelStat->lstCount[i]; j++){ - freqId2 = labelStat->freqIdList[i][j]; - cs2 = &(freqCSset->items[freqId2]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs2->type == DIMENSIONCS) - continue; - #endif - if ((*labels)[freqId2].isOntology == 1){ - //printf("Merge FreqCS %d and FreqCS %d by Ontology name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 1, 0); // isOntology - //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); - tmpCount++; - } - } - #if 
OUTPUT_FREQID_PER_LABEL - fprintf(fout, " %d freqCS merged as having same name by Ontology. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp); - #endif - - //For Type - tmpCount = 0; - for (k = 0; k < labelStat->lstCount[i]; k++){ - freqId1 = labelStat->freqIdList[i][k]; - if ((*labels)[freqId1].isType == 1) { - cs1 = &(freqCSset->items[freqId1]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs1->type == DIMENSIONCS) continue; - #endif - tmpCount++; - break; - } - } - for (j = k+1; j < labelStat->lstCount[i]; j++){ - freqId2 = labelStat->freqIdList[i][j]; - cs2 = &(freqCSset->items[freqId2]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs2->type == DIMENSIONCS) continue; - #endif - if ((*labels)[freqId2].isType == 1){ - //printf("Merge FreqCS %d and FreqCS %d by Type name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 1, 0, 0); // isType - //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); - tmpCount++; - } - } - #if OUTPUT_FREQID_PER_LABEL - fprintf(fout, " %d freqCS merged as having same name by TYPE. MergedCS has %d prop. 
\n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp); - #endif - - //For FK - tmpCount = 0; - for (k = 0; k < labelStat->lstCount[i]; k++){ - freqId1 = labelStat->freqIdList[i][k]; - if ((*labels)[freqId1].isFK == 1) { - cs1 = &(freqCSset->items[freqId1]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs1->type == DIMENSIONCS) continue; - #endif - tmpCount++; - break; - } - } - for (j = k+1; j < labelStat->lstCount[i]; j++){ - freqId2 = labelStat->freqIdList[i][j]; - cs2 = &(freqCSset->items[freqId2]); - #if NOT_MERGE_DIMENSIONCS_IN_S1 - if (cs2->type == DIMENSIONCS) continue; - #endif - if ((*labels)[freqId2].isFK == 1){ - //printf("Merge FreqCS %d and FreqCS %d by FK name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 0, 1); // isFK - //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); - tmpCount++; - } - } - - #if OUTPUT_FREQID_PER_LABEL - fprintf(fout, " %d freqCS merged as having same name by FK. MergedCS has %d prop. \n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp); - #endif - - #else //MERGING_CONSIDER_NAMEORIGINALITY == 0 - tmpCount = 0; for (k = 0; k < labelStat->lstCount[i]; k++){ freqId1 = labelStat->freqIdList[i][k]; @@ -4313,8 +4224,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS fprintf(fout, " %d freqCS merged as having same name (by Ontology, Type, FK). MergedCS has %d prop. 
\n", tmpCount, freqCSset->items[freqCSset->numCSadded -1].numProp); #endif - #endif - #endif /* USE_MULTIWAY_MERGING */ #if OUTPUT_FREQID_PER_LABEL @@ -4350,7 +4259,6 @@ str mergeFreqCSByS1(CSset *freqCSset, CS return MAL_SUCCEED; } -#endif static void mergeFreqCSByS5(CSrel *csrelMergeFreqSet, CSset *freqCSset, CSlabel** labels, oid* mergeCSFreqCSMap, int curNumMergeCS, oid *mergecsId, oid** ontmetadata, int ontmetadataCount){ @@ -4482,7 +4390,6 @@ void mergeFreqCSByS5(CSrel *csrelMergeFr } -#if USE_LABEL_FOR_MERGING static char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree, int numOrigFreqCS, oid *ancestor, BAT *ontmetaBat, OntClass *ontclassSet){ /*Rule S1 S2 S3*/ int i, j; @@ -4492,49 +4399,11 @@ char isSemanticSimilar(int freqId1, int int level; OntoUsageNode *tmpNode; - /* - int k1, k2; - if (labels[freqId1].name == labels[freqId2].name) - return 1; - else{ - k1 = (labels[freqId1].candidatesCount < TOPK)?labels[freqId1].candidatesCount:TOPK; - k2 = (labels[freqId2].candidatesCount < TOPK)?labels[freqId2].candidatesCount:TOPK; - - for (i = 0; i < k1; i++){ - for (j = 0; j < k2; j++){ - if (labels[freqId1].candidates[i] == labels[freqId2].candidates[j]) - { - (*ancestor) = labels[freqId1].candidates[i]; - return 1; - } - } - } - } - */ - // Check for the most common ancestor hCount1 = labels[freqId1].hierarchyCount; hCount2 = labels[freqId2].hierarchyCount; minCount = (hCount1 > hCount2)?hCount2:hCount1; - /* - if (minCount > 0){ - printf("minCount = %d \n", minCount); - printf("Finding common ancestor for %d and %d \n", freqId1, freqId2 ); - printf("FreqCS1: "); - for (i = 0; i < hCount1; i++){ - printf(" " BUNFMT, labels[freqId1].hierarchy[hCount1-1-i]); - } - printf(" \n "); - printf("FreqCS2: "); - for (i = 0; i < hCount2; i++){ - printf(" " BUNFMT, labels[freqId2].hierarchy[hCount2-1-i]); - } - printf(" \n "); - } - */ - - if (0){ if ((freqId1 > numOrigFreqCS -1) || (freqId2 > numOrigFreqCS -1)) return 0; @@ -4560,15 
+4429,6 @@ char isSemanticSimilar(int freqId1, int } - /* _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list