Changeset: d201cd7814d2 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d201cd7814d2 Modified Files: monetdb5/extras/rdf/rdflabels.c monetdb5/extras/rdf/rdflabels.h monetdb5/extras/rdf/rdfschema.c Branch: rdf Log Message:
set name origin (isType, isOntology, isFK) when updating labels diffs (285 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -2928,7 +2928,7 @@ oid* mergeCandidates(int *candidatesCoun * If no MERGECS is created (subset-superset relation), mergeCSFreqId contains the Id of the superset class. * For S1 and S2, parameter 'name' is used to avoid recomputation of CS names */ -str updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, oid **ontmetadata, int ontmetadataCount, int *lstFreqId, int numIds){ +str updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, int isType, int isOntology, int isFK, oid **ontmetadata, int ontmetadataCount, int *lstFreqId, int numIds){ int i; int freqCS1Counter; CSlabel big, small; @@ -2945,6 +2945,12 @@ str updateLabel(int ruleNumber, CSset *f (void) lstFreqId; (void) numIds; + #if ! 
INFO_WHERE_NAME_FROM + (void) isType; + (void) isOntology; + (void) isFK; + #endif + if (newCS) { // realloc labels *labels = GDKrealloc(*labels, sizeof(CSlabel) * freqCSset->numCSadded); @@ -2979,6 +2985,11 @@ str updateLabel(int ruleNumber, CSset *f case S1: // was: (S1 or S2), now combined // use common name label->name = name; + #if INFO_WHERE_NAME_FROM + label->isType = isType; + label->isOntology = isOntology; + label->isFK = isFK; + #endif #if USE_MULTIWAY_MERGING (void)ontmetadata; @@ -2996,9 +3007,6 @@ str updateLabel(int ruleNumber, CSset *f label->candidatesOntology = candidatesOntology; label->candidatesFK = candidatesFK; removeDuplicatedCandidates(label); - if (label->name == BUN_NONE && label->candidates[0] != BUN_NONE) { - label->name = label->candidates[0]; - } // hierarchy if ((*labels)[freqCS1].name == label->name) { @@ -3033,6 +3041,11 @@ str updateLabel(int ruleNumber, CSset *f case S2: // use common ancestor label->name = name; + #if INFO_WHERE_NAME_FROM + label->isType = isType; + label->isOntology = isOntology; + label->isFK = isFK; + #endif // candidates mergedCandidates = mergeCandidates(&candidatesCount, &candidatesNew, &candidatesType, &candidatesOntology, &candidatesFK, (*labels)[freqCS1], (*labels)[freqCS2], label->name); @@ -3044,9 +3057,6 @@ str updateLabel(int ruleNumber, CSset *f label->candidatesOntology = candidatesOntology; label->candidatesFK = candidatesFK; removeDuplicatedCandidates(label); - if (label->name == BUN_NONE && label->candidates[0] != BUN_NONE) { - label->name = label->candidates[0]; - } // hierarchy freqCS1Counter = (*labels)[freqCS1].hierarchyCount - 1; @@ -3080,8 +3090,14 @@ str updateLabel(int ruleNumber, CSset *f label->candidatesFK = candidatesFK; removeDuplicatedCandidates(label); if (label->name == BUN_NONE && label->candidates[0] != BUN_NONE) { + // superCS had no name before, but subCS adds candidates label->name = label->candidates[0]; - } + #if INFO_WHERE_NAME_FROM + label->isType = 
(*labels)[freqCS2].isType; + label->isOntology = (*labels)[freqCS2].isOntology; + label->isFK = (*labels)[freqCS2].isFK; + #endif + } // else: old name and isType/isOntology/isFK remain valid // hierarchy already set // properties already set @@ -3113,6 +3129,11 @@ str updateLabel(int ruleNumber, CSset *f } // #endif label->name = big.name; + #if INFO_WHERE_NAME_FROM + label->isType = big.isType; + label->isOntology = big.isOntology; + label->isFK = big.isFK; + #endif // candidates mergedCandidates = mergeCandidates(&candidatesCount, &candidatesNew, &candidatesType, &candidatesOntology, &candidatesFK, big, small, label->name); @@ -3125,7 +3146,13 @@ str updateLabel(int ruleNumber, CSset *f label->candidatesFK = candidatesFK; removeDuplicatedCandidates(label); if (label->name == BUN_NONE && label->candidates[0] != BUN_NONE) { + // no name yet, use name of small table label->name = label->candidates[0]; + #if INFO_WHERE_NAME_FROM + label->isType = small.isType; + label->isOntology = small.isOntology; + label->isFK = small.isFK; + #endif } // hierarchy diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h --- a/monetdb5/extras/rdf/rdflabels.h +++ b/monetdb5/extras/rdf/rdflabels.h @@ -119,7 +119,7 @@ rdf_export void exportLabels(CSlabel* labels, CSset* freqCSset, CSrel* csRelBetweenMergeFreqSet, int freqThreshold, int* mTblIdxFreqIdxMapping,int* mfreqIdxTblIdxMapping,int numTables); rdf_export str -updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, oid **ontmetadata, int ontmetadataCount, int *lstFreqId, int numIds); +updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, int isType, int isOnto, int isFK, oid **ontmetadata, int ontmetadataCount, int *lstFreqId, int numIds); rdf_export void freeLabels(CSlabel* labels, CSset* freqCSset); diff --git a/monetdb5/extras/rdf/rdfschema.c 
b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -2945,7 +2945,7 @@ void mergeCSbyS3(CSset *freqCSset, CSlab if (isSubset(freqCSset->items[freqId2].lstProp, freqCSset->items[freqId1].lstProp, numP2,numP1) == 1) { /* CSj is a superset of CSi */ freqCSset->items[freqId1].parentFreqIdx = freqId2; - updateLabel(S3, freqCSset, labels, 0, freqId2, freqId1, freqId2, BUN_NONE, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(S3, freqCSset, labels, 0, freqId2, freqId1, freqId2, BUN_NONE, 0, 0, 0, ontmetadata, ontmetadataCount, NULL, -1); // name, isType, isOntology, isFK are not used for case S3 break; } } @@ -2954,7 +2954,7 @@ void mergeCSbyS3(CSset *freqCSset, CSlab numP1,numP2) == 1) { /* CSj is a subset of CSi */ freqCSset->items[freqId2].parentFreqIdx = freqId1; - updateLabel(S3, freqCSset, labels, 0, freqId1, freqId1, freqId2, BUN_NONE, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(S3, freqCSset, labels, 0, freqId1, freqId1, freqId2, BUN_NONE, 0, 0, 0, ontmetadata, ontmetadataCount, NULL, -1); // name, isType, isOntology, isFK are not used for case S3 } } @@ -3558,7 +3558,7 @@ void freeLabelStat(LabelStat *labelStat) } static -void doMerge(CSset *freqCSset, int ruleNum, int freqId1, int freqId2, oid *mergecsId, CSlabel** labels, oid** ontmetadata, int ontmetadataCount, oid name){ +void doMerge(CSset *freqCSset, int ruleNum, int freqId1, int freqId2, oid *mergecsId, CSlabel** labels, oid** ontmetadata, int ontmetadataCount, oid name, int isType, int isOntology, int isFK){ CS *mergecs; int existMergecsId; CS *existmergecs, *mergecs1, *mergecs2; @@ -3574,7 +3574,7 @@ void doMerge(CSset *freqCSset, int ruleN cs1->parentFreqIdx = freqCSset->numCSadded; cs2->parentFreqIdx = freqCSset->numCSadded; addCStoSet(freqCSset,*mergecs); - updateLabel(ruleNum, freqCSset, labels, 1, freqCSset->numCSadded - 1, freqId1, freqId2, name, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(ruleNum, 
freqCSset, labels, 1, freqCSset->numCSadded - 1, freqId1, freqId2, name, isType, isOntology, isFK, ontmetadata, ontmetadataCount, NULL, -1); free(mergecs); mergecsId[0]++; @@ -3584,7 +3584,7 @@ void doMerge(CSset *freqCSset, int ruleN existmergecs = &(freqCSset->items[existMergecsId]); mergeACStoExistingmergeCS(*cs1,freqId1, existmergecs); cs1->parentFreqIdx = existMergecsId; - updateLabel(ruleNum, freqCSset, labels, 0, existMergecsId, freqId1, freqId2, name, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(ruleNum, freqCSset, labels, 0, existMergecsId, freqId1, freqId2, name, isType, isOntology, isFK, ontmetadata, ontmetadataCount, NULL, -1); } else if (cs1->parentFreqIdx != -1 && cs2->parentFreqIdx == -1){ @@ -3592,7 +3592,7 @@ void doMerge(CSset *freqCSset, int ruleN existmergecs = &(freqCSset->items[existMergecsId]); mergeACStoExistingmergeCS(*cs2,freqId2, existmergecs); cs2->parentFreqIdx = existMergecsId; - updateLabel(ruleNum, freqCSset, labels, 0, existMergecsId, freqId1, freqId2, name, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(ruleNum, freqCSset, labels, 0, existMergecsId, freqId1, freqId2, name, isType, isOntology, isFK, ontmetadata, ontmetadataCount, NULL, -1); } else if (cs1->parentFreqIdx != cs2->parentFreqIdx){ mergecs1 = &(freqCSset->items[cs1->parentFreqIdx]); @@ -3604,7 +3604,7 @@ void doMerge(CSset *freqCSset, int ruleN for (k = 0; k < mergecs2->numConsistsOf; k++){ freqCSset->items[mergecs2->lstConsistsOf[k]].parentFreqIdx = cs1->parentFreqIdx; } - updateLabel(ruleNum, freqCSset, labels, 0, cs1->parentFreqIdx, freqId1, freqId2, name, ontmetadata, ontmetadataCount, NULL, -1); + updateLabel(ruleNum, freqCSset, labels, 0, cs1->parentFreqIdx, freqId1, freqId2, name, isType, isOntology, isFK, ontmetadata, ontmetadataCount, NULL, -1); } } @@ -3673,7 +3673,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, #if USE_MULTIWAY_MERGING lstDistinctFreqId = mergeMultiCS(freqCSset, labelStat->freqIdList[i], labelStat->lstCount[i], mergecsId, 
&numDistinct, &isNew, &mergeFreqIdx); if (lstDistinctFreqId != NULL){ - updateLabel(S1, freqCSset, labels, isNew, mergeFreqIdx, -1, -1, *name, ontmetadata, ontmetadataCount, lstDistinctFreqId, numDistinct); + updateLabel(S1, freqCSset, labels, isNew, mergeFreqIdx, -1, -1, *name, labelStat->freqIdList[i][0].isType, labelStat->freqIdList[i][0].isOntology, labelStat->freqIdList[i][0].isFK, ontmetadata, ontmetadataCount, lstDistinctFreqId, numDistinct); // use isType/isOntology/isFK information from first CS with that label } #else @@ -3700,7 +3700,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, #endif if ((*labels)[freqId2].isOntology == 1){ //printf("Merge FreqCS %d and FreqCS %d by Ontology name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name); + doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 1, 0); // isOntology //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); tmpCount++; } @@ -3730,7 +3730,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, #endif if ((*labels)[freqId2].isType == 1){ //printf("Merge FreqCS %d and FreqCS %d by Type name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name); + doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 1, 0, 0); // isType //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); tmpCount++; } @@ -3760,7 +3760,7 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, #endif if ((*labels)[freqId2].isFK == 1){ //printf("Merge FreqCS %d and FreqCS %d by FK name \n", freqId1, freqId2); - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name); + doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, 0, 0, 1); // isFK //printf("Number of added cs in freqCS: %d \n", freqCSset->numCSadded); 
tmpCount++; } @@ -3783,12 +3783,22 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, break; } for (j = k+1; j < labelStat->lstCount[i]; j++){ + int isType = 0, isOntology = 0, isFK = 0; freqId2 = labelStat->freqIdList[i][j]; cs2 = &(freqCSset->items[freqId2]); #if NOT_MERGE_DIMENSIONCS_IN_S1 if (cs2->type == DIMENSIONCS) continue; #endif - doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name); + #if INFO_WHERE_NAME_FROM + if ((*labels)[freqId1].isType == 1 || (*labels)[freqId2].isType == 1) { + isType = 1; + } else if ((*labels)[freqId1].isOntology == 1 || (*labels)[freqId2].isOntology == 1) { + isOntology = 1; + } else if ((*labels)[freqId1].isFK == 1 || (*labels)[freqId2].isFK == 1) { + isFK = 1; + } + #endif + doMerge(freqCSset, S1, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, *name, isType, isOntology, isFK); tmpCount++; } @@ -3913,7 +3923,7 @@ void mergeMaxFreqCSByS5(CSrel *csrelMerg lstDistinctFreqId = mergeMultiCS(freqCSset, csRelSum->freqIdList[j],csRelSum->numPropRef[j], mergecsId, &numDistinct, &isNew, &mergeFreqIdx); if (lstDistinctFreqId != NULL){ - updateLabel(S5, freqCSset, labels, isNew, mergeFreqIdx, -1, -1, BUN_NONE, ontmetadata, ontmetadataCount, lstDistinctFreqId, numDistinct); + updateLabel(S5, freqCSset, labels, isNew, mergeFreqIdx, -1, -1, BUN_NONE, 0, 0, 0, ontmetadata, ontmetadataCount, lstDistinctFreqId, numDistinct); // name, isType, isOntology, isFK are not used for case S5 } #else @@ -3940,7 +3950,7 @@ void mergeMaxFreqCSByS5(CSrel *csrelMerg if (cs2->type == DIMENSIONCS) continue; #endif - doMerge(freqCSset, S5, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE); + doMerge(freqCSset, S5, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE, 0, 0, 0); // no name known } @@ -4155,7 +4165,7 @@ void mergeCSByS2(CSset *freqCSset, CSlab #endif if (isLabelComparable == 1 && isSemanticSimilar(freqId1, freqId2, (*labels), 
ontoUsageTree,freqCSset->numOrigFreqCS, &name, ontmetaBat, ontclassSet) == 1){ //printf("Same labels between freqCS %d and freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore); - doMerge(freqCSset, S2, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, name); + doMerge(freqCSset, S2, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, name, 0, 1, 0); // isOntology because of the common ancestor name that was found in isSemanticSimilar } } @@ -4248,7 +4258,7 @@ void mergeCSByS4(CSset *freqCSset, CSlab printf(" %d (DUMMY) with simscore = %f \n",freqId2, simscore); } */ - doMerge(freqCSset, S4, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE); + doMerge(freqCSset, S4, freqId1, freqId2, mergecsId, labels, ontmetadata, ontmetadataCount, BUN_NONE, 0, 0, 0); // no name known } } } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list