Changeset: 640ae9291b1c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=640ae9291b1c
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Merging CS using rules S1, S2 diffs (60 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -2665,6 +2665,27 @@ void mergeMaxFreqCSByS6(CSrel *csrelBetw } static +char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels){ /*Rule S1 S2 S3*/ + int i, j; + int k1, k2; + + if (strcmp(labels[freqId1].name, labels[freqId2].name) == 0) + return 1; + else{ /* Check top k candidates */ + k1 = (labels[freqId1].candidatesCount < TOPK)?labels[freqId1].candidatesCount:TOPK; + k2 = (labels[freqId2].candidatesCount < TOPK)?labels[freqId2].candidatesCount:TOPK; + + for (i = 0; i < k1; i++){ + for (j = 0; j < k2; j++){ + if (strcmp(labels[freqId1].candidates[i], labels[freqId2].candidates[j]) == 0) return 1; + } + } + } + + return 0; +} + +static void mergeMaximumFreqCSsAll(CSset *freqCSset, CSlabel* labels, oid* superCSFreqCSMap, int numMaxCSs, oid *mergecsId){ int i, j, k; int freqId1, freqId2; @@ -2700,7 +2721,7 @@ void mergeMaximumFreqCSsAll(CSset *freqC propStat = initPropStat(); getPropStatisticsFromMaxCSs(propStat, numMaxCSs, superCSFreqCSMap, freqCSset); /*TODO: Get PropStat from MaxCSs or From mergedCS only*/ - for (i = 0; i < numMaxCSs; i++){ + for (i = 0; i < numMaxCSs; i++){ /*TODO: Only go through the list of mergedCS. 
*/ freqId1 = superCSFreqCSMap[i]; //printf("Label of %d CS is %s \n", freqId1, labels[freqId1].name); isLabelComparable = 0; @@ -2713,7 +2734,7 @@ void mergeMaximumFreqCSsAll(CSset *freqC isSameLabel = 0; #if USE_LABEL_FOR_MERGING - if (isLabelComparable == 1 && strcmp(labels[freqId1].name, labels[freqId2].name) == 0){ + if (isLabelComparable == 1 && isSemanticSimilar(freqId1, freqId2, labels) == 1){ //printf("Same labels between freqCS %d and freqCS %d - Old simscore is %f \n", freqId1, freqId2, simscore); isSameLabel = 1; simscore = 1; diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -102,6 +102,7 @@ typedef struct PropStat { #define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from labeling process for finding maxCS #define USE_LABEL_FOR_MERGING 1 // Use the labels received from labeling process for finding mergeCS +#define TOPK 3 //Check top 3 candidate typedef struct CS { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list