Changeset: 31ddaed2cf15 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=31ddaed2cf15 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
- Re-update labels after using S4: choose the name with the highest support among the parent CS's of a merged CS. - Check whether the types of a subject and its redirected subject are in the same ontology hierarchy. If not, they can be marked as different. diffs (143 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -4469,6 +4469,8 @@ void mergeCSByS4(CSset *freqCSset, CSlab char existDiscriminatingProp = 0; + int oldNumCSadded = 0; + (void) oldNumCSadded; /* int ret; char* schema = "rdf"; @@ -4476,6 +4478,9 @@ void mergeCSByS4(CSset *freqCSset, CSlab TKNZRopen (NULL, &schema); */ + #if UPDATE_NAME_BASEDON_POPULARTABLE + oldNumCSadded = freqCSset->numCSadded; + #endif (void) labels; @@ -4545,7 +4550,53 @@ void mergeCSByS4(CSset *freqCSset, CSlab } } } - + #if UPDATE_NAME_BASEDON_POPULARTABLE + { + int tmpSubFreqId = -1; + int tmpFreqIdwithMaxSupport = -1; + int tmpmaxSupport = 0; + int k; + oid oldName; + oid newName; + for (i = oldNumCSadded; i < freqCSset->numCSadded; i++){ + freqId1 = i; + cs1 = (CS*) &(freqCSset->items[freqId1]); + oldName = (*labels)[freqId1].name; + + if (cs1->parentFreqIdx == -1 && oldName != BUN_NONE){ + tmpmaxSupport = 0; + newName = BUN_NONE; + for (j = 0; j < cs1->numConsistsOf; j++){ + tmpSubFreqId = cs1->lstConsistsOf[j]; + if (freqCSset->items[tmpSubFreqId].support > tmpmaxSupport){ + tmpFreqIdwithMaxSupport = tmpSubFreqId; + tmpmaxSupport = freqCSset->items[tmpSubFreqId].support; + } + } + + newName = (*labels)[tmpFreqIdwithMaxSupport].name; + if (newName != BUN_NONE && newName != oldName){ + //update label + (*labels)[freqId1].name = newName; + //update candidates + assert(oldName == (*labels)[freqId1].candidates[0]); + for (k = 1; k < (*labels)[freqId1].candidatesCount; k++){ + //If newName is already in the candidates, swap the first candidate with this + if ((*labels)[freqId1].candidates[k] == newName){ + 
(*labels)[freqId1].candidates[k] = oldName; + (*labels)[freqId1].candidates[0] = newName; + break; + } + } + //If no candidate has the new Name + if ((*labels)[freqId1].candidates[0] != newName){ + (*labels)[freqId1].candidates[0] = newName; + } + } + } + } + } + #endif //TKNZRclose(&ret); @@ -5089,6 +5140,33 @@ int getOntologySpecificLevel(oid valueOi } static +char isSupSuperOntology(oid value1, oid value2){ + BUN ontclasspos1 = BUN_NONE; + BUN ontclasspos2 = BUN_NONE; + int tmpscPos = -1; + int j; + + ontclasspos1 = BUNfnd(BATmirror(ontmetaBat), &value1); + ontclasspos2 = BUNfnd(BATmirror(ontmetaBat), &value2); + + if (ontclasspos1 == BUN_NONE || ontclasspos2 == BUN_NONE) return 0; + + //check the superclass for value 1 + for (j = 0; j < ontclassSet[ontclasspos1].numsc; j++){ + tmpscPos = ontclassSet[ontclasspos1].scIdxes[j]; + if (tmpscPos == (int)ontclasspos2) return 1; + } + + //check the superclass for value 2 + for (j = 0; j < ontclassSet[ontclasspos2].numsc; j++){ + tmpscPos = ontclassSet[ontclasspos2].scIdxes[j]; + if (tmpscPos == (int)ontclasspos1) return 1; + } + + return 0; +} + +static PropStat* getPropStatisticsByOntologyClass(int numClass, OntClass *ontClassSet){ int i, j; @@ -5790,12 +5868,18 @@ str RDFcheckWrongTypeSubject(BAT *sbat, takeOid(redirectS, &redirectSstr); takeOid(subjTypeMap[*sbt], &curStype); takeOid(subjTypeMap[redirectS],&redirecttype); - printf("Subject %s [Type: %s] redirects to %s [Type: %s] \n", + printf("Subject %s [Type: %s] redirects to %s [Type: %s]", curSstr,curStype,redirectSstr,redirecttype); GDKfree(curSstr); GDKfree(redirectSstr); GDKfree(curStype); GDKfree(redirecttype); + + if (isSupSuperOntology(subjTypeMap[*sbt],subjTypeMap[redirectS]) == 0){ + printf (" [NOT IN SAME HIERARCHY] \n"); + } else { + printf ("\n"); + } } } diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -253,6 +253,8 @@ typedef struct SubCSSet{ 
// such as type, description. They should have at least one discriminating prop in common. #define MIN_TFIDF_PROP_FINALTABLE 2.5 //Discriminating prop is prop that appears in less than 10% of the table +#define UPDATE_NAME_BASEDON_POPULARTABLE 1//Update table name from merging multiple freqCS by using the most popular one + //#define MIN_FROMTABLE_SIZE_S5 1 /* For example data */ #define MINIMUM_TABLE_SIZE 10000 //The minimum number of triples coverred by a table (i.e., a final CS) //#define MINIMUM_TABLE_SIZE 1 // For example dataset only _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list