Changeset: 2b0ab4777950 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2b0ab4777950
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
	monetdb5/extras/rdf/rdflabels.h
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Update labels when CS's are merged Updates label, hierarchy and properties. Does not update candidates yet. diffs (truncated from 851 to 300 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -1790,7 +1790,7 @@ oid* getOntoHierarchy(oid ontology, int* // add 'ontology' to hierarchy (*hierarchyCount) = 1; - hierarchy = (oid *) malloc(sizeof(oid) * (*hierarchyCount)); + hierarchy = (oid *) GDKmalloc(sizeof(oid) * (*hierarchyCount)); if (!hierarchy) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); hierarchy[(*hierarchyCount) -1] = ontology; @@ -1815,7 +1815,7 @@ oid* getOntoHierarchy(oid ontology, int* // superclass // add 'msuperstr' to hierarchy (*hierarchyCount) += 1; - hierarchy = realloc(hierarchy, sizeof(oid) * (*hierarchyCount)); + hierarchy = GDKrealloc(hierarchy, sizeof(oid) * (*hierarchyCount)); if (!hierarchy) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); hierarchy[(*hierarchyCount) -1] = msuper; @@ -1843,7 +1843,7 @@ void getTableName(CSlabel* label, int cs // --- ONTOLOGY --- // add all ontology candidates to list of candidates if (resultCount[csIdx] >= 1) { - label->candidates = realloc(label->candidates, sizeof(oid) * (label->candidatesCount + resultCount[csIdx])); + label->candidates = GDKrealloc(label->candidates, sizeof(oid) * (label->candidatesCount + resultCount[csIdx])); if (!label->candidates) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); for (i = 0; i < resultCount[csIdx]; ++i) { label->candidates[label->candidatesCount + i] = result[csIdx][i]; @@ -1925,7 +1925,7 @@ void getTableName(CSlabel* label, int cs // add all most frequent type values to list of candidates if (tmpListCount >= 1) { int counter = 0; - label->candidates = realloc(label->candidates, sizeof(oid) * (label->candidatesCount + tmpListCount)); + label->candidates = GDKrealloc(label->candidates, sizeof(oid) * (label->candidatesCount + 
tmpListCount)); if (!label->candidates) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); for (i = 0; i < typeStatCount; ++i) { for (j = 0; j < tmpListCount; ++j) { @@ -1965,7 +1965,7 @@ void getTableName(CSlabel* label, int cs // --- FK --- // add top3 fk values to list of candidates if (links[csIdx].num > 0) { - label->candidates = realloc(label->candidates, sizeof(oid) * (label->candidatesCount + MIN(3, links[csIdx].num))); + label->candidates = GDKrealloc(label->candidates, sizeof(oid) * (label->candidatesCount + MIN(3, links[csIdx].num))); if (!label->candidates) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); for (i = 0; i < MIN(3, links[csIdx].num); ++i) { label->candidates[label->candidatesCount + i] = links[csIdx].fks[0].prop; @@ -1983,7 +1983,7 @@ void getTableName(CSlabel* label, int cs // --- NOTHING --- if (label->candidatesCount == 0) { - label->candidates = realloc(label->candidates, sizeof(oid)); + label->candidates = GDKrealloc(label->candidates, sizeof(oid)); if (!label->candidates) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); label->candidates[0] = BUN_NONE; label->candidatesCount = 1; @@ -2004,7 +2004,7 @@ CSlabel* initLabels(CSset *freqCSset) { CSlabel *labels; int i; - labels = (CSlabel *) malloc(sizeof(CSlabel) * freqCSset->numCSadded); + labels = (CSlabel *) GDKmalloc(sizeof(CSlabel) * freqCSset->numCSadded); if (!labels) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (i = 0; i < freqCSset->numCSadded; ++i) { labels[i].candidates = NULL; @@ -2031,7 +2031,7 @@ void getAllLabels(CSlabel* labels, CSset // copy attribute oids (names) labels[i].numProp = cs.numProp; - labels[i].lstProp = (oid *) malloc(sizeof(oid) * cs.numProp); + labels[i].lstProp = (oid *) GDKmalloc(sizeof(oid) * cs.numProp); if (!labels[i].lstProp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (j = 0; j < cs.numProp; ++j) { labels[i].lstProp[j] = cs.lstProp[j]; @@ -2169,7 +2169,6 @@ void addToOntoUsageTree(OntoUsageNode* t 
addToOntoUsageTree(leaf, hierarchy, hierarchyCount, numTuples); } - static void printTree(OntoUsageNode* tree, int level) { int i; @@ -2253,7 +2252,7 @@ void createOntoUsageTree(OntoUsageNode** // search class in tree and add CS to statistics addToOntoUsageTree(*tree, hierarchy, hierarchyCount, freqCSset->items[i].support); - free(hierarchy); + GDKfree(hierarchy); // numTuples += freqCSset->items[i].support; // update total number of tuples in dataset // TODO cs.support not yet available numTuples += 1; } @@ -2353,60 +2352,6 @@ void freeOntologyLookupResult(oid** onto free(ontologyLookupResult); } -/* Returns the indexes of all CS a CS consists of. */ -static -int* getSubCS(CSset* freqCSset, int csIdx, int* csListLength) { - int *csList = NULL; - int i, j; - - CS cs = freqCSset->items[csIdx]; - - (*csListLength) = 0; - - if (cs.type == MAXCS) { -//printf("MAXCS "); - // get itself & all childen - (*csListLength)++; - csList = (int *) malloc(sizeof(int) * *csListLength); - csList[*csListLength - 1] = csIdx; - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != csIdx) continue; - (*csListLength)++; - csList = (int *) realloc(csList, sizeof(int) * *csListLength); - if (!csList) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); - csList[*csListLength - 1] = i; -//printf(BUNFMT" ", freqCSset->items[i].csId); - } - } else if (cs.type == MERGECS) { -//printf("MERGECS ( "); - // get all children - for (i = 0; i < cs.numConsistsOf; ++i) { - int length; - int *list = getSubCS(freqCSset, cs.lstConsistsOf[i], &length); // recursive call - - // merge into existing list - csList = realloc(csList, sizeof(int) * (*csListLength + length)); - if (!csList) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); - for (j = 0; j < length; ++j) { - csList[*csListLength + j] = list[j]; - } - (*csListLength) += length; - free(list); - } -//printf(")\n"); - } else { // FREQCS -//printf("FREQCS "); - // copy entry - csList = (int *) malloc(sizeof(int) 
* 1); - *csListLength = 1; - csList[0] = csIdx; -//printf(BUNFMT" ", freqCSset->items[csIdx].csId); - return csList; - } - - return csList; -} - /* Creates labels for all CS (without a parent). */ CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, OntoUsageNode** ontoUsageTree) { #if USE_TYPE_NAMES @@ -2505,155 +2450,180 @@ CSlabel* createLabels(CSset* freqCSset, return labels; } -str updateLabel(int ruleNumber, CSlabel *labels, int mergeCSFreqId, int freqCS1, int freqCS2){ - (void) ruleNumber; - (void) labels; - (void) mergeCSFreqId; - (void) freqCS1; - (void) freqCS2; +/* Create labels for merged CS's. Uses rules S1 to S5 (new names!). + * If no MERGECS is created (subset-superset relation), mergeCSFreqId contains the Id of the superset class. + * For S1 and S2, parameter 'name' is used to avoid recomputation of CS names + */ +str updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, oid **ontmetadata, int ontmetadataCount){ + int i; + int freqCS1Counter; + CSlabel *big; + CSlabel *label; + CS cs; - return MAL_SUCCEED; + if (newCS) { + // realloc labels + *labels = GDKrealloc(*labels, sizeof(CSlabel) * freqCSset->numCSadded); + if (!(*labels)) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); + (*labels)[mergeCSFreqId].candidates = NULL; + (*labels)[mergeCSFreqId].candidatesCount = 0; + (*labels)[mergeCSFreqId].hierarchy = NULL; + (*labels)[mergeCSFreqId].hierarchyCount = 0; + (*labels)[mergeCSFreqId].numProp = 0; + (*labels)[mergeCSFreqId].lstProp = NULL; + } + label = &(*labels)[mergeCSFreqId]; + cs = freqCSset->items[mergeCSFreqId]; + + // copy properties + if (ruleNumber != S3) { + if (label->numProp > 0) GDKfree(label->lstProp); + label->numProp = cs.numProp; + label->lstProp = (oid *) 
GDKmalloc(sizeof(oid) * label->numProp); + if (!label->lstProp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < label->numProp; ++i) { + label->lstProp[i] = cs.lstProp[i]; + } + } + + switch (ruleNumber) { + case S1: // was: (S1 or S2), now combined + // use common name + label->name = name; + + // TODO candidates + //label->candidates = ; + //label->candidatesCount = ; + + // hierarchy + if ((*labels)[freqCS1].name == label->name) { + // copy hierarchy from CS freqCS1 + label->hierarchyCount = (*labels)[freqCS1].hierarchyCount; + if (label->hierarchyCount > 0) { + label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * label->hierarchyCount); + if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < label->hierarchyCount; ++i) { + label->hierarchy[i] = (*labels)[freqCS1].hierarchy[i]; + } + } + } else if ((*labels)[freqCS2].name == label->name) { + // copy hierarchy from CS freqCS2 + label->hierarchyCount = (*labels)[freqCS2].hierarchyCount; + if (label->hierarchyCount > 0) { + label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * label->hierarchyCount); + if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < label->hierarchyCount; ++i) { + label->hierarchy[i] = (*labels)[freqCS2].hierarchy[i]; + } + } + } else { + // no top 1 name, no hierarchy available + label->hierarchy = getOntoHierarchy(name, &(label->hierarchyCount), ontmetadata, ontmetadataCount); + } + + break; + + case S2: + // use common ancestor + label->name = name; + + // TODO candidates + //label->candidates = ; + //label->candidatesCount = ; + + // hierarchy + freqCS1Counter = (*labels)[freqCS1].hierarchyCount - 1; + while (TRUE) { + if ((*labels)[freqCS1].hierarchy[freqCS1Counter] == label->name) + break; + freqCS1Counter--; + } + label->hierarchyCount = (*labels)[freqCS1].hierarchyCount - freqCS1Counter; + if (label->hierarchyCount > 0) { + label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * 
label->hierarchyCount); + if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < label->hierarchyCount; ++i) { + label->hierarchy[i] = (*labels)[freqCS1].hierarchy[freqCS1Counter + i]; + } + } + + break; + + case S3: + // subset-superset relation + // candidates already set + // hierarchy already set + // properties already set + + break; + + case S4: // FALLTHROUGH + case S5: + // use label of biggest CS (higher coverage value) + if (freqCSset->items[freqCS1].coverage > freqCSset->items[freqCS2].coverage) { + big = &(*labels)[freqCS1]; + } else { + big = &(*labels)[freqCS2]; + } + + label->name = big->name; + + // TODO candidates + //label->candidatesCount = ; + //label->candidates = ; + + // hierarchy + label->hierarchyCount = big->hierarchyCount; + if (label->hierarchyCount > 0) { + label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * label->hierarchyCount); + if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < label->hierarchyCount; ++i) { + label->hierarchy[i] = big->hierarchy[i]; + } + } + + break; + + default: + // error _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list