Changeset: 2b0ab4777950 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2b0ab4777950
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Update labels when CSs are merged
Updates the name, hierarchy, and properties of the merged CS's label. Candidates are not updated yet.


diffs (truncated from 851 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -1790,7 +1790,7 @@ oid* getOntoHierarchy(oid ontology, int*
 
        // add 'ontology' to hierarchy
        (*hierarchyCount) = 1;
-       hierarchy = (oid *) malloc(sizeof(oid) * (*hierarchyCount));
+       hierarchy = (oid *) GDKmalloc(sizeof(oid) * (*hierarchyCount));
        if (!hierarchy)
                fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
        hierarchy[(*hierarchyCount) -1] = ontology;
@@ -1815,7 +1815,7 @@ oid* getOntoHierarchy(oid ontology, int*
                                        // superclass
                                        // add 'msuperstr' to hierarchy
                                        (*hierarchyCount) += 1;
-                                       hierarchy = realloc(hierarchy, 
sizeof(oid) * (*hierarchyCount));
+                                       hierarchy = GDKrealloc(hierarchy, 
sizeof(oid) * (*hierarchyCount));
                                        if (!hierarchy)
                                                fprintf(stderr, "ERROR: 
Couldn't realloc memory!\n");
                                        hierarchy[(*hierarchyCount) -1] = 
msuper;
@@ -1843,7 +1843,7 @@ void getTableName(CSlabel* label, int cs
        // --- ONTOLOGY ---
        // add all ontology candidates to list of candidates
        if (resultCount[csIdx] >= 1) {
-               label->candidates = realloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + resultCount[csIdx]));
+               label->candidates = GDKrealloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + resultCount[csIdx]));
                if (!label->candidates) fprintf(stderr, "ERROR: Couldn't 
realloc memory!\n");
                for (i = 0; i < resultCount[csIdx]; ++i) {
                        label->candidates[label->candidatesCount + i] = 
result[csIdx][i];
@@ -1925,7 +1925,7 @@ void getTableName(CSlabel* label, int cs
        // add all most frequent type values to list of candidates
        if (tmpListCount >= 1) {
                int counter = 0;
-               label->candidates = realloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + tmpListCount));
+               label->candidates = GDKrealloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + tmpListCount));
                if (!label->candidates) fprintf(stderr, "ERROR: Couldn't 
realloc memory!\n");
                for (i = 0; i < typeStatCount; ++i) {
                        for (j = 0; j < tmpListCount; ++j) {
@@ -1965,7 +1965,7 @@ void getTableName(CSlabel* label, int cs
        // --- FK ---
        // add top3 fk values to list of candidates
        if (links[csIdx].num > 0) {
-               label->candidates = realloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + MIN(3, links[csIdx].num)));
+               label->candidates = GDKrealloc(label->candidates, sizeof(oid) * 
(label->candidatesCount + MIN(3, links[csIdx].num)));
                if (!label->candidates) fprintf(stderr, "ERROR: Couldn't 
realloc memory!\n");
                for (i = 0; i < MIN(3, links[csIdx].num); ++i) {
                        label->candidates[label->candidatesCount + i] = 
links[csIdx].fks[0].prop;
@@ -1983,7 +1983,7 @@ void getTableName(CSlabel* label, int cs
 
        // --- NOTHING ---
        if (label->candidatesCount == 0) {
-               label->candidates = realloc(label->candidates, sizeof(oid));
+               label->candidates = GDKrealloc(label->candidates, sizeof(oid));
                if (!label->candidates) fprintf(stderr, "ERROR: Couldn't 
realloc memory!\n");
                label->candidates[0] = BUN_NONE;
                label->candidatesCount = 1;
@@ -2004,7 +2004,7 @@ CSlabel* initLabels(CSset *freqCSset) {
        CSlabel         *labels;
        int             i;
 
-       labels = (CSlabel *) malloc(sizeof(CSlabel) * freqCSset->numCSadded);
+       labels = (CSlabel *) GDKmalloc(sizeof(CSlabel) * freqCSset->numCSadded);
        if (!labels) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                labels[i].candidates = NULL;
@@ -2031,7 +2031,7 @@ void getAllLabels(CSlabel* labels, CSset
 
                // copy attribute oids (names)
                labels[i].numProp = cs.numProp;
-               labels[i].lstProp = (oid *) malloc(sizeof(oid) * cs.numProp);
+               labels[i].lstProp = (oid *) GDKmalloc(sizeof(oid) * cs.numProp);
                if (!labels[i].lstProp) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
                for (j = 0; j < cs.numProp; ++j) {
                        labels[i].lstProp[j] = cs.lstProp[j];
@@ -2169,7 +2169,6 @@ void addToOntoUsageTree(OntoUsageNode* t
        addToOntoUsageTree(leaf, hierarchy, hierarchyCount, numTuples);
 }
 
-
 static
 void printTree(OntoUsageNode* tree, int level) {
        int i;
@@ -2253,7 +2252,7 @@ void createOntoUsageTree(OntoUsageNode**
 
                // search class in tree and add CS to statistics
                addToOntoUsageTree(*tree, hierarchy, hierarchyCount, 
freqCSset->items[i].support);
-               free(hierarchy);
+               GDKfree(hierarchy);
 //             numTuples += freqCSset->items[i].support; // update total 
number of tuples in dataset // TODO cs.support not yet available
                numTuples += 1;
        }
@@ -2353,60 +2352,6 @@ void freeOntologyLookupResult(oid** onto
        free(ontologyLookupResult);
 }
 
-/* Returns the indexes of all CS a CS consists of. */
-static
-int* getSubCS(CSset* freqCSset, int csIdx, int* csListLength) {
-       int             *csList = NULL;
-       int             i, j;
-
-       CS cs = freqCSset->items[csIdx];
-
-       (*csListLength) = 0;
-
-       if (cs.type == MAXCS) {
-//printf("MAXCS ");
-               // get itself & all childen
-               (*csListLength)++;
-               csList = (int *) malloc(sizeof(int) * *csListLength);
-               csList[*csListLength - 1] = csIdx;
-               for (i = 0; i < freqCSset->numCSadded; ++i) {
-                       if (freqCSset->items[i].parentFreqIdx != csIdx) 
continue;
-                       (*csListLength)++;
-                       csList = (int *) realloc(csList, sizeof(int) * 
*csListLength);
-                       if (!csList) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
-                       csList[*csListLength - 1] = i;
-//printf(BUNFMT" ", freqCSset->items[i].csId);
-               }
-       } else if (cs.type == MERGECS) {
-//printf("MERGECS ( ");
-               // get all children
-               for (i = 0; i < cs.numConsistsOf; ++i) {
-                       int length;
-                       int *list = getSubCS(freqCSset, cs.lstConsistsOf[i], 
&length); // recursive call
-
-                       // merge into existing list
-                       csList = realloc(csList, sizeof(int) * (*csListLength + 
length));
-                       if (!csList) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
-                       for (j = 0; j < length; ++j) {
-                               csList[*csListLength + j] = list[j];
-                       }
-                       (*csListLength) += length;
-                       free(list);
-               }
-//printf(")\n");
-       } else { // FREQCS
-//printf("FREQCS ");
-               // copy entry
-               csList = (int *) malloc(sizeof(int) * 1);
-               *csListLength = 1;
-               csList[0] = csIdx;
-//printf(BUNFMT" ", freqCSset->items[csIdx].csId);
-               return csList;
-       }
-
-       return csList;
-}
-
 /* Creates labels for all CS (without a parent). */
 CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, 
BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** 
ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, 
OntoUsageNode** ontoUsageTree) {
 #if USE_TYPE_NAMES
@@ -2505,155 +2450,180 @@ CSlabel* createLabels(CSset* freqCSset, 
        return labels;
 }
 
-str updateLabel(int ruleNumber, CSlabel *labels, int mergeCSFreqId, int 
freqCS1, int freqCS2){
-       (void) ruleNumber;
-       (void) labels;
-       (void) mergeCSFreqId;
-       (void) freqCS1;
-       (void) freqCS2;
+/* Create labels for merged CS's. Uses rules S1 to S5 (new names!).
+ * If no MERGECS is created (subset-superset relation), mergeCSFreqId contains 
the Id of the superset class.
+ * For S1 and S2, parameter 'name' is used to avoid recomputation of CS names
+ */
+str updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, 
int mergeCSFreqId, int freqCS1, int freqCS2, oid name, oid **ontmetadata, int 
ontmetadataCount){
+       int             i;
+       int             freqCS1Counter;
+       CSlabel         *big;
+       CSlabel         *label;
+       CS              cs;
 
-       return MAL_SUCCEED; 
+       if (newCS) {
+               // realloc labels
+               *labels = GDKrealloc(*labels, sizeof(CSlabel) * 
freqCSset->numCSadded);
+               if (!(*labels)) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
+               (*labels)[mergeCSFreqId].candidates = NULL;
+               (*labels)[mergeCSFreqId].candidatesCount = 0;
+               (*labels)[mergeCSFreqId].hierarchy = NULL;
+               (*labels)[mergeCSFreqId].hierarchyCount = 0;
+               (*labels)[mergeCSFreqId].numProp = 0;
+               (*labels)[mergeCSFreqId].lstProp = NULL;
+       }
+       label = &(*labels)[mergeCSFreqId];
+       cs = freqCSset->items[mergeCSFreqId];
+
+       // copy properties
+       if (ruleNumber != S3) {
+               if (label->numProp > 0) GDKfree(label->lstProp);
+               label->numProp = cs.numProp;
+               label->lstProp = (oid *) GDKmalloc(sizeof(oid) * 
label->numProp);
+               if (!label->lstProp) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
+               for (i = 0; i < label->numProp; ++i) {
+                       label->lstProp[i] = cs.lstProp[i];
+               }
+       }
+
+       switch (ruleNumber) {
+               case S1: // was: (S1 or S2), now combined
+               // use common name
+               label->name = name;
+
+               // TODO candidates
+               //label->candidates = ;
+               //label->candidatesCount = ;
+
+               // hierarchy
+               if ((*labels)[freqCS1].name == label->name) {
+                       // copy hierarchy from CS freqCS1
+                       label->hierarchyCount = 
(*labels)[freqCS1].hierarchyCount;
+                       if (label->hierarchyCount > 0) {
+                               label->hierarchy = (oid *) 
GDKmalloc(sizeof(oid) * label->hierarchyCount);
+                               if (!label->hierarchy) fprintf(stderr, "ERROR: 
Couldn't malloc memory!\n");
+                               for (i = 0; i < label->hierarchyCount; ++i) {
+                                       label->hierarchy[i] = 
(*labels)[freqCS1].hierarchy[i];
+                               }
+                       }
+               } else if ((*labels)[freqCS2].name == label->name) {
+                       // copy hierarchy from CS freqCS2
+                       label->hierarchyCount = 
(*labels)[freqCS2].hierarchyCount;
+                       if (label->hierarchyCount > 0) {
+                               label->hierarchy = (oid *) 
GDKmalloc(sizeof(oid) * label->hierarchyCount);
+                               if (!label->hierarchy) fprintf(stderr, "ERROR: 
Couldn't malloc memory!\n");
+                               for (i = 0; i < label->hierarchyCount; ++i) {
+                                       label->hierarchy[i] = 
(*labels)[freqCS2].hierarchy[i];
+                               }
+                       }
+               } else {
+                       // no top 1 name, no hierarchy available
+                       label->hierarchy = getOntoHierarchy(name, 
&(label->hierarchyCount), ontmetadata, ontmetadataCount);
+               }
+
+               break;
+
+               case S2:
+               // use common ancestor
+               label->name = name;
+
+               // TODO candidates
+               //label->candidates = ;
+               //label->candidatesCount = ;
+
+               // hierarchy
+               freqCS1Counter = (*labels)[freqCS1].hierarchyCount - 1;
+               while (TRUE) {
+                       if ((*labels)[freqCS1].hierarchy[freqCS1Counter] == 
label->name)
+                               break;
+                       freqCS1Counter--;
+               }
+               label->hierarchyCount = (*labels)[freqCS1].hierarchyCount - 
freqCS1Counter;
+               if (label->hierarchyCount > 0) {
+                       label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * 
label->hierarchyCount);
+                       if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
+                       for (i = 0; i < label->hierarchyCount; ++i) {
+                               label->hierarchy[i] = 
(*labels)[freqCS1].hierarchy[freqCS1Counter + i];
+                       }
+               }
+
+               break;
+
+               case S3:
+               // subset-superset relation
+               // candidates already set
+               // hierarchy already set
+               // properties already set
+
+               break;
+
+               case S4: // FALLTHROUGH
+               case S5:
+               // use label of biggest CS (higher coverage value)
+               if (freqCSset->items[freqCS1].coverage > 
freqCSset->items[freqCS2].coverage) {
+                       big = &(*labels)[freqCS1];
+               } else {
+                       big = &(*labels)[freqCS2];
+               }
+
+               label->name = big->name;
+
+               // TODO candidates
+               //label->candidatesCount = ;
+               //label->candidates = ;
+
+               // hierarchy
+               label->hierarchyCount = big->hierarchyCount;
+               if (label->hierarchyCount > 0) {
+                       label->hierarchy = (oid *) GDKmalloc(sizeof(oid) * 
label->hierarchyCount);
+                       if (!label->hierarchy) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
+                       for (i = 0; i < label->hierarchyCount; ++i) {
+                               label->hierarchy[i] = big->hierarchy[i];
+                       }
+               }
+
+               break;
+
+               default:
+               // error
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to