Changeset: 4f48da03dc66 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4f48da03dc66
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Modify the merging function for merging multiple (more than 2) maximumCS's.

Change the data structure for mergeCS.

Add functions for merging two CS's, merging a CS and a mergeCS, merging two 
mergeCS's


diffs (truncated from 392 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -743,6 +743,7 @@ void freemergeCSset(mergeCSset *csSet){
        int i;
        for(i = 0; i < csSet->nummergeCSadded; i ++){
                free(csSet->items[i].lstProp);
+               free(csSet->items[i].lstParent);
        }
        free(csSet->items);
        free(csSet);    
@@ -810,7 +811,7 @@ CS* creatCS(oid csId, int numP, oid* buf
 }
 
 static 
-void mergeOidSets(oid* arr1, oid* arr2, oid* mergeArr, int m, int n, int 
numCombineP){
+void mergeOidSets(oid* arr1, oid* arr2, oid* mergeArr, int m, int n, int 
*numCombineP){
        
        int i = 0, j = 0;
        int pos = 0;
@@ -851,7 +852,7 @@ void mergeOidSets(oid* arr1, oid* arr2, 
                }               
        } 
        
-       assert(pos == numCombineP); 
+       *numCombineP = pos; 
        /*
        printf("pos = %d, numCombineP = %d\n", pos, numCombineP);
 
@@ -877,28 +878,125 @@ void mergeOidSets(oid* arr1, oid* arr2, 
 }
 
 static 
-mergeCS* mergeTwoCSs(CS cs1, CS cs2, int numCombineP, int support, int 
coverage){
-
+mergeCS* mergeTwoCSs(CS cs1, CS cs2, oid maxCSid1, oid maxCSid2, int support, 
int coverage){
+       
+       int numCombineP; 
        mergeCS *mergecs = (mergeCS*) malloc (sizeof (mergeCS)); 
-       mergecs->id1 = cs1.csId;  
-       mergecs->id2 = cs2.csId; 
-       mergecs->lstProp = (oid*) malloc(sizeof(oid) * numCombineP); 
+       mergecs->numParent = 2; 
+       mergecs->lstParent = (oid*) malloc(sizeof(oid) * 2);
+
+       //mergecs->lstParent[0] = cs1.csId;  
+       //mergecs->lstParent[1] = cs2.csId; 
+
+       mergecs->lstParent[0] = maxCSid1;  
+       mergecs->lstParent[1] = maxCSid2; 
+       
+       mergecs->lstProp = (oid*) malloc(sizeof(oid) * (cs1.numProp + 
cs2.numProp));  // will be redundant
 
        if (mergecs->lstProp == NULL){
                printf("Malloc failed. at %d", numCombineP);
                exit(-1);
        }
 
-       mergeOidSets(cs1.lstProp, cs2.lstProp, mergecs->lstProp, cs1.numProp, 
cs2.numProp, numCombineP); 
+       mergeOidSets(cs1.lstProp, cs2.lstProp, mergecs->lstProp, cs1.numProp, 
cs2.numProp, &numCombineP); 
 
        mergecs->numProp = numCombineP;
        mergecs->support = support;
        mergecs->coverage = coverage;
+       mergecs->isRemove = 0;
        
        return mergecs; 
 
 }
 
+
+static 
+void mergeACStoExistingmergeCS(CS cs, oid maxCSid, mergeCS *mergecs, int 
support, int coverage){
+       
+       int numCombineP; 
+       oid* _tmp1; 
+       oid* _tmp2; 
+       oid* oldlstProp; 
+
+        _tmp1 = realloc(mergecs->lstParent, ((mergecs->numParent + 1) * 
sizeof(oid)));
+
+       if (!_tmp1){
+               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+       }
+       mergecs->lstParent = (oid*)_tmp1;
+       //mergecs->lstParent[mergecs->numParent] = cs.csId; 
+       mergecs->lstParent[mergecs->numParent] = maxCSid; 
+       mergecs->numParent++;
+
+       oldlstProp = malloc (sizeof(oid) * (mergecs->numProp)); 
+       memcpy(oldlstProp, mergecs->lstProp, (mergecs->numProp) * sizeof(oid));
+       
+        _tmp2 = realloc(mergecs->lstProp, ((mergecs->numProp + cs.numProp) * 
sizeof(oid)));
+
+       if (!_tmp2){
+               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+       }
+       mergecs->lstProp = (oid*)_tmp2;
+
+       mergeOidSets(cs.lstProp, oldlstProp, mergecs->lstProp, cs.numProp, 
mergecs->numProp, &numCombineP); 
+
+       mergecs->numProp = numCombineP;
+       mergecs->support = support;
+       mergecs->coverage = coverage;
+
+       free(oldlstProp);
+}
+
+
+/*Merge two mergeCSs with the condition that no parent belongs to both of them 
*/
+static 
+void mergeTwomergeCS(mergeCS *mergecs1, mergeCS *mergecs2, int support, int 
coverage){
+       
+       int numCombineP; 
+       oid* _tmp1; 
+       oid* _tmp2; 
+       oid* oldlstProp1; 
+       oid* oldlstProp2; 
+       int i; 
+
+        _tmp1 = realloc(mergecs1->lstParent, ((mergecs1->numParent + 
mergecs2->numParent) * sizeof(oid)));
+
+       if (!_tmp1){
+               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+       }
+       mergecs1->lstParent = (oid*)_tmp1;
+       for (i = 0; i < mergecs2->numParent; i++){
+               mergecs1->lstParent[mergecs1->numParent] = 
mergecs2->lstParent[i]; 
+               mergecs1->numParent++;
+       }
+
+
+       oldlstProp1 = malloc (sizeof(oid) * mergecs1->numProp); 
+       memcpy(oldlstProp1, mergecs1->lstProp, (mergecs1->numProp) * 
sizeof(oid));
+       
+       oldlstProp2 = malloc (sizeof(oid) * mergecs2->numProp); 
+       memcpy(oldlstProp2, mergecs2->lstProp, (mergecs2->numProp) * 
sizeof(oid));
+
+        _tmp2 = realloc(mergecs1->lstProp, ((mergecs1->numProp + 
mergecs2->numProp) * sizeof(oid)));
+
+       if (!_tmp2){
+               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+       }
+       mergecs1->lstProp = (oid*)_tmp2;
+
+       mergeOidSets(oldlstProp1, oldlstProp2, mergecs1->lstProp, 
mergecs1->numProp, mergecs2->numProp, &numCombineP); 
+
+       mergecs1->numProp = numCombineP;
+       mergecs1->support = support;
+       mergecs1->coverage = coverage;
+
+       // Remove mergecs2
+       mergecs2->isRemove = 1; 
+
+       free(oldlstProp1);
+       free(oldlstProp2); 
+}
+
 static 
 str printFreqCSSet(CSset *freqCSset, oid* csSuperCSMap, BAT *freqBat, BAT 
*mapbat, char isWriteTofile, int freqThreshold){
 
@@ -1018,8 +1116,40 @@ str printFreqCSSet(CSset *freqCSset, oid
 }
 
 
+/*
 static 
-str printmergeCSSet(mergeCSset *mergecsSet, int freqThreshold){
+str printamergeCS(mergeCS cs, int mergecsid, CSset *freqCSset, oid* 
superCSFreqCSMap){
+       int ret; 
+       char*   schema = "rdf";
+       int j; 
+       CS freqcs; 
+       str propStr; 
+
+       if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+               throw(RDF, "rdf.rdfschema",
+                               "could not open the tokenizer\n");
+       }
+
+       printf("MergeCS %d - (numParent: %d) \n",mergecsid, cs.numParent);
+       for (j = 0; j < cs.numParent; j++){
+               freqcs = freqCSset->items[superCSFreqCSMap[cs.lstParent[j]]];
+               printf(" " BUNFMT " ", freqcs.csId);
+       }
+       printf("\n");
+       for (j = 0; j < cs.numProp; j++){
+               takeOid(cs.lstProp[j], &propStr);       
+               printf("          %s\n", propStr);
+       }
+       printf("\n");
+
+
+       TKNZRclose(&ret);
+       return MAL_SUCCEED;
+}
+*/
+
+static 
+str printmergeCSSet(mergeCSset *mergecsSet, CSset *freqCSset, oid* 
superCSFreqCSMap, int freqThreshold){
 
        int     i,j; 
        FILE    *fout; 
@@ -1030,6 +1160,7 @@ str printmergeCSSet(mergeCSset *mergecsS
        str     propStr; 
        char*   schema = "rdf";
        int     nummergecs;     
+       CS      freqcs; 
 
        nummergecs = mergecsSet->nummergeCSadded; 
        
@@ -1048,13 +1179,19 @@ str printmergeCSSet(mergeCSset *mergecsS
 
        for (i = 0; i < nummergecs; i++){
                mergeCS cs = (mergeCS)mergecsSet->items[i];
-               
-               fprintf(fout, "MergeCS %d: "BUNFMT " and " BUNFMT 
"\n",i,cs.id1, cs.id2);
-               for (j = 0; j < cs.numProp; j++){
-                       takeOid(cs.lstProp[j], &propStr);       
-                       fprintf(fout,"          %s\n", propStr);
+               if (cs.isRemove == 0){
+                       fprintf(fout, "MergeCS %d (Number of parent: %d) \n",i, 
cs.numParent);
+                       for (j = 0; j < cs.numParent; j++){
+                               freqcs = 
freqCSset->items[superCSFreqCSMap[cs.lstParent[j]]];
+                               fprintf(fout, " " BUNFMT " ", freqcs.csId);
+                       }
+                       fprintf(fout, "\n");
+                       for (j = 0; j < cs.numProp; j++){
+                               takeOid(cs.lstProp[j], &propStr);       
+                               fprintf(fout,"          %s\n", propStr);
+                       }
+                       fprintf(fout, "\n");
                }
-               fprintf(fout, "\n");
        }
 
        fclose(fout);
@@ -1478,6 +1615,7 @@ void getMaximumFreqCSs(CSset *freqCSset,
        */
 }
 
+/*
 static
 void mergeMaximumFreqCSs(CSset *freqCSset, oid* superCSFreqCSMap, oid* 
superCSMergeMaxCSMap, mergeCSset* mergecsSet, int numMaxCSs){
        int             i, j; 
@@ -1487,6 +1625,7 @@ void mergeMaximumFreqCSs(CSset *freqCSse
        mergeCS         *mergecs;
        int             numCombineP = 0; 
 
+
        for (i = 0; i < freqCSset->numCSadded; i++){
                if (freqCSset->items[i].isSubset == 0){
                        superCSFreqCSMap[maxCSid] = i; 
@@ -1503,7 +1642,7 @@ void mergeMaximumFreqCSs(CSset *freqCSse
                                        
freqCSset->items[freqId1].numProp,freqCSset->items[freqId2].numProp,
                                        &numCombineP);
                        if (simscore > 0.6){
-                               mergecs = 
mergeTwoCSs(freqCSset->items[freqId1],freqCSset->items[freqId2],numCombineP, 0, 
0);
+                               mergecs = 
mergeTwoCSs(freqCSset->items[freqId1],freqCSset->items[freqId2], 0, 0);
                                addmergeCStoSet(mergecsSet, *mergecs);
                                superCSMergeMaxCSMap[i] = j;    
                                //printf("Can merge " BUNFMT " and " BUNFMT " 
(sscore: %.2f) \n", 
freqCSset->items[freqId1].csId,freqCSset->items[freqId2].csId, simscore);
@@ -1511,6 +1650,85 @@ void mergeMaximumFreqCSs(CSset *freqCSse
                }
        }
 }
+*/
+
+static
+void mergeMaximumFreqCSsAll(CSset *freqCSset, oid* superCSFreqCSMap, oid* 
superCSMergeMaxCSMap, mergeCSset* mergecsSet, int numMaxCSs){
+       int             i, j, k; 
+       int             maxCSid = 0; 
+       int             freqId1, freqId2; 
+       float           simscore = 0.0; 
+       mergeCS         *mergecs;
+       oid             mercsId = 0; 
+       oid             existMergecsId = BUN_NONE; 
+       int             numCombineP = 0; 
+       CS              cs1, cs2;
+       mergeCS         *existmergecs, *mergecs1, *mergecs2; 
+
+
+       for (i = 0; i < freqCSset->numCSadded; i++){
+               if (freqCSset->items[i].isSubset == 0){
+                       superCSFreqCSMap[maxCSid] = i; 
+                       maxCSid++;
+               }
+       }
+
+       //Initial superCSMergeMaxCSMap
+       for (i = 0; i < numMaxCSs; i++){
+               superCSMergeMaxCSMap[i] = BUN_NONE; 
+       }
+
+       
+       for (i = 0; i < numMaxCSs; i++){
+               freqId1 = superCSFreqCSMap[i];
+               cs1 = (CS)freqCSset->items[freqId1];
+               for (j = (i+1); j < numMaxCSs; j++){
+                       freqId2 = superCSFreqCSMap[j];
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to