Changeset: 78c8c3b1ca65 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=78c8c3b1ca65
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Merge the implementation from Linnea for detecting the relationships between 
MaxCS and MergeCS


diffs (209 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2450,6 +2450,159 @@ str RDFrelationships(int *ret, BAT *sbat
        return MAL_SUCCEED; 
 }
 
+/*
+ * Initialise the first 'num' entries of csRelBetweenMergeFreqSet.
+ *
+ * Entry i gets origFreqIdx = i, no stored relationship (numRef = 0) and four
+ * parallel lists with room for INIT_NUM_CSREL relationships each.
+ *
+ * If any of the four lists of an entry cannot be allocated, an error is
+ * reported on stderr, the lists that were obtained are released again and the
+ * entry is left with NULL lists and numAllocation = 0. Such an entry is still
+ * consistent: the first insertion finds numRef == numAllocation and grows the
+ * lists with realloc(), which behaves like malloc() for a NULL pointer.
+ */
+static
+void initCsRelBetweenMergeFreqSet(CSmergeRel *csRelBetweenMergeFreqSet, int num){
+       int i;
+       for (i = 0; i < num; ++i) {
+               CSmergeRel *rel = &csRelBetweenMergeFreqSet[i];
+
+               rel->origFreqIdx = i;
+               rel->numRef = 0;
+               rel->numAllocation = INIT_NUM_CSREL;
+
+               rel->lstRefFreqIdx = (int *) malloc(sizeof(int) * INIT_NUM_CSREL);
+               rel->lstPropId = (oid *) malloc(sizeof(oid) * INIT_NUM_CSREL);
+               rel->lstCnt = (int *) malloc(sizeof(int) * INIT_NUM_CSREL);
+               rel->lstBlankCnt = (int *) malloc(sizeof(int) * INIT_NUM_CSREL);
+
+               if (!rel->lstRefFreqIdx || !rel->lstPropId || !rel->lstCnt || !rel->lstBlankCnt){
+                       fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
+                       /* Keep the four lists in step: either all usable or all empty */
+                       free(rel->lstRefFreqIdx);
+                       free(rel->lstPropId);
+                       free(rel->lstCnt);
+                       free(rel->lstBlankCnt);
+                       rel->lstRefFreqIdx = NULL;
+                       rel->lstPropId = NULL;
+                       rel->lstCnt = NULL;
+                       rel->lstBlankCnt = NULL;
+                       rel->numAllocation = 0;
+               }
+       }
+}
+
+/*
+ * Record one relationship in 'csmergerel'.
+ *
+ * A relationship is identified by the pair (refFreqIdx, propId). If that pair
+ * is already stored, 'freq' and 'numBlank' are added to its counters.
+ * Otherwise a new entry is appended, growing the four parallel lists by
+ * INIT_NUM_CSREL slots when they are full.
+ *
+ * origFreqIdx must equal csmergerel->origFreqIdx (checked by assert only).
+ *
+ * If the lists cannot be grown, an error is reported on stderr and the
+ * relationship is dropped; the entries already stored stay valid.
+ */
+static
+void addReltoCSmergeRel(int origFreqIdx, int refFreqIdx, oid propId, int freq, int numBlank, CSmergeRel *csmergerel)
+{
+       void *tmp;
+       int newAllocation;
+       int failed = 0;
+       int i = 0;
+
+       assert (origFreqIdx == csmergerel->origFreqIdx);
+       /* origFreqIdx is only used by the assertion above, which is compiled out under NDEBUG */
+       (void) origFreqIdx;
+
+       /* Linear search for an existing (refFreqIdx, propId) entry */
+       while (i < csmergerel->numRef){
+               if (refFreqIdx == csmergerel->lstRefFreqIdx[i] && propId == csmergerel->lstPropId[i]){
+                       //Existing
+                       break;
+               }
+               i++;
+       }
+
+       if (i != csmergerel->numRef){
+               csmergerel->lstCnt[i] = csmergerel->lstCnt[i] + freq;
+               csmergerel->lstBlankCnt[i] = csmergerel->lstBlankCnt[i] + numBlank;
+               return;
+       }
+
+       // New Ref
+       if (csmergerel->numRef == csmergerel->numAllocation){
+               newAllocation = csmergerel->numAllocation + INIT_NUM_CSREL;
+
+               /*
+                * Each list is reallocated on its own and the new pointer is stored
+                * as soon as it is known to be valid: after a successful realloc the
+                * old pointer must not be used any more, after a failed one the old
+                * block is still intact and must not be overwritten with NULL.
+                */
+               tmp = realloc(csmergerel->lstRefFreqIdx, (newAllocation * sizeof(int)));
+               if (tmp) csmergerel->lstRefFreqIdx = (int*)tmp; else failed = 1;
+
+               tmp = realloc(csmergerel->lstPropId, (newAllocation * sizeof(oid)));
+               if (tmp) csmergerel->lstPropId = (oid*)tmp; else failed = 1;
+
+               tmp = realloc(csmergerel->lstCnt, (newAllocation * sizeof(int)));
+               if (tmp) csmergerel->lstCnt = (int*)tmp; else failed = 1;
+
+               tmp = realloc(csmergerel->lstBlankCnt, (newAllocation * sizeof(int)));
+               if (tmp) csmergerel->lstBlankCnt = (int*)tmp; else failed = 1;
+
+               if (failed){
+                       /* numAllocation is left unchanged: at least one list still has the old size */
+                       fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                       return;
+               }
+               csmergerel->numAllocation = newAllocation;
+       }
+
+       csmergerel->lstRefFreqIdx[csmergerel->numRef] = refFreqIdx;
+       csmergerel->lstPropId[csmergerel->numRef] = propId;
+       csmergerel->lstCnt[csmergerel->numRef] = freq;
+       csmergerel->lstBlankCnt[csmergerel->numRef] = numBlank;
+       csmergerel->numRef++;
+}
+
+/*
+ * Create a new data structure to store relationships including merged CS.
+ *
+ * For every entry of csrelBetweenMaxFreqSet[0..numOid-1] that has at least one
+ * reference, both ends of each relationship are translated from CS oids to
+ * indexes into freqCSset->items (via csIdFreqIdxMap). An end whose item has a
+ * parentFreqIdx other than -1 is replaced by that parent. The translated
+ * relationship is then added to csRelBetweenMergeFreqSet[from], which
+ * accumulates the counters of relationships that end up with the same
+ * (to, property) pair.
+ *
+ * csRelBetweenMergeFreqSet must already be initialised and is indexed by
+ * position in freqCSset->items.
+ */
+static
+void generateCsRelBetweenMergeFreqSet(CSmergeRel *csRelBetweenMergeFreqSet, CSrel *csrelBetweenMaxFreqSet, int numOid, int *csIdFreqIdxMap, CSset *freqCSset){
+       int i,j;
+       for (i = 0; i < numOid; ++i) {
+               CSrel rel;
+               int from;
+               if (csrelBetweenMaxFreqSet[i].numRef == 0) continue; // ignore CS without relations
+               rel = csrelBetweenMaxFreqSet[i];
+
+               // update the 'from' value
+               from = csIdFreqIdxMap[rel.origCSoid];
+               assert (from != -1);
+               if (freqCSset->items[from].parentFreqIdx != -1) {
+                       from = freqCSset->items[from].parentFreqIdx;
+                       /* compare, do not assign: the assertion must not modify the item */
+                       assert (freqCSset->items[from].type == MERGECS);
+               }
+
+               for (j = 0; j < rel.numRef; ++j) {
+                       int to;
+                       // update the 'to' value
+                       to = csIdFreqIdxMap[rel.lstRefCSoid[j]];
+                       assert (to != -1);
+                       if (freqCSset->items[to].parentFreqIdx != -1) {
+                               to = freqCSset->items[to].parentFreqIdx;
+                               /* compare, do not assign: the assertion must not modify the item */
+                               assert (freqCSset->items[to].type == MERGECS);
+                       }
+
+                       // add relation to new data structure
+                       addReltoCSmergeRel(from, to, rel.lstPropId[j], rel.lstCnt[j], rel.lstBlankCnt[j], &csRelBetweenMergeFreqSet[from]);
+               }
+       }
+}
+
+/*
+ * Dump the relationships stored in csRelBetweenMergeFreqSet to two text files
+ * in the current working directory:
+ *
+ *   csRelationshipBetweenMergeFreqCS<freqThreshold>.txt
+ *       every relationship of every CS that has at least one reference;
+ *   csRelationshipBetweenMergeFreqCS<freqThreshold>.txt.filter
+ *       the same lines, but a reference is only listed when the support of
+ *       the originating CS is smaller than 100 times the reference count.
+ *
+ * The first freqCSset->numCSadded entries of csRelBetweenMergeFreqSet are
+ * visited. If either file cannot be opened, an error is reported on stderr
+ * and nothing is written.
+ */
+static
+void printCSmergeRel(CSset *freqCSset, CSmergeRel *csRelBetweenMergeFreqSet, int freqThreshold){
+       FILE    *fout2,*fout2filter;
+       char    filename2[100];
+       char    filterName[110];
+#if SHOWPROPERTYNAME
+       str     propStr;
+#endif
+       int             i,j;
+       int             freq;
+
+       /* Bounded formatting: both names are always NUL-terminated */
+       snprintf(filename2, sizeof(filename2), "csRelationshipBetweenMergeFreqCS%d.txt", freqThreshold);
+       snprintf(filterName, sizeof(filterName), "%s.filter", filename2);
+
+       fout2 = fopen(filename2,"wt");
+       if (fout2 == NULL){
+               fprintf(stderr, "ERROR: Couldn't open file %s for writing!\n", filename2);
+               return;
+       }
+       fout2filter = fopen(filterName,"wt");
+       if (fout2filter == NULL){
+               fprintf(stderr, "ERROR: Couldn't open file %s for writing!\n", filterName);
+               fclose(fout2);
+               return;
+       }
+
+       for (i = 0; i < freqCSset->numCSadded; i++){
+               CSmergeRel *rel = &csRelBetweenMergeFreqSet[i];
+
+               if (rel->numRef == 0) continue;         //Only print CS with FK
+
+               freq = freqCSset->items[rel->origFreqIdx].support;
+
+               fprintf(fout2, "Relationship "BUNFMT": ", freqCSset->items[rel->origFreqIdx].csId);
+               fprintf(fout2filter, "Relationship "BUNFMT": ", freqCSset->items[rel->origFreqIdx].csId);
+               fprintf(fout2, "CS " BUNFMT " (Freq: %d, isFreq: %d) --> ", freqCSset->items[rel->origFreqIdx].csId, freq, 1);
+               fprintf(fout2filter, "CS " BUNFMT " (Freq: %d, isFreq: %d) --> ", freqCSset->items[rel->origFreqIdx].csId, freq, 1);
+
+               for (j = 0; j < rel->numRef; j++){
+                       #if SHOWPROPERTYNAME
+                       /* NOTE(review): ownership of propStr is not visible here - confirm whether it must be released after use */
+                       takeOid(rel->lstPropId[j], &propStr);
+                       fprintf(fout2, BUNFMT "(P:" BUNFMT " - %s) (%d)(Blank:%d) ", freqCSset->items[rel->lstRefFreqIdx[j]].csId, rel->lstPropId[j], propStr, rel->lstCnt[j], rel->lstBlankCnt[j]);
+                       #else
+                       fprintf(fout2, BUNFMT "(P:" BUNFMT ") (%d)(Blank:%d) ", freqCSset->items[rel->lstRefFreqIdx[j]].csId, rel->lstPropId[j], rel->lstCnt[j], rel->lstBlankCnt[j]);
+                       #endif
+
+                       /* The filtered file keeps a reference only when its count exceeds 1% of the CS support */
+                       if (freq < rel->lstCnt[j]*100){
+                               fprintf(fout2filter, BUNFMT "(P:" BUNFMT ") (%d)(Blank:%d) ", freqCSset->items[rel->lstRefFreqIdx[j]].csId, rel->lstPropId[j], rel->lstCnt[j], rel->lstBlankCnt[j]);
+                       }
+               }
+               fprintf(fout2, "\n");
+               fprintf(fout2filter, "\n");
+       }
+
+       fclose(fout2);
+       fclose(fout2filter);
+}
+
 /* Extract CS from SPO triples table */
 str
 RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold){
@@ -2469,6 +2622,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        CSrel           *csrelSet;
        CSrel           *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
        CSrel           *csrelBetweenMaxFreqSet; 
+       CSmergeRel      *csRelBetweenMergeFreqSet;
        SubCSSet        *csSubCSMap; 
 
        int*            csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be 
removed in the future .... */
@@ -2585,6 +2739,11 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        mergeMaximumFreqCSsAll(freqCSset, superCSFreqCSMap, 
superCSMergeMaxCSMap, numMaxCSs, maxCSoid);
 
+       csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) * 
freqCSset->numCSadded);
+       initCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet, 
freqCSset->numCSadded);
+       generateCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet, 
csrelBetweenMaxFreqSet, maxCSoid + 1, csIdFreqIdxMap, freqCSset);
+       printCSmergeRel(freqCSset, csRelBetweenMergeFreqSet, *freqThreshold);
+
        printmergeCSSet(freqCSset, *freqThreshold);
        //getStatisticCSsBySize(csMap,maxNumProp); 
 
@@ -2605,6 +2764,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        freeCS_SubCSMapSet(csSubCSMap, maxCSoid + 1); 
 
        free(csIdFreqIdxMap); 
+       free(csRelBetweenMergeFreqSet);
        freeCSrelSet(csrelSet, maxCSoid + 1); 
        freeCSrelSet(csrelToMaxFreqSet, maxCSoid + 1); 
        freeCSrelSet(csrelBetweenMaxFreqSet, maxCSoid + 1);  
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -148,4 +148,14 @@ typedef struct CSrel{
        int  numAllocation; 
 } CSrel;
 
+typedef struct CSmergeRel{
+       int  origFreqIdx;       // Index into freqCSset->items of the CS the relationships start from
+       int* lstRefFreqIdx;     // Index into freqCSset->items of the referenced CS, one per relationship
+       oid* lstPropId;         // Predicate for a relationship
+       int* lstCnt;            // Count per reference
+       int* lstBlankCnt;       // Count # links to blank node
+       int  numRef;            // Number of relationships currently stored in the lst* arrays
+       int  numAllocation;     // Number of slots allocated in each of the lst* arrays
+} CSmergeRel;
+
 #endif /* _RDFSCHEMA_H_ */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to