Changeset: 0f594c750389 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0f594c750389
Modified Files:
	monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Collect the statistics on frequencies of CSs. diffs (209 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -84,6 +84,15 @@ static void initArray(oid* inputArr, int } } + + +static void generateFreqCSMap(CSset *freqCSset, char *csFreqMap){ + int i; + for (i = 0; i < freqCSset->numCSadded; i++){ + csFreqMap[freqCSset->items[i].csId] = 1; + } +} + static void addCStoSet(CSset *csSet, CS item) { @@ -128,16 +137,34 @@ CSrel* initCSrelset(oid numCSrel){ } static -void printCSrelSet(CSrel *csrelSet, int num){ +void printCSrelSet(CSrel *csrelSet, char *csFreqMap, BAT* freqBat, int num){ + + int i; + int j; + int *freq; + for (i = 0; i < num; i++){ + if (csrelSet[i].numRef != 0){ //Only print CS with FK + printf("Relationship %d: ", i); + freq = (int *) Tloc(freqBat, i); + printf("CS " BUNFMT " (Freq: %d, isFreq: %d) --> ", csrelSet[i].origCSoid, *freq, csFreqMap[i]); + for (j = 0; j < csrelSet[i].numRef; j++){ + printf(BUNFMT " (%d) ", csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]); + } + printf("\n"); + } + } +} + +static +void printSubCSInformation(SubCSSet *subcsset, int num){ int i; int j; for (i = 0; i < num; i++){ - if (csrelSet[i].numRef != 0){ //Only print CS with FK - printf("Relationship %d: ", i); - printf("CS " BUNFMT " --> ", csrelSet[i].origCSoid); - for (j = 0; j < csrelSet[i].numRef; j++){ - printf(BUNFMT " (%d) ", csrelSet[i].lstRefCSoid[j],csrelSet[i].lstCnt[j]); + if (subcsset[i].numSubCS != 0){ //Only print CS with FK + printf("CS " BUNFMT ": ", subcsset[i].csId); + for (j = 0; j < subcsset[i].numSubCS; j++){ + printf(BUNFMT " (%d) ", subcsset[i].subCSs[j].subCSId, subcsset[i].freq[j]); } printf("\n"); } @@ -532,8 +559,8 @@ oid putaCStoHash(CSBats *csBats, oid* ke csKey = RDF_hash_oidlist(key, num); bun = BUNfnd(BATmirror(csBats->hsKeyBat),(ptr) &csKey); if (bun == BUN_NONE) { + csId = *csoid; addNewCS(csBats, &csKey, key, csoid, num); 
- csId = *csoid; //assert(csId != BUN_NONE); } else{ @@ -544,8 +571,8 @@ oid putaCStoHash(CSBats *csBats, oid* ke if (isDuplicate == 0) { printf(" No duplication (new CS) \n"); // New CS + csId = *csoid; addNewCS(csBats, &csKey, key, csoid, num); - csId = *csoid; } else{ @@ -887,7 +914,8 @@ str RDFassignCSId(int *ret, BAT *sbat, B if (p != 0){ /* Not the first S */ returnCSid = putaCStoHash(csBats, buff, numP, &CSoid, 1, *freqThreshold, freqCSset); - subjCSMap[curS] = returnCSid; + subjCSMap[curS] = returnCSid; + //printf("subjCSMap[" BUNFMT "]=" BUNFMT " (CSoid = " BUNFMT ") \n", curS, returnCSid, CSoid); if (numP > *maxNumProp) *maxNumProp = numP; @@ -923,7 +951,8 @@ str RDFassignCSId(int *ret, BAT *sbat, B /*put the last CS */ returnCSid = putaCStoHash(csBats, buff, numP, &CSoid, 1, *freqThreshold, freqCSset ); - subjCSMap[curS] = returnCSid; + subjCSMap[curS] = returnCSid; + //printf("subjCSMap[" BUNFMT "]=" BUNFMT " (CSoid = " BUNFMT ") \n", curS, returnCSid, CSoid); if (numP > *maxNumProp) *maxNumProp = numP; @@ -943,23 +972,17 @@ str RDFassignCSId(int *ret, BAT *sbat, B static str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter oi, - oid *subjCSMap, oid *subjSubCSMap, BUN maxSoid, BUN maxCSoid, int maxNumPwithDup){ + oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSMap, CSrel *csrelSet, BUN maxSoid, int maxNumPwithDup){ - BUN p, q; - oid *sbt, *obt; - oid curS; /* current Subject oid */ - //oid CSoid = 0; /* Characteristic set oid */ - int numPwithDup; /* Number of properties for current S */ - char objType; - oid returnSubCSid; - CSrel *csrelSet; - SubCSSet *csSubCSMap; - char* buffTypes; + BUN p, q; + oid *sbt, *obt; + oid curS; /* current Subject oid */ + //oid CSoid = 0; /* Characteristic set oid */ + int numPwithDup; /* Number of properties for current S */ + char objType; + oid returnSubCSid; + char* buffTypes; - csrelSet = initCSrelset(maxCSoid + 1); - - csSubCSMap = initCS_SubCSMap(maxCSoid +1); - buffTypes = (char *) malloc(sizeof(char) 
* (maxNumPwithDup + 1)); numPwithDup = 0; @@ -1000,7 +1023,7 @@ str RDFrelationships(int *ret, BAT *sbat free (buffTypes); - printCSrelSet(csrelSet,maxCSoid + 1); + *ret = 1; @@ -1011,17 +1034,20 @@ str RDFrelationships(int *ret, BAT *sbat str RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int *freqThreshold){ - BAT *sbat = NULL, *pbat = NULL, *obat = NULL; - BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in spo table */ - CSset *freqCSset; /* Set of frequent CSs */ + BAT *sbat = NULL, *pbat = NULL, *obat = NULL; + BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in spo table */ + CSset *freqCSset; /* Set of frequent CSs */ - CSBats *csBats; - oid *subjCSMap; /* Store the corresponding CS Id for each subject */ - oid *subjSubCSMap; /* Store the corresponding CS sub Id for each subject */ - BUN *maxSoid; - oid maxCSoid = 0; - int maxNumProp = 0; - int maxNumPwithDup = 0; + CSBats *csBats; + oid *subjCSMap; /* Store the corresponding CS Id for each subject */ + oid *subjSubCSMap; /* Store the corresponding CS sub Id for each subject */ + BUN *maxSoid; + oid maxCSoid = 0; + int maxNumProp = 0; + int maxNumPwithDup = 0; + char *csFreqMap; + CSrel *csrelSet; + SubCSSet *csSubCSMap; if ((sbat = BATdescriptor(*sbatid)) == NULL) { throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING); @@ -1052,15 +1078,33 @@ RDFextractCSwithTypes(int *ret, bat *sba initArray(subjCSMap, (*maxSoid) + 1, BUN_NONE); + //Phase 1: Assign an ID for each CS RDFassignCSId(ret, sbat, si, pi, freqCSset, freqThreshold, csBats, subjCSMap, &maxCSoid, &maxNumProp, &maxNumPwithDup); + + + //Phase 2: Check the relationship + printf("Max CS oid: " BUNFMT "\n", maxCSoid); - //Phase 2: Check the relationship - RDFrelationships(ret, sbat, si, oi, subjCSMap, subjSubCSMap, *maxSoid, maxCSoid, maxNumPwithDup); + csFreqMap = malloc(sizeof(char) * (maxCSoid +1)); + generateFreqCSMap(freqCSset,csFreqMap); + + + csrelSet = initCSrelset(maxCSoid + 1); + + csSubCSMap = 
initCS_SubCSMap(maxCSoid +1); + + RDFrelationships(ret, sbat, si, oi, subjCSMap, subjSubCSMap, csSubCSMap, csrelSet, *maxSoid, maxNumPwithDup); + + + printCSrelSet(csrelSet,csFreqMap, csBats->freqBat, maxCSoid + 1); + + printSubCSInformation(csSubCSMap, maxCSoid + 1); + printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded); /*get the statistic */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list