Changeset: ecccfc50d79d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ecccfc50d79d Modified Files: monetdb5/extras/rdf/rdflabels.c monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
Add function for creating set of freqIds per label (S1) diffs (171 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -2407,6 +2407,11 @@ CSlabel* createLabels(CSset* freqCSset, } str updateLabel(int ruleNumber, CSlabel *labels, int mergeCSFreqId, int freqCS1, int freqCS2){ + (void) ruleNumber; + (void) labels; + (void) mergeCSFreqId; + (void) freqCS1; + (void) freqCS2; return MAL_SUCCEED; } diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -3077,6 +3077,103 @@ str getReferCS(BAT *sbat, BAT *pbat, oid } */ +static +LabelStat* initLabelStat(void){ + LabelStat *labelStat = (LabelStat*) malloc(sizeof(LabelStat)); + labelStat->labelBat = BATnew(TYPE_void, TYPE_str, INIT_DISTINCT_LABEL); + if (labelStat->labelBat == NULL){ + return NULL; + } + (void)BATprepareHash(BATmirror(labelStat->labelBat)); + if (!(labelStat->labelBat->T->hash)) + return NULL; + labelStat->lstCount = (int*)malloc(sizeof(int) * INIT_DISTINCT_LABEL); + + labelStat->freqIdList = NULL; + labelStat->numLabeladded = 0; + labelStat->numAllocation = INIT_DISTINCT_LABEL; + + return labelStat; +} + +static +void buildLabelStat(LabelStat *labelStat, CSlabel *labels, CSset *freqCSset){ + int i; + BUN bun; + int *_tmp; + int freqIdx; + + //Preparation + for (i = 0; i < freqCSset->numCSadded; i++){ + if (strcmp(labels[i].name,"DUMMY") != 0){ + bun = BUNfnd(BATmirror(labelStat->labelBat),(ptr)labels[i].name); + if (bun == BUN_NONE) { + /*New string*/ + if (labelStat->labelBat->T->hash && BATcount(labelStat->labelBat) > 4 * labelStat->labelBat->T->hash->mask) { + HASHdestroy(labelStat->labelBat); + BAThash(BATmirror(labelStat->labelBat), 2*BATcount(labelStat->labelBat)); + } + + labelStat->labelBat = BUNappend(labelStat->labelBat, (ptr) (str)labels[i].name, TRUE); + + if(labelStat->numLabeladded == labelStat->numAllocation) + { + labelStat->numAllocation += INIT_DISTINCT_LABEL; + + _tmp = realloc(labelStat->lstCount, (labelStat->numAllocation * sizeof(int))); + + if (!_tmp){ + fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); + } + labelStat->lstCount = (int*)_tmp; + } + labelStat->lstCount[labelStat->numLabeladded] = 1; + labelStat->numLabeladded++; + } + else{ + labelStat->lstCount[bun]++; + } + } + } + + printf("Total number of distinct labels is %d \n", labelStat->numLabeladded); + //Build list of FreqCS + labelStat->freqIdList = (int**) malloc(sizeof(int*) * labelStat->numLabeladded); + for (i =0; i < labelStat->numLabeladded; i++){ + labelStat->freqIdList[i] = (int*)malloc(sizeof(int) * labelStat->lstCount[i]); + //reset the lstCount + labelStat->lstCount[i] = 0; + } + + for (i = 0; i < freqCSset->numCSadded; i++){ + if (strcmp(labels[i].name,"DUMMY") != 0){ + bun = BUNfnd(BATmirror(labelStat->labelBat),(ptr) labels[i].name); + if (bun == BUN_NONE) { + fprintf(stderr, "All the name should be stored already!\n"); + } + else{ + freqIdx = labelStat->lstCount[bun]; + labelStat->freqIdList[bun][freqIdx] = i; + labelStat->lstCount[bun]++; + } + } + } +} +static +void freeLabelStat(LabelStat *labelStat){ + int i; + if (labelStat->freqIdList != NULL){ + for (i = 0; i < labelStat->numLabeladded;i++){ + free(labelStat->freqIdList[i]); + } + free(labelStat->freqIdList); + } + free(labelStat->lstCount); + BBPreclaim(labelStat->labelBat); + free(labelStat); +} + + @@ -3824,6 +3921,8 @@ RDFextractCSwithTypes(int *ret, bat *sba clock_t curT; clock_t tmpLastT; OntoUsageNode *ontoUsageTree = NULL; + LabelStat *labelStat = NULL; + if ((sbat = BATdescriptor(*sbatid)) == NULL) { throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING); @@ -3949,6 +4048,20 @@ RDFextractCSwithTypes(int *ret, bat *sba tmpLastT = curT; + labelStat = initLabelStat(); + buildLabelStat(labelStat, *labels, freqCSset); + freeLabelStat(labelStat); + /* + { + str tknzLabel = "cslabel"; + if (TKNZRopen (NULL, &tknzLabel) != MAL_SUCCEED) { + throw(RDF, "RDFextractCSwithTypes", "could not open the tokenizer\n"); + } + + TKNZRclose(ret); + } + */ + /*S4: Merge two CS's having the subset-superset relationship */ getMaximumFreqCSs(freqCSset, *labels, csBats->coverageBat, csBats->freqBat, *maxCSoid + 1, &numMaxCSs); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -214,6 +214,15 @@ typedef struct CSrelSum{ int **freqIdList; } CSrelSum; +#define INIT_DISTINCT_LABEL 400 +typedef struct LabelStat{ /*Store the list of freqIds having the same label*/ + BAT *labelBat; + int *lstCount; /* Number of items per name */ + int **freqIdList; + int numLabeladded; + int numAllocation; +} LabelStat; + typedef struct CStable { BAT** colBats; ObjectType* colTypes; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list