Changeset: 2242dea64568 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2242dea64568
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
	monetdb5/extras/rdf/rdflabels.h
Branch: rdf
Log Message:
Improve memory footprint of labeling algorithm diffs (truncated from 776 to 300 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -105,15 +105,17 @@ ontology ontologies[] = { #if USE_SHORT_NAMES /* Extracts the "human-readable" part of an URI (usually the last token). */ static -void getPropNameShort(char* name, char* propStr) { +void getPropNameShort(char** name, char* propStr) { char *token; - char uri[1000]; + char *uri; int length = 0; // number of tokens char **tokenizedUri = NULL; // list of tokens int i, j; int fit; // tokenize uri + uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1)); + if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(uri, propStr); // uri will be modified during tokenization token = strtok(uri, "/#"); while (token != NULL) { @@ -134,12 +136,20 @@ void getPropNameShort(char* name, char* } if (fit) { // found matching ontology, create label + int totalLength = 0; for (i = ontologies[j].length; i < length; ++i) { - strcat(name, tokenizedUri[i]); - strcat(name, "_"); // if label consists of >=2 tokens, use underscores + totalLength += (strlen(tokenizedUri[i]) + 1); // additional char for underscore + } + (*name) = (char *) malloc(sizeof(char) * (totalLength + 1)); + if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + strcpy(*name, "\0"); + + for (i = ontologies[j].length; i < length; ++i) { + strcat(*name, tokenizedUri[i]); + strcat(*name, "_"); // if label consists of >=2 tokens, use underscores } // remove trailing underscore - name[strlen(name) - 1] = '\0'; + (*name)[strlen(*name) - 1] = '\0'; free(tokenizedUri); return; @@ -151,12 +161,17 @@ void getPropNameShort(char* name, char* if (length == 1) { // value - strcat(name, propStr); + (*name) = (char *) malloc(sizeof(char) * (strlen(propStr) + 1)); + if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + 
strcpy(*name, propStr); } else { - strcat(name, tokenizedUri[length - 1]); + (*name) = (char *) malloc(sizeof(char) * (strlen(tokenizedUri[length - 1]) + 1)); + if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + strcpy(*name, tokenizedUri[length - 1]); } free(tokenizedUri); + free(uri); return; } #endif @@ -180,8 +195,8 @@ int** initTypeAttributesHistogramCount(i } static -TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int ** typeAttributesHistogramCount, int num) { - int i, j, k; +TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int num) { + int i, j; TypeAttributesFreq*** typeAttributesHistogram; typeAttributesHistogram = (TypeAttributesFreq ***) malloc(sizeof(TypeAttributesFreq **) * num); @@ -190,12 +205,7 @@ TypeAttributesFreq*** initTypeAttributes typeAttributesHistogram[i] = (TypeAttributesFreq **) malloc (sizeof(TypeAttributesFreq *) * typeAttributesCount); if (!typeAttributesHistogram[i]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (j = 0; j < typeAttributesCount; ++j) { - typeAttributesHistogram[i][j] = (TypeAttributesFreq *) malloc (sizeof(TypeAttributesFreq) * typeAttributesHistogramCount[i][j]); - if (!typeAttributesHistogram[i][j]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - for (k = 0; k < typeAttributesHistogramCount[i][j]; ++k) { - typeAttributesHistogram[i][j][k].freq = 0; - typeAttributesHistogram[i][j][k].percent = 0; - } + typeAttributesHistogram[i][j] = NULL; } } @@ -357,7 +367,7 @@ void convertToSQL(CSset *freqCSset, Rela // file i/o FILE *fout; - char filename[100], tmp[10]; + char filename[20], tmp[10]; // looping int i, j, k; @@ -376,13 +386,18 @@ void convertToSQL(CSset *freqCSset, Rela // create statement for every table for (i = 0; i < freqCSset->numCSadded; ++i) { - char temp[100]; + char *temp; if ( freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 1)); + if 
(!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp, labels[i].name); escapeURIforSQL(temp); fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores? + free(temp); for (j = 0; j < labels[i].numProp; ++j) { - char temp2[100]; + char *temp2; + temp2 = (char *) malloc(sizeof(char) * (strlen(labels[i].lstProp[j]) + 1)); + if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp2, labels[i].lstProp[j]); escapeURIforSQL(temp2); @@ -392,6 +407,7 @@ void convertToSQL(CSset *freqCSset, Rela // last column fprintf(fout, "%s_%d BOOLEAN\n", temp2, j); } + free(temp2); } fprintf(fout, ");\n\n"); } @@ -400,17 +416,23 @@ void convertToSQL(CSset *freqCSset, Rela for (i = 0; i < freqCSset->numCSadded; ++i) { if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore for (j = 0; j < labels[i].numProp; ++j) { - char temp2[100]; + char *temp2; int refCounter = 0; + temp2 = (char *) malloc(sizeof(char) * (strlen(labels[i].lstProp[j]) + 1)); + if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp2, labels[i].lstProp[j]); escapeURIforSQL(temp2); for (k = 0; k < relationMetadataCount[i][j]; ++k) { int from, to; - char tempFrom[100], tempTo[100]; + char *tempFrom, *tempTo; if (relationMetadata[i][j][k].percent < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough from = relationMetadata[i][j][k].from; to = relationMetadata[i][j][k].to; + tempFrom = (char *) malloc(sizeof(char) * (strlen(labels[from].name) + 1)); + if (!tempFrom) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + tempTo = (char *) malloc(sizeof(char) * (strlen(labels[to].name) + 1)); + if (!tempTo) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(tempFrom, labels[from].name); escapeURIforSQL(tempFrom); strcpy(tempTo, labels[to].name); @@ -419,7 +441,10 @@ void convertToSQL(CSset *freqCSset, Rela fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD COLUMN 
%s_%d_%d VARCHAR(10);\n", tempFrom, freqCSset->items[from].csId, temp2, j, refCounter); fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD FOREIGN KEY (%s_%d_%d) REFERENCES %s_"BUNFMT"(subject);\n\n", tempFrom, freqCSset->items[from].csId, temp2, j, refCounter, tempTo, freqCSset->items[to].csId); refCounter += 1; + free(tempFrom); + free(tempTo); } + free(temp2); } } @@ -438,7 +463,7 @@ void createSQLMetadata(CSset* freqCSset, if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (i = 0; i < freqCSset->numCSadded; ++i) { - matrix[i] = (int *) malloc(sizeof(char *) * freqCSset->numCSadded); + matrix[i] = (int *) malloc(sizeof(int) * freqCSset->numCSadded); if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); for (j = 0; j < freqCSset->numCSadded; ++j) { @@ -484,12 +509,14 @@ void createSQLMetadata(CSset* freqCSset, // print id -> table name fout = fopen("tableIdFreq.csv", "wt"); for (i = 0; i < freqCSset->numCSadded; ++i) { - char temp[100], temp2[100]; + char *temp; if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 1)); + if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp, labels[i].name); escapeURIforSQL(temp); - sprintf(temp2, "%s_"BUNFMT"", temp, freqCSset->items[i].csId); // TODO underscores? - fprintf(fout, "\"%d\",\"%s\",\"%d\"\n", i, temp2, freqCSset->items[i].support); + fprintf(fout, "\"%d\",\"%s_"BUNFMT"\",\"%d\"\n", i, temp, freqCSset->items[i].csId, freqCSset->items[i].support); // TODO underscores? 
+ free(temp); } fclose(fout); @@ -505,7 +532,7 @@ void createSQLMetadata(CSset* freqCSset, static void printTxt(CSset* freqCSset, Labels* labels, int freqThreshold) { FILE *fout; - char filename[100], tmp[10]; + char filename[20], tmp[10]; int i, j; strcpy(filename, "labels"); @@ -543,7 +570,6 @@ void createTypeAttributesHistogram(BAT * str propStr, objStr; char *objStrPtr; - char temp[10000]; char *start, *end; int length; @@ -612,9 +638,10 @@ void createTypeAttributesHistogram(BAT * end = strrchr(objStr, '"'); if (start != NULL && end != NULL) { length = end - start; - memcpy(temp, start, length); - temp[length] = '\0'; - objStrPtr = temp; + objStrPtr = (char *) malloc(sizeof(char) * (length + 1)); + if (!objStrPtr) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + memcpy(objStrPtr, start, length); + objStrPtr[length] = '\0'; } else { objStrPtr = objStr; } @@ -638,10 +665,13 @@ void createTypeAttributesHistogram(BAT * if (!typeAttributesHistogram[csFreqIdx][i]) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); // insert value + typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i] - 1].value = (str) malloc(sizeof(char)*(strlen(objStrPtr)+1)); + if (!typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i] - 1].value) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i] - 1].value, objStrPtr); typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i] - 1].freq = 1; } + if (!(objType == URI || objType == BLANKNODE)) free(objStrPtr); // malloc, therefore free break; } } @@ -744,11 +774,13 @@ str** findOntologies(CS cs, int *propOnt int length = 0; char **tokenizedUri = NULL; char *token; // token, modified during tokenization - char uri[1000]; // uri, modified during tokenization + char *uri; // uri, modified during tokenization str propStr; takeOid(cs.lstProp[j], &propStr); 
removeBrackets(&propStr); + uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1)); + if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(uri, propStr); // tokenize uri @@ -756,9 +788,12 @@ str** findOntologies(CS cs, int *propOnt while (token != NULL) { tokenizedUri = realloc(tokenizedUri, sizeof(char*) * ++length); if (!tokenizedUri) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); - tokenizedUri[length - 1] = token; + tokenizedUri[length -1] = (char *) malloc(sizeof(char *) * (strlen(token) + 1)); + if (!tokenizedUri[length - 1]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + strcpy(tokenizedUri[length - 1], token); token = strtok(NULL, "/#"); } + free(uri); // check for match with ontology if (length > ontologies[i].length) { @@ -778,6 +813,10 @@ str** findOntologies(CS cs, int *propOnt propOntologiesCount[i] += 1; } } + for (k = 0; k < length; ++k) { + free(tokenizedUri[k]); + } + free(tokenizedUri); } } return propOntologies; @@ -982,10 +1021,10 @@ PropStat* initPropStat(void) { } propStat->freqs = (int*) malloc(sizeof(int) * INIT_PROP_NUM); - if (propStat->freqs == NULL) return NULL; + if (!propStat->freqs) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); propStat->tfidfs = (float*) malloc(sizeof(float) * INIT_PROP_NUM); - if (propStat->tfidfs == NULL) return NULL; + if (!propStat->tfidfs) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); propStat->numAdded = 0; propStat->numAllocation = INIT_PROP_NUM; @@ -1098,18 +1137,15 @@ void printUML(CSset *freqCSset, int type int ret; char* schema = "rdf"; - char propStrEscaped[1000]; -#if USE_SHORT_NAMES - char propStrShort[1000]; -#endif _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list