Changeset: 2242dea64568 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2242dea64568
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
Branch: rdf
Log Message:

Reduce the memory footprint of the labeling algorithm by replacing fixed-size string buffers with exact-size heap allocations


diffs (truncated from 776 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -105,15 +105,17 @@ ontology ontologies[] = {
 #if USE_SHORT_NAMES
 /* Extracts the "human-readable" part of an URI (usually the last token). */
 static
-void getPropNameShort(char* name, char* propStr) {
+void getPropNameShort(char** name, char* propStr) {
        char            *token;
-       char            uri[1000];
+       char            *uri;
        int             length = 0;             // number of tokens
        char            **tokenizedUri = NULL;  // list of tokens
        int             i, j;
        int             fit;
 
        // tokenize uri
+       uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+       if (!uri) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
        strcpy(uri, propStr); // uri will be modified during tokenization
        token = strtok(uri, "/#");
        while (token != NULL) {
@@ -134,12 +136,20 @@ void getPropNameShort(char* name, char* 
                        }
                        if (fit) {
                                // found matching ontology, create label
+                               int totalLength = 0;
                                for (i = ontologies[j].length; i < length; ++i) 
{
-                                       strcat(name, tokenizedUri[i]);
-                                       strcat(name, "_"); // if label consists 
of >=2 tokens, use underscores
+                                       totalLength += (strlen(tokenizedUri[i]) 
+ 1); // additional char for underscore
+                               }
+                               (*name) = (char *) malloc(sizeof(char) * 
(totalLength + 1));
+                               if (!(*name)) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
+                               strcpy(*name, "\0");
+
+                               for (i = ontologies[j].length; i < length; ++i) 
{
+                                       strcat(*name, tokenizedUri[i]);
+                                       strcat(*name, "_"); // if label 
consists of >=2 tokens, use underscores
                                }
                                // remove trailing underscore
-                               name[strlen(name) - 1] = '\0';
+                               (*name)[strlen(*name) - 1] = '\0';
 
                                free(tokenizedUri);
                                return;
@@ -151,12 +161,17 @@ void getPropNameShort(char* name, char* 
 
        if (length == 1) {
                // value
-               strcat(name, propStr);
+               (*name) = (char *) malloc(sizeof(char) * (strlen(propStr) + 1));
+               if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
+               strcpy(*name, propStr);
        } else {
-               strcat(name, tokenizedUri[length - 1]);
+               (*name) = (char *) malloc(sizeof(char) * 
(strlen(tokenizedUri[length - 1]) + 1));
+               if (!(*name)) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
+               strcpy(*name, tokenizedUri[length - 1]);
        }
 
        free(tokenizedUri);
+       free(uri);
        return;
 }
 #endif
@@ -180,8 +195,8 @@ int** initTypeAttributesHistogramCount(i
 }
 
 static
-TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int 
** typeAttributesHistogramCount, int num) {
-       int                     i, j, k;
+TypeAttributesFreq*** initTypeAttributesHistogram(int typeAttributesCount, int 
num) {
+       int                     i, j;
        TypeAttributesFreq***   typeAttributesHistogram;
 
        typeAttributesHistogram = (TypeAttributesFreq ***) 
malloc(sizeof(TypeAttributesFreq **) * num);
@@ -190,12 +205,7 @@ TypeAttributesFreq*** initTypeAttributes
                typeAttributesHistogram[i] = (TypeAttributesFreq **) malloc 
(sizeof(TypeAttributesFreq *) * typeAttributesCount);
                if (!typeAttributesHistogram[i]) fprintf(stderr, "ERROR: 
Couldn't malloc memory!\n");
                for (j = 0; j < typeAttributesCount; ++j) {
-                       typeAttributesHistogram[i][j] = (TypeAttributesFreq *) 
malloc (sizeof(TypeAttributesFreq) * typeAttributesHistogramCount[i][j]);
-                       if (!typeAttributesHistogram[i][j]) fprintf(stderr, 
"ERROR: Couldn't malloc memory!\n");
-                       for (k = 0; k < typeAttributesHistogramCount[i][j]; 
++k) {
-                               typeAttributesHistogram[i][j][k].freq = 0;
-                               typeAttributesHistogram[i][j][k].percent = 0;
-                       }
+                       typeAttributesHistogram[i][j] = NULL;
                }
        }
 
@@ -357,7 +367,7 @@ void convertToSQL(CSset *freqCSset, Rela
 
        // file i/o
        FILE            *fout;
-       char            filename[100], tmp[10];
+       char            filename[20], tmp[10];
 
        // looping
        int             i, j, k;
@@ -376,13 +386,18 @@ void convertToSQL(CSset *freqCSset, Rela
 
        // create statement for every table
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               char temp[100];
+               char *temp;
                if ( freqCSset->items[i].parentFreqIdx != -1) continue; // 
ignore
+               temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 
1));
+               if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                strcpy(temp, labels[i].name);
                escapeURIforSQL(temp);
                fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) 
PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores?
+               free(temp);
                for (j = 0; j < labels[i].numProp; ++j) {
-                       char temp2[100];
+                       char *temp2;
+                       temp2 = (char *) malloc(sizeof(char) * 
(strlen(labels[i].lstProp[j]) + 1));
+                       if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
                        strcpy(temp2, labels[i].lstProp[j]);
                        escapeURIforSQL(temp2);
 
@@ -392,6 +407,7 @@ void convertToSQL(CSset *freqCSset, Rela
                                // last column
                                fprintf(fout, "%s_%d BOOLEAN\n", temp2, j);
                        }
+                       free(temp2);
                }
                fprintf(fout, ");\n\n");
        }
@@ -400,17 +416,23 @@ void convertToSQL(CSset *freqCSset, Rela
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
                for (j = 0; j < labels[i].numProp; ++j) {
-                       char temp2[100];
+                       char *temp2;
                        int refCounter = 0;
+                       temp2 = (char *) malloc(sizeof(char) * 
(strlen(labels[i].lstProp[j]) + 1));
+                       if (!temp2) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
                        strcpy(temp2, labels[i].lstProp[j]);
                        escapeURIforSQL(temp2);
 
                        for (k = 0; k < relationMetadataCount[i][j]; ++k) {
                                int from, to;
-                               char tempFrom[100], tempTo[100];
+                               char *tempFrom, *tempTo;
                                if (relationMetadata[i][j][k].percent < 
FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough
                                from = relationMetadata[i][j][k].from;
                                to = relationMetadata[i][j][k].to;
+                               tempFrom = (char *) malloc(sizeof(char) * 
(strlen(labels[from].name) + 1));
+                               if (!tempFrom) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
+                               tempTo = (char *) malloc(sizeof(char) * 
(strlen(labels[to].name) + 1));
+                               if (!tempTo) fprintf(stderr, "ERROR: Couldn't 
malloc memory!\n");
                                strcpy(tempFrom, labels[from].name);
                                escapeURIforSQL(tempFrom);
                                strcpy(tempTo, labels[to].name);
@@ -419,7 +441,10 @@ void convertToSQL(CSset *freqCSset, Rela
                                fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD 
COLUMN %s_%d_%d VARCHAR(10);\n", tempFrom, freqCSset->items[from].csId, temp2, 
j, refCounter);
                                fprintf(fout, "ALTER TABLE %s_"BUNFMT" ADD 
FOREIGN KEY (%s_%d_%d) REFERENCES %s_"BUNFMT"(subject);\n\n", tempFrom, 
freqCSset->items[from].csId, temp2, j, refCounter, tempTo, 
freqCSset->items[to].csId);
                                refCounter += 1;
+                               free(tempFrom);
+                               free(tempTo);
                        }
+                       free(temp2);
                }
        }
 
@@ -438,7 +463,7 @@ void createSQLMetadata(CSset* freqCSset,
        if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               matrix[i] = (int *) malloc(sizeof(char *) * 
freqCSset->numCSadded);
+               matrix[i] = (int *) malloc(sizeof(int) * freqCSset->numCSadded);
                if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
 
                for (j = 0; j < freqCSset->numCSadded; ++j) {
@@ -484,12 +509,14 @@ void createSQLMetadata(CSset* freqCSset,
        // print id -> table name
        fout = fopen("tableIdFreq.csv", "wt");
        for (i = 0; i < freqCSset->numCSadded; ++i) {
-               char temp[100], temp2[100];
+               char *temp;
                if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+               temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 
1));
+               if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                strcpy(temp, labels[i].name);
                escapeURIforSQL(temp);
-               sprintf(temp2, "%s_"BUNFMT"", temp, freqCSset->items[i].csId); 
// TODO underscores?
-               fprintf(fout, "\"%d\",\"%s\",\"%d\"\n", i, temp2, 
freqCSset->items[i].support);
+               fprintf(fout, "\"%d\",\"%s_"BUNFMT"\",\"%d\"\n", i, temp, 
freqCSset->items[i].csId, freqCSset->items[i].support); // TODO underscores?
+               free(temp);
        }
        fclose(fout);
 
@@ -505,7 +532,7 @@ void createSQLMetadata(CSset* freqCSset,
 static
 void printTxt(CSset* freqCSset, Labels* labels, int freqThreshold) {
        FILE            *fout;
-       char            filename[100], tmp[10];
+       char            filename[20], tmp[10];
        int             i, j;
 
        strcpy(filename, "labels");
@@ -543,7 +570,6 @@ void createTypeAttributesHistogram(BAT *
        str             propStr, objStr;
        char            *objStrPtr;
 
-       char            temp[10000];
        char            *start, *end;
        int             length;
 
@@ -612,9 +638,10 @@ void createTypeAttributesHistogram(BAT *
                                        end = strrchr(objStr, '"');
                                        if (start != NULL && end != NULL) {
                                                length = end - start;
-                                               memcpy(temp, start, length);
-                                               temp[length] = '\0';
-                                               objStrPtr = temp;
+                                               objStrPtr = (char *) 
malloc(sizeof(char) * (length + 1));
+                                               if (!objStrPtr) fprintf(stderr, 
"ERROR: Couldn't malloc memory!\n");
+                                               memcpy(objStrPtr, start, 
length);
+                                               objStrPtr[length] = '\0';
                                        } else {
                                                objStrPtr = objStr;
                                        }
@@ -638,10 +665,13 @@ void createTypeAttributesHistogram(BAT *
                                        if 
(!typeAttributesHistogram[csFreqIdx][i]) fprintf(stderr, "ERROR: Couldn't 
realloc memory!\n");
 
                                        // insert value
+                                       
typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
 - 1].value = (str) malloc(sizeof(char)*(strlen(objStrPtr)+1));
+                                       if 
(!typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
 - 1].value) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
                                        
strcpy(typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
 - 1].value, objStrPtr);
                                        
typeAttributesHistogram[csFreqIdx][i][typeAttributesHistogramCount[csFreqIdx][i]
 - 1].freq = 1;
                                }
 
+                               if (!(objType == URI || objType == BLANKNODE)) 
free(objStrPtr); // malloc, therefore free
                                break;
                        }
                }
@@ -744,11 +774,13 @@ str** findOntologies(CS cs, int *propOnt
                        int             length = 0;
                        char            **tokenizedUri = NULL;
                        char            *token;                 // token, 
modified during tokenization
-                       char            uri[1000];                      // uri, 
modified during tokenization
+                       char            *uri;                   // uri, 
modified during tokenization
                        str             propStr;
 
                        takeOid(cs.lstProp[j], &propStr);
                        removeBrackets(&propStr);
+                       uri = (char *) malloc(sizeof(char) * (strlen(propStr) + 
1));
+                       if (!uri) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
                        strcpy(uri, propStr);
 
                        // tokenize uri
@@ -756,9 +788,12 @@ str** findOntologies(CS cs, int *propOnt
                        while (token != NULL) {
                                tokenizedUri = realloc(tokenizedUri, 
sizeof(char*) * ++length);
                                if (!tokenizedUri) fprintf(stderr, "ERROR: 
Couldn't realloc memory!\n");
-                               tokenizedUri[length - 1] = token;
+                               tokenizedUri[length -1] = (char *) 
malloc(sizeof(char *) * (strlen(token) + 1));
+                               if (!tokenizedUri[length - 1]) fprintf(stderr, 
"ERROR: Couldn't malloc memory!\n");
+                               strcpy(tokenizedUri[length - 1], token);
                                token = strtok(NULL, "/#");
                        }
+                       free(uri);
 
                        // check for match with ontology
                        if (length > ontologies[i].length) {
@@ -778,6 +813,10 @@ str** findOntologies(CS cs, int *propOnt
                                        propOntologiesCount[i] += 1;
                                }
                        }
+                       for (k = 0; k < length; ++k) {
+                               free(tokenizedUri[k]);
+                       }
+                       free(tokenizedUri);
                }
        }
        return propOntologies;
@@ -982,10 +1021,10 @@ PropStat* initPropStat(void) {
        }
 
        propStat->freqs = (int*) malloc(sizeof(int) * INIT_PROP_NUM);
-       if (propStat->freqs == NULL) return NULL;
+       if (!propStat->freqs) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
 
        propStat->tfidfs = (float*) malloc(sizeof(float) * INIT_PROP_NUM);
-       if (propStat->tfidfs == NULL) return NULL;
+       if (!propStat->tfidfs) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n");
 
        propStat->numAdded = 0;
        propStat->numAllocation = INIT_PROP_NUM;
@@ -1098,18 +1137,15 @@ void printUML(CSset *freqCSset, int type
        int             ret;
        char*           schema = "rdf";
 
-       char            propStrEscaped[1000];
-#if USE_SHORT_NAMES
-       char            propStrShort[1000];
-#endif
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to