Changeset: a536099d8d69 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a536099d8d69
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
Branch: rdf
Log Message:
Store explicit metadata (tables and relationships) Two tables are created to store information about relationships between tables and #tuples per table diffs (117 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -322,13 +322,14 @@ void escapeURI(char* s) { } /* Modifies the parameter! */ -/* Replaces colons, quotes, spaces, and dashes with underscores. */ +/* Replaces colons, quotes, spaces, and dashes with underscores. All lowercase. */ static void escapeURIforSQL(char* s) { int i; for (i = 0; i < (int) strlen(s); ++i) { if (s[i] == ':' || s[i] == '"' || s[i] == ' ' || s[i] == '-') s[i] = '_'; + s[i] = tolower(s[i]); } } @@ -364,7 +365,7 @@ void convertToSQL(CSset *freqCSset, Rela if ( freqCSset->items[i].parentFreqIdx != -1) continue; // ignore strcpy(temp, labels[i].name); escapeURIforSQL(temp); - fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO uppercase? underscores? + fprintf(fout, "CREATE TABLE %s_"BUNFMT" (\nsubject VARCHAR(10) PRIMARY KEY,\n", temp, freqCSset->items[i].csId); // TODO underscores? 
for (j = 0; j < labels[i].numProp; ++j) { char temp2[100]; strcpy(temp2, labels[i].lstProp[j]); @@ -411,6 +412,80 @@ void convertToSQL(CSset *freqCSset, Rela TKNZRclose(&ret); } +static +void createSQLMetadata(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, Labels* labels) { + char **matrix = NULL; // matrix[from][to] + int i, j, k; + FILE *fout; + + // init + matrix = (char **) malloc(sizeof(char *) * freqCSset->numCSadded); + if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + + for (i = 0; i < freqCSset->numCSadded; ++i) { + matrix[i] = (char *) malloc(sizeof(char *) * freqCSset->numCSadded); + if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); + + for (j = 0; j < freqCSset->numCSadded; ++j) { + matrix[i][j] = 0; + } + } + + // set values + for (i = 0; i < freqCSset->numCSadded; ++i) { + if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + + for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo in CS order + // check foreign key frequency + int sum = 0; + for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { + if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { + sum += csRelBetweenMergeFreqSet[i].lstCnt[k]; + } + } + + for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { // propNo in CSrel + if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { + int to = csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k]; + if (i == to) continue; // ignore self references + if ((int) (100.0 * csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough + matrix[i][to] = 1; + } + } + } + } + + // store matrix as csv + fout = fopen("adjacencyList.csv", "wt"); + for (i = 0; i < freqCSset->numCSadded; ++i) { + for (j = 0; j < freqCSset->numCSadded; ++j) { + if (matrix[i][j]) { + fprintf(fout, "\"%d\",\"%d\"\n",i,j); + } + } + } + fclose(fout); + + // print id -> table name + fout = 
fopen("tableIdFreq.csv", "wt"); + for (i = 0; i < freqCSset->numCSadded; ++i) { + char temp[100], temp2[100]; + if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + strcpy(temp, labels[i].name); + escapeURIforSQL(temp); + sprintf(temp2, "%s_"BUNFMT"", temp, freqCSset->items[i].csId); // TODO underscores? + fprintf(fout, "\"%d\",\"%s\",\"%d\"\n", i, temp2, freqCSset->items[i].support); + } + fclose(fout); + + fout = fopen("CSmetadata.sql", "wt"); + fprintf(fout, "CREATE TABLE table_id_freq (id VARCHAR(10), name VARCHAR(100), frequency VARCHAR(10));\n"); + fprintf(fout, "CREATE TABLE adjacency_list (from_id VARCHAR(10), to_id VARCHAR(10));\n"); + fprintf(fout, "COPY INTO table_id_freq from '/export/scratch2/linnea/dbfarm/test/tableIdFreq.csv' USING DELIMITERS ',','\\n','\"';\n"); + fprintf(fout, "COPY INTO adjacency_list from '/export/scratch2/linnea/dbfarm/test/adjacencyList.csv' USING DELIMITERS ',','\\n','\"';"); + fclose(fout); +} + /* Simple representation of the final labels for tables and attributes. */ static void printTxt(CSset* freqCSset, Labels* labels, int freqThreshold) { @@ -1629,6 +1704,7 @@ Labels* createLabels(CSset* freqCSset, C // Print and Export printUML(freqCSset, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, ontologyLookupResult, ontologyLookupResultCount, links, labels, relationMetadata, relationMetadataCount, freqThreshold); convertToSQL(freqCSset, relationMetadata, relationMetadataCount, labels, freqThreshold); + createSQLMetadata(freqCSset, csRelBetweenMergeFreqSet, labels); printTxt(freqCSset, labels, freqThreshold); // Free _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list