Changeset: cbde82c8ce68 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cbde82c8ce68 Modified Files: monetdb5/extras/rdf/rdfretrieval.c monetdb5/extras/rdf/rdfretrieval.h monetdb5/extras/rdf/rdfschema.c sql/backends/monet5/sql.mx sql/scripts/30_rdf.sql Branch: rdf Log Message:
SQL procedure to create a subschema diffs (truncated from 727 to 300 lines): diff --git a/monetdb5/extras/rdf/rdfretrieval.c b/monetdb5/extras/rdf/rdfretrieval.c --- a/monetdb5/extras/rdf/rdfretrieval.c +++ b/monetdb5/extras/rdf/rdfretrieval.c @@ -24,65 +24,25 @@ #include "rdflabels.h" static -char** initAdjacencyMatrix(int csCount) { - char **matrix = NULL; // matrix[from][to] - int i, j; - - matrix = (char **) malloc(sizeof(char *) * csCount); - if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - - for (i = 0; i < csCount; ++i) { - matrix[i] = (char *) malloc(sizeof(char *) * csCount); - if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); - - for (j = 0; j < csCount; ++j) { - matrix[i][j] = 0; - } +int edgeExists(long int from, long int to, long int* adjacency_from, long int* adjacency_to, int adjacencyCount) { + int i; + for (i = 0; i < adjacencyCount; ++i) { + if (adjacency_from[i] == from && adjacency_to[i] == to) return 1; } - - return matrix; + return 0; } static -void createAdjacencyMatrix(char** matrix, CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet) { - int i, j, k; - - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore - - for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo in CS order - // check foreign key frequency - int sum = 0; - for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { - if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { - sum += csRelBetweenMergeFreqSet[i].lstCnt[k]; - } - } - - for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { // propNo in CSrel - if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { - int to = csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k]; - if (i == to) continue; // ignore self references - if ((int) (100.0 * csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough - matrix[i][to] = 1; - } - } - } - } -} - -static -NodeStat* initNodeStats(CSset* freqCSset) { +NodeStat* initNodeStats1(long int* table_freq, int tableCount) { NodeStat* nodeStats = NULL; int i; - nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * freqCSset->numCSadded); + nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * tableCount); if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore - nodeStats[i].origWeight = freqCSset->items[i].support; - nodeStats[i].weight = freqCSset->items[i].support; // weight = origWeight + for (i = 0; i < tableCount; ++i) { + nodeStats[i].origWeight = table_freq[i]; + nodeStats[i].weight = table_freq[i]; // weight = origWeight nodeStats[i].steps = -1; nodeStats[i].predecessor = -1; } @@ -91,30 +51,11 @@ NodeStat* initNodeStats(CSset* freqCSset } static -NodeStat* initNodeStats23(CSset* freqCSset) { - NodeStat* nodeStats = NULL; - int i; - - nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * freqCSset->numCSadded); - if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore - nodeStats[i].origWeight = freqCSset->items[i].support; - nodeStats[i].weight = 0; - nodeStats[i].steps = -1; // not used - nodeStats[i].predecessor = 0; // not used - } - - return nodeStats; -} - -static -void bfs1(int root, CSset* freqCSset, char** adjacencyMatrix, int* queue, int* visited, int* isInQueue, int* queuePosition, int* queueLength, NodeStat* nodeStats) { +void bfs1(int root, long int* table_id, int tableCount, long int* adjacency_from, long int* adjacency_to, int adjacencyCount, int* queue, int* visited, int* isInQueue, int* queuePosition, int* queueLength, NodeStat* nodeStats) { int i; - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (adjacencyMatrix[root][i]) { + for (i = 0; i < tableCount; ++i) { + if (edgeExists(table_id[root], table_id[i], adjacency_from, adjacency_to, adjacencyCount)) { if (nodeStats[i].steps == -1) { // no previous path to this node nodeStats[i].weight = nodeStats[root].weight + nodeStats[i].origWeight; @@ -160,7 +101,7 @@ void bfs1(int root, CSset* freqCSset, ch if (!visited[i] && !isInQueue[i]) { // add to queue - queue[((*queueLength + *queuePosition) % freqCSset->numCSadded)] = i; + queue[((*queueLength + *queuePosition) % tableCount)] = i; *queueLength += 1; isInQueue[i] = 1; } @@ -169,26 +110,26 @@ void bfs1(int root, CSset* freqCSset, ch } if (*queueLength > 0) { - visited[queue[(*queuePosition % freqCSset->numCSadded)]] = 1; - isInQueue[queue[(*queuePosition % freqCSset->numCSadded)]] = 0; + visited[queue[(*queuePosition % tableCount)]] = 1; + isInQueue[queue[(*queuePosition % tableCount)]] = 0; *queuePosition += 1; *queueLength -= 1; - bfs1(queue[((*queuePosition + freqCSset->numCSadded - 1) % freqCSset->numCSadded)], freqCSset, adjacencyMatrix, queue, visited, isInQueue, queuePosition, queueLength, nodeStats); + bfs1(queue[((*queuePosition + tableCount - 1) % tableCount)], table_id, tableCount, adjacency_from, adjacency_to, adjacencyCount, queue, visited, isInQueue, queuePosition, queueLength, nodeStats); } } static -void addNode1(char** adjacencyMatrix, NodeStat* nodeStats, CSset* freqCSset, int root, char initial) { - int queue[freqCSset->numCSadded]; // cyclic array - int visited[freqCSset->numCSadded]; - int isInQueue[freqCSset->numCSadded]; +void addNode1(long int* adjacency_from, long int* adjacency_to, int adjacencyCount, NodeStat* nodeStats, long int* table_id, int tableCount, int root, char initial) { + int queue[tableCount]; // cyclic array + int visited[tableCount]; + int isInQueue[tableCount]; int queuePosition; // next element in queue to view at int queueLength; int pathId, pathIdTmp; int i; // init - for (i = 0; i < freqCSset->numCSadded; ++i) { + for (i = 0; i < tableCount; ++i) { queue[i] = -1; visited[i] = 0; isInQueue[i] = 0; @@ -227,11 +168,11 @@ void addNode1(char** adjacencyMatrix, No assert(steps == i); } - bfs1(root, freqCSset, adjacencyMatrix, queue, visited, isInQueue, &queuePosition, &queueLength, nodeStats); + bfs1(root, table_id, tableCount, adjacency_from, adjacency_to, adjacencyCount, queue, visited, isInQueue, &queuePosition, &queueLength, nodeStats); } -int* retrieval1(int root, int numNodesMax, int* numNodesActual, CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet) { - char **adjacencyMatrix = NULL; +static +int* retrieval1(int root, int numNodesMax, int* numNodesActual, long int* table_id, str* table_name, long int* table_freq, int tableCount, long int* adjacency_from, long int* adjacency_to, int adjacencyCount) { NodeStat *nodeStats = NULL; int numNodes; int *chosenNodes = NULL; @@ -245,21 +186,18 @@ int* retrieval1(int root, int numNodesMa chosenNodes = (int *) malloc(sizeof(int) * numNodesMax); if (!chosenNodes) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - adjacencyMatrix = initAdjacencyMatrix(freqCSset->numCSadded); - createAdjacencyMatrix(adjacencyMatrix, freqCSset, csRelBetweenMergeFreqSet); - nodeStats = initNodeStats(freqCSset); + nodeStats = initNodeStats1(table_freq, tableCount); numNodes = 1; // add root node - addNode1(adjacencyMatrix, nodeStats, freqCSset, root, 1); + addNode1(adjacency_from, adjacency_to, adjacencyCount, nodeStats, table_id, tableCount, root, 1); // add nodes while (numNodes < numNodesMax) { // get top node (highest fraction (weight/steps)) int top = -1; - for (i = 0; i < freqCSset->numCSadded; ++i) { + for (i = 0; i < tableCount; ++i) { int topWeight, testWeight; - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore if (nodeStats[i].steps == -1) continue; // non-reachable if (nodeStats[i].steps == 0) continue; // already chosen if (numNodes + nodeStats[i].steps > numNodesMax) continue; // path too long @@ -275,36 +213,30 @@ int* retrieval1(int root, int numNodesMa } if (top == -1) break; // not enough nodes found numNodes += nodeStats[top].steps; - addNode1(adjacencyMatrix, nodeStats, freqCSset, top, 0); // add node(s) + addNode1(adjacency_from, adjacency_to, adjacencyCount, nodeStats, table_id, tableCount, top, 0); // add node(s) } // store list of chosen nodes printf("SUBSCHEMA:\n"); j = 0; // counter for chosenNodes[] - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + for (i = 0; i < tableCount; ++i) { if (nodeStats[i].steps != 0) continue; // non-chosen chosenNodes[j++] = i; - printf("CS "BUNFMT"\n", freqCSset->items[i].csId); + printf("CS %s\n", table_name[i]); } // statistics - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + for (i = 0; i < tableCount; ++i) { csCount += 1; - sumSubjects += freqCSset->items[i].support; + sumSubjects += table_freq[i]; if (nodeStats[i].steps == 0) { - sumChosenSubjects += freqCSset->items[i].support; + sumChosenSubjects += table_freq[i]; } } printf("COVERAGE:\n"); printf("%d out of %d (%f %%) using %d out of %d tables (%f %%)\n", sumChosenSubjects, sumSubjects, (100.00 * sumChosenSubjects) / sumSubjects, numNodes, csCount, (100.00 * numNodes) / csCount); // free - for (i = 0; i < freqCSset->numCSadded; ++i) { - free(adjacencyMatrix[i]); - } - free(adjacencyMatrix); free(nodeStats); *numNodesActual = numNodes; @@ -312,7 +244,25 @@ int* retrieval1(int root, int numNodesMa } static -void assignWeightToChildren2(char** adjacencyMatrix, NodeStat* nodeStats, CSset* freqCSset, int root) { +NodeStat* initNodeStats23(long int* table_freq, int tableCount) { + NodeStat* nodeStats = NULL; + int i; + + nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * tableCount); + if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + + for (i = 0; i < tableCount; ++i) { + nodeStats[i].origWeight = table_freq[i]; + nodeStats[i].weight = 0; + nodeStats[i].steps = -1; // not used + nodeStats[i].predecessor = 0; // not used + } + + return nodeStats; +} + +static +void assignWeightToChildren2(long int* adjacency_from, long int* adjacency_to, int adjacencyCount, NodeStat* nodeStats, long int* table_id, int tableCount, int root) { int i, j; // mark root as a "chosen node" @@ -320,12 +270,12 @@ void assignWeightToChildren2(char** adja nodeStats[root].weight = 0; // set summed weight for children - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (adjacencyMatrix[root][i]) { + for (i = 0; i < tableCount; ++i) { + if (edgeExists(table_id[root], table_id[i], adjacency_from, adjacency_to, adjacencyCount)) { if (nodeStats[i].steps == 0) continue; // already in list nodeStats[i].weight = 0; - for (j = 0; j < freqCSset->numCSadded; ++j) { - if (adjacencyMatrix[i][j]) { + for (j = 0; j < tableCount; ++j) { + if (edgeExists(table_id[i], table_id[j], adjacency_from, adjacency_to, adjacencyCount)) { if (nodeStats[j].steps == 0) continue; // already in list nodeStats[i].weight += nodeStats[j].origWeight; } @@ -335,8 +285,8 @@ void assignWeightToChildren2(char** adja } } -int* retrieval2(int root, int numNodesMax, int* numNodesActual, CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet) { - char **adjacencyMatrix = NULL; +static +int* retrieval2(int root, int numNodesMax, int* numNodesActual, long int* table_id, str* table_name, long int* table_freq, int tableCount, long int* adjacency_from, long int* adjacency_to, int adjacencyCount) { NodeStat *nodeStats = NULL; int numNodes; int *chosenNodes = NULL; @@ -350,20 +300,18 @@ int* retrieval2(int root, int numNodesMa chosenNodes = (int *) malloc(sizeof(int) * numNodesMax); if (!chosenNodes) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - adjacencyMatrix = initAdjacencyMatrix(freqCSset->numCSadded); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list