Changeset: 4b58b4d3e908 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4b58b4d3e908 Modified Files: monetdb5/extras/rdf/rdflabels.c monetdb5/extras/rdf/rdfretrieval.c monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
Fix the compiler warning "not protecting local variables: variable length buffer" caused by the variable-length arrays in Linnea's implementation in rdflabels.c and rdfretrieval.c: the arrays are now allocated with malloc() and released with free(). diffs (236 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -855,7 +855,7 @@ void createTypeAttributesHistogram(BAT * int i, j, k; int fit; - oid typeAttributesOids[typeAttributesCount]; + oid *typeAttributesOids = malloc(sizeof(oid) * typeAttributesCount); if (BATcount(sbat) == 0) { fprintf(stderr, "sbat must not be empty"); @@ -944,6 +944,8 @@ void createTypeAttributesHistogram(BAT * } } } + + free(typeAttributesOids); } #endif diff --git a/monetdb5/extras/rdf/rdfretrieval.c b/monetdb5/extras/rdf/rdfretrieval.c --- a/monetdb5/extras/rdf/rdfretrieval.c +++ b/monetdb5/extras/rdf/rdfretrieval.c @@ -129,14 +129,18 @@ void bfs1(int root, int* table_id, int t static void addNode1(int* adjacency_from, int* adjacency_to, int adjacencyCount, NodeStat* nodeStats, int* table_id, int tableCount, int root, char initial) { - int queue[tableCount]; // cyclic array - int visited[tableCount]; - int isInQueue[tableCount]; + int *queue; // cyclic array + int *visited; + int *isInQueue; int queuePosition; // next element in queue to view at int queueLength; int pathId, pathIdTmp; int i; + queue = (int*)malloc(sizeof(int) * tableCount); + visited = (int*)malloc(sizeof(int) * tableCount); + isInQueue = (int*)malloc(sizeof(int) * tableCount); + // init for (i = 0; i < tableCount; ++i) { queue[i] = -1; @@ -180,6 +184,10 @@ void addNode1(int* adjacency_from, int* } bfs1(root, table_id, tableCount, adjacency_from, adjacency_to, adjacencyCount, queue, visited, isInQueue, &queuePosition, &queueLength, nodeStats); + + free(queue); + free(visited); + free(isInQueue); } static @@ -368,8 +376,9 @@ int* retrieval2(int root, int numNodesMa static void assignWeightToChildren3(int* adjacency_from, int* adjacency_to, int adjacencyCount, NodeStat* 
nodeStats, int* table_id, int tableCount, int root) { int i, j, k; - char visited[tableCount]; - + char *visited; + + visited = (char*)malloc(sizeof(char) * tableCount); // mark root as a "chosen node" nodeStats[root].steps = 0; nodeStats[root].weight = 0; @@ -406,6 +415,8 @@ void assignWeightToChildren3(int* adjace nodeStats[i].weight += nodeStats[i].origWeight; } } + + free(visited); } static @@ -607,14 +618,18 @@ int* retrievalOverview(int* numNodesActu int csCount = 0; int sumChosenSubjects = 0; - int queue[tableCount]; // cyclic array - int isInQueue[tableCount]; + int *queue; // cyclic array + int *isInQueue; int queuePosition; // next element in queue to view at int queueLength; - char visited[tableCount]; + char *visited; int subgraphSize; Groups groups; int *chosenNodes = NULL; + + queue = (int*)malloc(sizeof(int) * tableCount); + isInQueue = (int*)malloc(sizeof(int) * tableCount); + visited = (char*)malloc(sizeof(char) * tableCount); groups.count = 0; groups.groups = NULL; @@ -695,6 +710,10 @@ int* retrievalOverview(int* numNodesActu assert(j == subgraphSize); } + free(queue); + free(visited); + free(isInQueue); + // transitive closure (Floyd-Warshall-Algorithm) for (k = 0; k < tableCount; ++k) { for (i = 0; i < tableCount; ++i) { @@ -732,7 +751,7 @@ int* retrievalOverview(int* numNodesActu } if (!found) { int node; - char reachability[tableCount]; + char *reachability = malloc(sizeof(char) * tableCount); int reachabilityCount = 0; int nextNode; // position in the (sorted) list of nodes to look at next @@ -778,6 +797,8 @@ int* retrievalOverview(int* numNodesActu } } } + + free(reachability); } } diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -3527,6 +3527,27 @@ void mergeMaxFreqCSByS6(CSrel *csrelMerg } +/* +static +str printTKNZStringFromOid(oid id){ + int ret; + char* schema = "rdf"; + str propStr; + + if (TKNZRopen (NULL, &schema) != 
MAL_SUCCEED) { + throw(RDF, "rdf.rdfschema", + "could not open the tokenizer\n"); + } + + takeOid(id, &propStr); + printf("String for "BUNFMT": %s\n", id, propStr); + + TKNZRclose(&ret); + + return MAL_SUCCEED; +} +*/ + static char isSemanticSimilar(int freqId1, int freqId2, CSlabel* labels, OntoUsageNode *tree, int numOrigFreqCS, oid *ancestor){ /*Rule S1 S2 S3*/ int i, j; @@ -3599,9 +3620,18 @@ char isSemanticSimilar(int freqId1, int } level++; } - //printf("The common ancestor of freqCS %d (%s) and freqCS %d (%s) is: %s --- %f \n", freqId1, labels[freqId1].name, freqId2, labels[freqId2].name, tmpNode->uri, tmpNode->percentage); + /* + printf("The common ancestor of freqCS %d ("BUNFMT") and freqCS %d ("BUNFMT") is: "BUNFMT" --- %f \n", freqId1, labels[freqId1].name, freqId2, labels[freqId2].name, tmpNode->uri, tmpNode->percentage); + + printTKNZStringFromOid(labels[freqId1].name); + printTKNZStringFromOid(labels[freqId2].name); + printTKNZStringFromOid(tmpNode->uri); + */ + if (tmpNode->percentage < IMPORTANCE_THRESHOLD) { - //printf("Merge two CS's %s and %s using the common ancestor (%s) at level %d (score: %f)\n",labels[freqId1].name,labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage); + printf("Merge two CS's %d (Label: "BUNFMT") and %d (Label: "BUNFMT") using the common ancestor ("BUNFMT") at level %d (score: %f)\n", + freqId1, labels[freqId1].name, freqId2, labels[freqId2].name,tmpNode->uri, i,tmpNode->percentage); + (*ancestor) = tmpNode->uri; return 1; } @@ -3722,7 +3752,7 @@ void mergeCSByS3S5(CSset *freqCSset, CSl #else if (simscore > SIM_THRESHOLD) { #endif - //printf("S3S5: merge freqCS %d and freqCS %d \n", freqId1, freqId2); + //printf("S3S5: merge freqCS %d and freqCS %d (sim: %f)\n", freqId1, freqId2,simscore); //Check whether these CS's belong to any mergeCS if (cs1->parentFreqIdx == -1 && cs2->parentFreqIdx == -1){ /* New merge */ mergecs = mergeTwoCSs(*cs1,*cs2, freqId1,freqId2, *mergecsId); @@ -5774,14 +5804,17 @@ 
RDFextractCSwithTypes(int *ret, bat *sba mergeCSFreqCSMap = (oid*) malloc(sizeof(oid) * curNumMergeCS); initMergeCSFreqCSMap(freqCSset, mergeCSFreqCSMap); + + /* S6: Merged CS referred from the same CS via the same property */ + if (1){ tmpCSrelToMergeCS = generateCsRelToMergeFreqSet(csrelSet, freqCSset); tmpNumRel = freqCSset->numCSadded; - /* S6: Merged CS referred from the same CS via the same property */ - if (1) mergeMaxFreqCSByS6(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount); + mergeMaxFreqCSByS6(tmpCSrelToMergeCS, freqCSset, labels, mergeCSFreqCSMap, curNumMergeCS, &mergecsId, ontmetadata, ontmetadataCount); //printf("DISABLE S6 (For Testing) \n"); freeCSrelSet(tmpCSrelToMergeCS,tmpNumRel); + } curNumMergeCS = countNumberMergeCS(freqCSset); curT = clock(); diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -125,11 +125,11 @@ typedef struct PropStat { /* ---- For detecting dimension table */ #define NUM_ITERATION_FOR_IR 3 /* Number of iteration for indirect referrences to a CS (table) */ -#define IR_DIMENSION_THRESHOLD_PERCENTAGE 0.2 /* Score of indirect references that the CS can be considered as a dimension CS +#define IR_DIMENSION_THRESHOLD_PERCENTAGE 0.02 /* Score of indirect references that the CS can be considered as a dimension CS IR_DIMENSION_THRESHOLD_PERCENTAGE * totalFrequency - Number of IR references should be several times larger than the CS frequency + Number of IR references should be several times larger than the CS frequenc */ -//#define IR_DIMENSION_THRESHOLD_PERCENTAGE 0.02 //Value 0.2 is for example data only +//#define IR_DIMENSION_THRESHOLD_PERCENTAGE 0.2 //Value 0.2 is for example data only #define NOT_MERGE_DIMENSIONCS 1 /* Default: 1, 0: Is for example data */ @@ -214,6 +214,7 @@ typedef struct SubCSSet{ // the CS's to-be-merged in this rule must cover > 
MIN_FROMTABLE_SIZE_S6 / MIN_PERCETAGE_S6 triples //#define MIN_FROMTABLE_SIZE_S6 1 /* For example data */ #define MINIMUM_TABLE_SIZE 10000 //The minimum number of triples coverred by a table (i.e., a final CS) +//#define MINIMUM_TABLE_SIZE 1 // For example dataset only #define SAMPLE_FILTER_THRESHOLD 1 // SAMPLE_FILTER_THRESHOLD/ 100 #define HIGH_REFER_THRESHOLD 5 _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list