Changeset: 72b6716bcfd7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=72b6716bcfd7
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
	monetdb5/extras/rdf/rdflabels.h
	monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
create labels for freqCS, not maxCS/mergeCS diffs (truncated from 394 to 300 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -221,7 +221,6 @@ int** initRelationMetadataCount(CSset* f if (!relationMetadataCount) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (i = 0; i < freqCSset->numCSadded; ++i) { relationMetadataCount[i] = NULL; - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore relationMetadataCount[i] = (int *) malloc(sizeof(int) * freqCSset->items[i].numProp); if (!relationMetadataCount[i]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); for (j = 0; j < freqCSset->items[i].numProp; ++j) { @@ -234,7 +233,7 @@ int** initRelationMetadataCount(CSset* f /* Calculate frequency per foreign key relationship. */ static -Relation*** initRelationMetadata(int** relationMetadataCount, CSmergeRel* csRelBetweenMergeFreqSet, CSset* freqCSset) { +Relation*** initRelationMetadata(int** relationMetadataCount, CSrel* csrelSet, int num, CSset* freqCSset, int* csIdFreqIdxMap) { int i, j, k; Relation*** relationMetadata; @@ -245,49 +244,51 @@ Relation*** initRelationMetadata(int** r relationMetadata = (Relation ***) malloc(sizeof(Relation **) * freqCSset->numCSadded); if (!relationMetadata) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - for (i = 0; i < freqCSset->numCSadded; ++i) { // CS - CS cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore - relationMetadata[i] = (Relation **) malloc (sizeof(Relation *) * cs.numProp); - if (!relationMetadata[i]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + for (i = 0; i < num; ++i) { // CS + int csId = csIdFreqIdxMap[i]; + CS cs = (CS) freqCSset->items[csId]; + if (csId == -1) continue; // ignore + relationMetadata[csId] = (Relation **) malloc (sizeof(Relation *) * cs.numProp); + if (!relationMetadata[csId]) fprintf(stderr, "ERROR: Couldn't malloc 
memory!\n"); for (j = 0; j < cs.numProp; ++j) { // propNo in CS order int sum = 0; - relationMetadataCount[i][j] = 0; - relationMetadata[i][j] = NULL; - for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { // propNo in CSrel + relationMetadataCount[csId][j] = 0; + relationMetadata[csId][j] = NULL; + for (k = 0; k < csrelSet[i].numRef; ++k) { // propNo in CSrel - if (csRelBetweenMergeFreqSet[i].lstPropId[k] == cs.lstProp[j]) { - int toId = csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k]; - relationMetadataCount[i][j] += 1; + if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) { + int toId = csIdFreqIdxMap[ csrelSet[i].lstRefCSoid[k] ]; + if (toId == -1) continue; // ignore + relationMetadataCount[csId][j] += 1; // alloc/realloc - if (relationMetadataCount[i][j] == 1) { + if (relationMetadataCount[csId][j] == 1) { // alloc - relationMetadata[i][j] = (Relation *) malloc (sizeof(Relation)); - if (!relationMetadata[i][j]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - relationMetadata[i][j][0].to = toId; - relationMetadata[i][j][0].from = i; - relationMetadata[i][j][0].freq = csRelBetweenMergeFreqSet[i].lstCnt[k]; - relationMetadata[i][j][0].percent = -1; + relationMetadata[csId][j] = (Relation *) malloc (sizeof(Relation)); + if (!relationMetadata[csId][j]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + relationMetadata[csId][j][0].to = toId; + relationMetadata[csId][j][0].from = csId; + relationMetadata[csId][j][0].freq = csrelSet[i].lstCnt[k]; + relationMetadata[csId][j][0].percent = -1; } else { // realloc - relationMetadata[i][j] = (Relation *) realloc(relationMetadata[i][j], sizeof(Relation) * relationMetadataCount[i][j]); - if (!relationMetadata[i][j]) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); - relationMetadata[i][j][relationMetadataCount[i][j] - 1].to = toId; - relationMetadata[i][j][relationMetadataCount[i][j] - 1].from = i; - relationMetadata[i][j][relationMetadataCount[i][j] - 1].freq = csRelBetweenMergeFreqSet[i].lstCnt[k]; - 
relationMetadata[i][j][relationMetadataCount[i][j] - 1].percent = -1; + relationMetadata[csId][j] = (Relation *) realloc(relationMetadata[csId][j], sizeof(Relation) * relationMetadataCount[csId][j]); + if (!relationMetadata[csId][j]) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); + relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].to = toId; + relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].from = csId; + relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].freq = csrelSet[i].lstCnt[k]; + relationMetadata[csId][j][relationMetadataCount[csId][j] - 1].percent = -1; } } } // get total count of values - for (k = 0; k < relationMetadataCount[i][j]; ++k) { - sum += relationMetadata[i][j][k].freq; + for (k = 0; k < relationMetadataCount[csId][j]; ++k) { + sum += relationMetadata[csId][j][k].freq; } // assign percentage values for every value - for (k = 0; k < relationMetadataCount[i][j]; ++k) { - relationMetadata[i][j][k].percent = (int) (100.0 * relationMetadata[i][j][k].freq / sum + 0.5); + for (k = 0; k < relationMetadataCount[csId][j]; ++k) { + relationMetadata[csId][j][k].percent = (int) (100.0 * relationMetadata[csId][j][k].freq / sum + 0.5); } } } @@ -387,7 +388,6 @@ void convertToSQL(CSset *freqCSset, Rela // create statement for every table for (i = 0; i < freqCSset->numCSadded; ++i) { char *temp; - if ( freqCSset->items[i].parentFreqIdx != -1) continue; // ignore temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 1)); if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp, labels[i].name); @@ -414,7 +414,6 @@ void convertToSQL(CSset *freqCSset, Rela // add foreign key columns and add foreign keys for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore for (j = 0; j < labels[i].numProp; ++j) { char *temp2; int refCounter = 0; @@ -453,7 +452,7 @@ void convertToSQL(CSset *freqCSset, Rela } static -void createSQLMetadata(CSset* 
freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, Labels* labels) { +void createSQLMetadata(CSset* freqCSset, CSrel* csrelSet, int num, Labels* labels, int* csIdFreqIdxMap) { int **matrix = NULL; // matrix[from][to] frequency int i, j, k; FILE *fout; @@ -472,24 +471,27 @@ void createSQLMetadata(CSset* freqCSset, } // set values - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore + for (i = 0; i < num; ++i) { + int csId = csIdFreqIdxMap[i]; + CS cs = (CS) freqCSset->items[csId]; + if (csId == -1) continue; // ignore - for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo in CS order + for (j = 0; j < cs.numProp; ++j) { // propNo in CS order // check foreign key frequency int sum = 0; - for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { - if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { - sum += csRelBetweenMergeFreqSet[i].lstCnt[k]; + for (k = 0; k < csrelSet[i].numRef; ++k) { + if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) { + sum += csrelSet[i].lstCnt[k]; } } - for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef; ++k) { // propNo in CSrel - if (csRelBetweenMergeFreqSet[i].lstPropId[k] == freqCSset->items[i].lstProp[j]) { - int to = csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k]; - if (i == to) continue; // ignore self references - if ((int) (100.0 * csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough - matrix[i][to] += csRelBetweenMergeFreqSet[i].lstCnt[k]; // multiple links from 'i' to 'to'? 
add the frequencies + for (k = 0; k < csrelSet[i].numRef; ++k) { // propNo in CSrel + if (csrelSet[i].lstPropId[k] == cs.lstProp[j]) { + int toId = csIdFreqIdxMap[ csrelSet[i].lstRefCSoid[k] ]; + if (toId == -1) continue; // ignore + if (i == toId) continue; // ignore self references + if ((int) (100.0 * csrelSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD) continue; // foreign key is not frequent enough + matrix[csId][toId] += csrelSet[i].lstCnt[k]; // multiple links from 'i' to 'toId'? add the frequencies } } } @@ -510,7 +512,6 @@ void createSQLMetadata(CSset* freqCSset, fout = fopen("tableIdFreq.csv", "wt"); for (i = 0; i < freqCSset->numCSadded; ++i) { char *temp; - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore temp = (char *) malloc(sizeof(char) * (strlen(labels[i].name) + 1)); if (!temp) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); strcpy(temp, labels[i].name); @@ -542,7 +543,6 @@ void printTxt(CSset* freqCSset, Labels* fout = fopen(filename, "wt"); for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore fprintf(fout, "%s (CS "BUNFMT"): ", labels[i].name, freqCSset->items[i].csId); for (j = 0; j < labels[i].numProp; ++j) { if (j + 1 < labels[i].numProp) fprintf(fout, "%s, ", labels[i].lstProp[j]); @@ -611,12 +611,7 @@ void createTypeAttributesHistogram(BAT * for (i = 0; i < typeAttributesCount; ++i) { if (strstr(propStr, typeAttributes[i]) != NULL) { // prop is a type! 
- - // lookup maxCS/mergeCS csFreqIdx = csIdFreqIdxMap[subjCSMap[*sbt]]; - while (freqCSset->items[csFreqIdx].parentFreqIdx != -1) { - csFreqIdx = freqCSset->items[csFreqIdx].parentFreqIdx; - } // get object obt = (oid *) BUNtloc(oi, p); @@ -679,7 +674,6 @@ void createTypeAttributesHistogram(BAT * // sort descending by frequency for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore for (j = 0; j < typeAttributesCount; ++j) { qsort(typeAttributesHistogram[i][j], typeAttributesHistogramCount[i][j], sizeof(TypeAttributesFreq), compareTypeAttributesFreqs); } @@ -1041,7 +1035,6 @@ void createPropStatistics(PropStat* prop for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS)freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore for (j = 0; j < cs.numProp; ++j) { // add prop to propStat BUN bun = BUNfnd(BATmirror(propStat->pBat), (ptr) &cs.lstProp[j]); @@ -1089,13 +1082,9 @@ static void createOntologyLookupResult(str** result, CSset* freqCSset, int* resultCount, str** ontattributes, int ontattributesCount, str** ontmetadata, int ontmetadataCount) { int i, j; PropStat *propStat; - int numCS = 0; - for (i = 0; i < freqCSset->numCSadded; ++i) { - if (freqCSset->items[i].parentFreqIdx == -1) numCS += 1; - } propStat = initPropStat(); - createPropStatistics(propStat, numCS, freqCSset); + createPropStatistics(propStat, freqCSset->numCSadded, freqCSset); for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs; @@ -1104,7 +1093,6 @@ void createOntologyLookupResult(str** re int *propOntologiesCount = NULL; cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore // order properties by ontologies propOntologiesCount = (int *) malloc(sizeof(int) * ontologyCount); @@ -1170,7 +1158,6 @@ void printUML(CSset *freqCSset, int type for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore #if SHOW_CANDIDATES /* DATA 
SOURCES */ @@ -1370,14 +1357,12 @@ void printUML(CSset *freqCSset, int type getPropNameShort(&propStrShort, propStr); #endif - if (cs.parentFreqIdx == -1) { - // if it is a type, include top-3 values + // if it is a type, include top-3 values #if USE_SHORT_NAMES - fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStrShort); + fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStrShort); #else - fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStr); + fprintf(fout, "<TR><TD PORT=\"%s\">%s</TD></TR>\n", propStrEscaped, propStr); #endif - } free(propStrEscaped); } @@ -1387,7 +1372,6 @@ void printUML(CSset *freqCSset, int type for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore for (j = 0; j < cs.numProp; ++j) { char *propStrEscaped = NULL; #if USE_SHORT_NAMES @@ -1588,7 +1572,6 @@ void getAllLabels(Labels* labels, CSset* for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS) freqCSset->items[i]; char *temp = NULL; - if (cs.parentFreqIdx != -1) continue; // ignore // get table name getTableName(&temp, i, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, result, resultCount, links); @@ -1649,7 +1632,6 @@ void createLinks(CSset* freqCSset, Relat for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore for (j = 0; j < cs.numProp; ++j) { for (k = 0; k < relationMetadataCount[i][j]; ++k) { int to; @@ -1725,7 +1707,6 @@ void freeRelationMetadata(Relation*** re for (i = 0; i < freqCSset->numCSadded; ++i) { // CS CS cs = (CS) freqCSset->items[i]; - if (cs.parentFreqIdx != -1) continue; // ignore for (j = 0; j < cs.numProp; ++j) { if (relationMetadata[i][j]) free(relationMetadata[i][j]); @@ -1773,7 +1754,7 @@ void freeOntologyLookupResult(str** onto } /* Creates labels for all CS (without a parent). 
*/ -Labels* createLabels(CSset* freqCSset, CSmergeRel* csRelBetweenMergeFreqSet, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, BAT* mbat, int *csIdFreqIdxMap, int freqThreshold, str** ontattributes, int ontattributesCount, str** ontmetadata, int ontmetadataCount) { +Labels* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, BAT* mbat, int *csIdFreqIdxMap, int freqThreshold, str** ontattributes, int ontattributesCount, str** ontmetadata, int ontmetadataCount) { #if USE_TYPE_NAMES char* typeAttributes[] = { "http://ogp.me/ns#type", _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list