Changeset: 4088384eb40d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4088384eb40d
Modified Files:
    monetdb5/extras/rdf/rdf_shredder.c
    monetdb5/extras/rdf/rdfparser.h
Branch: rdf
Log Message:
Count number of values per type diffs (104 lines): diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c --- a/monetdb5/extras/rdf/rdf_shredder.c +++ b/monetdb5/extras/rdf/rdf_shredder.c @@ -236,7 +236,7 @@ getObjectType_and_Value(unsigned char* o if (isInt(valuepart, subLen) == 1){ /* Check whether the real value is an integer */ realInt = (BUN) atoi(valuepart); VALset(vrPtrRealValue,TYPE_int, &realInt); - printf("Real int value is: %d \n", vrPtrRealValue->val.ival); + //printf("Real int value is: %d \n", vrPtrRealValue->val.ival); } else obType = STRING; @@ -253,7 +253,7 @@ getObjectType_and_Value(unsigned char* o if (isDouble(valuepart, subLen) == 1){ realDbl = atof(valuepart); VALset(vrPtrRealValue,TYPE_dbl, &realDbl); - printf("Real double value is: %.10f \n", vrPtrRealValue->val.dval); + //printf("Real double value is: %.10f \n", vrPtrRealValue->val.dval); } else obType = STRING; @@ -406,7 +406,9 @@ tripleHandler(void* user_data, const rap //rdf_insert(pdata, graph[MAP_LEX], (str) objStr, &bun); rdf_tknzr_insert((str) objStr, &bun); rdf_BUNappend(pdata, graph[O_sort], &bun); - +#if CHECK_NUM_VALUES_PER_TYPE + pdata->numValuesPertype[URI]++; +#endif bun = BUN_NONE; free(objStr); } else if (triple->object->type == RAPTOR_TERM_TYPE_BLANK) { @@ -416,6 +418,9 @@ tripleHandler(void* user_data, const rap rdf_BUNappend_BlankNode_Obj(pdata, graph[O_sort], &bun); //rdf_BUNappend(pdata, graph[O_sort], &bun); +#if CHECK_NUM_VALUES_PER_TYPE + pdata->numValuesPertype[BLANKNODE]++; +#endif bun = BUN_NONE; free(objStr); @@ -432,6 +437,9 @@ tripleHandler(void* user_data, const rap encodeValueInOid(&vrRealValue, objType, &bun); } +#if CHECK_NUM_VALUES_PER_TYPE + pdata->numValuesPertype[objType]++; +#endif rdf_BUNappend(pdata, graph[O_sort], &bun); VALclear(&vrRealValue); @@ -585,7 +593,11 @@ parserData_create (str location, BAT** g pdata->numNonOnt = 0; } #endif - +#if CHECK_NUM_VALUES_PER_TYPE + for (i = 0; i < MULTIVALUES; i++){ + 
pdata->numValuesPertype[i] = 0; + } +#endif return pdata; } @@ -989,6 +1001,13 @@ RDFParser (BAT **graph, str *location, s #if BUILD_ONTOLOGIES_HISTO printHistogram(pdata); #endif +#if CHECK_NUM_VALUES_PER_TYPE + printf("Number of URI %d \n", pdata->numValuesPertype[URI]); + printf("Number of DATETIME %d \n", pdata->numValuesPertype[DATETIME]); + printf("Number of INTEGER %d \n", pdata->numValuesPertype[INTEGER]); + printf("Number of DOUBLE %d \n", pdata->numValuesPertype[DOUBLE]); + printf("Number of BLANKNODE %d \n", pdata->numValuesPertype[BLANKNODE]); +#endif /* post processing step */ tmpbeginT = clock(); ret = post_processing(pdata); diff --git a/monetdb5/extras/rdf/rdfparser.h b/monetdb5/extras/rdf/rdfparser.h --- a/monetdb5/extras/rdf/rdfparser.h +++ b/monetdb5/extras/rdf/rdfparser.h @@ -28,9 +28,11 @@ #define _RDF_PARSER_H_ #include <raptor2.h> +#include <rdftypes.h> #define CHECK_NUM_DBPONTOLOGY 0 /* Check how many rdf triples use dbpontology */ #define BUILD_ONTOLOGIES_HISTO 0 /* Check how much percentage each ontology has in the dataset */ +#define CHECK_NUM_VALUES_PER_TYPE 1 /* default 0*/ typedef struct parserData { /**PROPERTIES */ @@ -60,6 +62,9 @@ typedef struct parserData { #if CHECK_NUM_DBPONTOLOGY int numOntologyTriples; #endif +#if CHECK_NUM_VALUES_PER_TYPE + int numValuesPertype[MULTIVALUES]; +#endif } parserData; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list