Changeset: 4088384eb40d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4088384eb40d
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfparser.h
Branch: rdf
Log Message:

Count number of values per type


diffs (104 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -236,7 +236,7 @@ getObjectType_and_Value(unsigned char* o
                        if (isInt(valuepart, subLen) == 1){     /* Check whether the real value is an integer */
                                realInt = (BUN) atoi(valuepart); 
                                VALset(vrPtrRealValue,TYPE_int, &realInt);
-                               printf("Real int value is: %d \n", vrPtrRealValue->val.ival);
+                               //printf("Real int value is: %d \n", vrPtrRealValue->val.ival);
                        }
                        else 
                                obType = STRING;        
@@ -253,7 +253,7 @@ getObjectType_and_Value(unsigned char* o
                        if (isDouble(valuepart, subLen) == 1){
                                realDbl = atof(valuepart);
                                VALset(vrPtrRealValue,TYPE_dbl, &realDbl);
-                               printf("Real double value is: %.10f \n", vrPtrRealValue->val.dval);
+                               //printf("Real double value is: %.10f \n", vrPtrRealValue->val.dval);
                        }
                        else
                                obType = STRING;
@@ -406,7 +406,9 @@ tripleHandler(void* user_data, const rap
                        //rdf_insert(pdata, graph[MAP_LEX], (str) objStr, &bun);
                        rdf_tknzr_insert((str) objStr, &bun);
                        rdf_BUNappend(pdata, graph[O_sort], &bun); 
-
+#if    CHECK_NUM_VALUES_PER_TYPE
+                       pdata->numValuesPertype[URI]++;
+#endif                         
                        bun = BUN_NONE;
                        free(objStr);
                } else if (triple->object->type == RAPTOR_TERM_TYPE_BLANK) {
@@ -416,6 +418,9 @@ tripleHandler(void* user_data, const rap
                        rdf_BUNappend_BlankNode_Obj(pdata, graph[O_sort], &bun); 
                        //rdf_BUNappend(pdata, graph[O_sort], &bun); 
 
+#if    CHECK_NUM_VALUES_PER_TYPE
+                       pdata->numValuesPertype[BLANKNODE]++;
+#endif                         
                        bun = BUN_NONE;
                        free(objStr);
                
@@ -432,6 +437,9 @@ tripleHandler(void* user_data, const rap
                                encodeValueInOid(&vrRealValue, objType, &bun);
                        }
 
+#if    CHECK_NUM_VALUES_PER_TYPE
+                       pdata->numValuesPertype[objType]++;
+#endif                         
                        rdf_BUNappend(pdata, graph[O_sort], &bun); 
 
                        VALclear(&vrRealValue);
@@ -585,7 +593,11 @@ parserData_create (str location, BAT** g
                pdata->numNonOnt = 0;
        }
        #endif
-
+#if     CHECK_NUM_VALUES_PER_TYPE
+       for (i = 0; i < MULTIVALUES; i++){
+               pdata->numValuesPertype[i] = 0;
+       }
+#endif 
        return pdata;
 }
 
@@ -989,6 +1001,13 @@ RDFParser (BAT **graph, str *location, s
        #if     BUILD_ONTOLOGIES_HISTO
        printHistogram(pdata);
        #endif
+#if    CHECK_NUM_VALUES_PER_TYPE
+       printf("Number of URI %d \n", pdata->numValuesPertype[URI]);
+       printf("Number of DATETIME %d \n", pdata->numValuesPertype[DATETIME]);
+       printf("Number of INTEGER %d \n", pdata->numValuesPertype[INTEGER]);
+       printf("Number of DOUBLE %d \n", pdata->numValuesPertype[DOUBLE]);
+       printf("Number of BLANKNODE %d \n", pdata->numValuesPertype[BLANKNODE]);
+#endif                         
        /* post processing step */
        tmpbeginT = clock();
        ret = post_processing(pdata);
diff --git a/monetdb5/extras/rdf/rdfparser.h b/monetdb5/extras/rdf/rdfparser.h
--- a/monetdb5/extras/rdf/rdfparser.h
+++ b/monetdb5/extras/rdf/rdfparser.h
@@ -28,9 +28,11 @@
 #define _RDF_PARSER_H_
 
 #include <raptor2.h>
+#include <rdftypes.h>
 
 #define CHECK_NUM_DBPONTOLOGY   0       /* Check how many rdf triples use dbpontology */
 #define BUILD_ONTOLOGIES_HISTO 0       /* Check how much percentage each ontology has in the dataset */
+#define CHECK_NUM_VALUES_PER_TYPE 1    /* default 0*/
 
 typedef struct parserData {
                                      /**PROPERTIES             */
@@ -60,6 +62,9 @@ typedef struct parserData {
 #if CHECK_NUM_DBPONTOLOGY
        int numOntologyTriples; 
 #endif
+#if    CHECK_NUM_VALUES_PER_TYPE
+       int numValuesPertype[MULTIVALUES];
+#endif 
        
 } parserData;
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to