Changeset: 650aa35e15c4 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=650aa35e15c4
Modified Files:
	monetdb5/extras/rdf/rdf.h
	monetdb5/extras/rdf/rdf_shredder.mx
	monetdb5/modules/mal/tokenizer.h
	sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Change two bits in the oid for the RDF triple object so that the oid can specify the type of that object diffs (188 lines): diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h --- a/monetdb5/extras/rdf/rdf.h +++ b/monetdb5/extras/rdf/rdf.h @@ -51,7 +51,12 @@ RDFleftfetchjoin_sortedestimate(int *res rdf_export str RDFleftfetchjoin_sorted(int *result, int* lid, int *rid); - +typedef enum { + DATETIME, + NUMERIC, + URI, + STRING +} ObjectType; #define IS_DUPLICATE_FREE 0 /* 0: Duplications have not been removed, otherwise 1 */ #define TRIPLE_STORE 1 diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx --- a/monetdb5/extras/rdf/rdf_shredder.mx +++ b/monetdb5/extras/rdf/rdf_shredder.mx @@ -150,8 +150,29 @@ if (bun == BUN_NONE) { HASHdestroy(@1); BAThash(BATmirror(@1), 2*BATcount(@1)); } - bun = (BUN) ((@1)->hseqbase + (@1)->batCount); - @1 = BUNappend(@1, (ptr)@2, TRUE); + //bun = (BUN) ((@1)->hseqbase + (@1)->batCount); + bun = (BUN) (RDF_MIN_LITERAL + (@1)->batCount); + + /* Add the type here */ + if (@3 == DATETIME){ + printf("Datetime appears here \n Before: " BUNFMT "\n", bun); + bun |= (BUN)1 << (sizeof(BUN)*8 - 2); + printf("After: " BUNFMT "\n", bun); + } + else if (@3 == NUMERIC){ + printf("Numeric value appears here \n Before: " BUNFMT "\n", bun); + bun |= (BUN)2 << (sizeof(BUN)*8 - 2); + printf("After: " BUNFMT "\n", bun); + } + else { /* @3 == STRING */ + printf("String value appears here \n Before: " BUNFMT "\n", bun); + bun |= (BUN)3 << (sizeof(BUN)*8 - 2); + printf("After: " BUNFMT "\n", bun); + } + + //@1 = BUNappend(@1, (ptr)@2, TRUE); + @1 = BUNins(@1, (ptr) &bun, (ptr)@2, TRUE); + if (@1 == NULL) { @:raptor_exception(pdata, "could not append in@1")@ } @@ -187,6 +208,39 @@ if (@1 == NULL) { @ @c + + +/* +* Get the specific type of the object value in an RDF triple +* The URI object can be recoginized by raptor parser. 
+* If the object value is not an URI ==> it is a literal, and +* specifically, a numeric, a dateTime or a string. +* This function will find the specific type of Object value +*/ + +static ObjectType +getObjectType(unsigned char* objStr){ + ObjectType obType; + if (strstr((const char*) objStr, "XMLSchema#date") != NULL){ + obType = DATETIME; + printf("%s: DateTime \n", objStr); + } + else if (strstr((const char*) objStr, "XMLSchema#float") != NULL + || strstr((const char*) objStr, "XMLSchema#integer") != NULL + ) + { + obType = NUMERIC; + printf("%s: Numeric \n", objStr); + } + else { + obType = STRING; + printf("%s: String \n", objStr); + } + + return obType; +} + + static void tripleHandler(void* user_data, const raptor_statement* triple) { @@ -227,8 +281,10 @@ tripleHandler(void* user_data, const rap free(objStr); } else if (triple->object->type == RAPTOR_TERM_TYPE_LITERAL) { unsigned char* objStr; + ObjectType objType; objStr = raptor_term_to_string(triple->object); - @:rdf_BUNappend_unq_1(graph[MAP_LEX], (str)objStr)@ + objType = getObjectType(objStr); + @:rdf_BUNappend_unq_1(graph[MAP_LEX], (str)objStr, objType)@ @:rdf_BUNappend(graph[O_sort], &bun)@ bun = BUN_NONE; free(objStr); @@ -323,9 +379,14 @@ parserData_create (str location, BAT** g pdata->graph[MAP_LEX]->T->nokey[0] = 0; pdata->graph[MAP_LEX]->T->nokey[1] = 0; + /* Reset the dense property of graph[MAP_LEX] */ + pdata->graph[MAP_LEX]->hdense = FALSE; + return pdata; } + + /* * @- * After the RDF document has been shredded into 3 bats and a lexical value @@ -392,11 +453,15 @@ post_processing (parserData *pdata) BAT *ctref= NULL; #endif #ifdef _TKNZR_H - BATiter bi, mi; - BUN p, d, r; - oid *bt; + + //BATiter bi, mi; + //BUN p, d, r; + //oid *bt; /* order MAP_LEX */ + + /* Do not order the MAP_LEX BAT */ + #ifdef ORDER_MAPLEX BATorder(BATmirror(graph[MAP_LEX])); map_oid = BATmark(graph[MAP_LEX], RDF_MIN_LITERAL); /* BATmark will create a copy */ BATorder(map_oid); @@ -418,6 +483,8 @@ post_processing 
(parserData *pdata) } BBPreclaim(map_oid); + #endif + S = graph[S_sort]; P = graph[P_sort]; O = graph[O_sort]; @@ -583,7 +650,7 @@ RDFParser (BAT **graph, str *location, s char *buf = (char*) GDKmalloc(RDF_CHUNK_SIZE); if (buf == NULL) { throw(RDF, "rdf.rdfShred", - "could not allocate a %dMB file buffer\n", (int) (RDF_CHUNK_SIZE>>20)); + "could not allocate a %dMB file buffer\n", (int) (RDF_CHUNK_SIZE>>20)); } uri = raptor_new_uri(raptor_uri_filename_to_uri_string(pdata->location)); iret = raptor_start_parse(rparser, uri); diff --git a/monetdb5/modules/mal/tokenizer.h b/monetdb5/modules/mal/tokenizer.h --- a/monetdb5/modules/mal/tokenizer.h +++ b/monetdb5/modules/mal/tokenizer.h @@ -22,7 +22,7 @@ #include "mal_client.h" #include "mal_interpreter.h" -#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?62:30)) +#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?60:28)) #ifdef WIN32 #if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) && !defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) && !defined(LIBMONETDB5) diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx --- a/sql/backends/monet5/sql.mx +++ b/sql/backends/monet5/sql.mx @@ -7298,13 +7298,13 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb, store_funcs.append_col(m->session->tr, mvc_bind_column(m, spo_tbl, "subject"), - sbt, TYPE_int); + sbt, TYPE_oid); store_funcs.append_col(m->session->tr, mvc_bind_column(m, spo_tbl, "property"), - pbt, TYPE_int); + pbt, TYPE_oid); store_funcs.append_col(m->session->tr, mvc_bind_column(m, spo_tbl, "object"), - obt, TYPE_int); + obt, TYPE_oid); /* Update current value */ curS = *sbt; curP = *pbt; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list