Changeset: 650aa35e15c4 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=650aa35e15c4
Modified Files:
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/modules/mal/tokenizer.h
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Use the two most significant bits of the oid of an RDF triple's object
so that the oid itself specifies the type of that object


diffs (188 lines):

diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -51,7 +51,12 @@ RDFleftfetchjoin_sortedestimate(int *res
 rdf_export str 
 RDFleftfetchjoin_sorted(int *result, int* lid, int *rid);
 
-
+typedef enum {	/* kind of value held by the object of an RDF triple */
+       DATETIME, 	/* literal whose text contains "XMLSchema#date" */
+       NUMERIC, 	/* literal typed XMLSchema#float or XMLSchema#integer */
+       URI, 	/* NOTE(review): never produced by getObjectType() in this change */
+       STRING	/* any other literal */
+} ObjectType; 
 
 #define IS_DUPLICATE_FREE 0    /* 0: Duplications have not been removed, otherwise 1 */
 #define TRIPLE_STORE 1
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ b/monetdb5/extras/rdf/rdf_shredder.mx
@@ -150,8 +150,29 @@ if (bun == BUN_NONE) {
                HASHdestroy(@1);
                BAThash(BATmirror(@1), 2*BATcount(@1));
        }
-       bun = (BUN) ((@1)->hseqbase + (@1)->batCount);
-       @1 = BUNappend(@1, (ptr)@2, TRUE);
+       //bun = (BUN) ((@1)->hseqbase + (@1)->batCount);
+       bun = (BUN) (RDF_MIN_LITERAL + (@1)->batCount);
+       
+       /* Add the type here */
+       if (@3 == DATETIME){ 
+               printf("Datetime appears here \n Before: " BUNFMT "\n", bun);
+               bun |= (BUN)1 << (sizeof(BUN)*8 - 2);
+               printf("After: " BUNFMT "\n", bun);
+       }
+       else if (@3 == NUMERIC){
+               printf("Numeric value appears here \n Before: " BUNFMT "\n", bun);
+               bun |= (BUN)2 << (sizeof(BUN)*8 - 2);
+               printf("After: " BUNFMT "\n", bun);
+       }
+       else { /* @3 == STRING */
+               printf("String value appears here \n Before: " BUNFMT "\n", bun);
+               bun |= (BUN)3 << (sizeof(BUN)*8 - 2);
+               printf("After: " BUNFMT "\n", bun);
+       }
+
+       //@1 = BUNappend(@1, (ptr)@2, TRUE);
+       @1 = BUNins(@1, (ptr) &bun, (ptr)@2, TRUE); 
+
        if (@1 == NULL) {
                @:raptor_exception(pdata, "could not append in@1")@
        }
@@ -187,6 +208,39 @@ if (@1 == NULL) {
 
 @
 @c
+
+
+/*
+* Get the specific type of the object value in an RDF triple.
+* A URI object is recognized by the raptor parser itself, so a value that
+* is not a URI is a literal: a dateTime, a numeric or a string.
+* The literal's text is classified by substring match on its XMLSchema
+* datatype; anything unmatched is a STRING. The result is also printed.
+*/
+
+static ObjectType 
+getObjectType(unsigned char* objStr){
+       ObjectType obType; 
+       if (strstr((const char*) objStr, "XMLSchema#date") != NULL){ /* substring match: also hits "XMLSchema#dateTime" */
+               obType = DATETIME;
+               printf("%s: DateTime \n", objStr); 
+       }
+       else if (strstr((const char*) objStr, "XMLSchema#float") != NULL
+               || strstr((const char*) objStr, "XMLSchema#integer") != NULL
+               ) /* NOTE(review): other numeric XSD types (e.g. double, decimal) fall through to STRING */
+       {
+               obType = NUMERIC;
+               printf("%s: Numeric \n", objStr); 
+       }
+       else { /* default: every literal not matched above */
+               obType = STRING;
+               printf("%s: String \n", objStr); 
+       }
+
+       return obType; /* never URI: this function only tells literals apart */
+}
+
+
 static void 
 tripleHandler(void* user_data, const raptor_statement* triple)
 {
@@ -227,8 +281,10 @@ tripleHandler(void* user_data, const rap
                free(objStr);
        } else if (triple->object->type == RAPTOR_TERM_TYPE_LITERAL) {
                unsigned char* objStr;
+               ObjectType objType;
                objStr = raptor_term_to_string(triple->object);
-               @:rdf_BUNappend_unq_1(graph[MAP_LEX], (str)objStr)@
+               objType = getObjectType(objStr);
+               @:rdf_BUNappend_unq_1(graph[MAP_LEX], (str)objStr, objType)@
                @:rdf_BUNappend(graph[O_sort], &bun)@
                bun = BUN_NONE;
                free(objStr);
@@ -323,9 +379,14 @@ parserData_create (str location, BAT** g
        pdata->graph[MAP_LEX]->T->nokey[0] = 0;
        pdata->graph[MAP_LEX]->T->nokey[1] = 0;
 
+       /* Reset the dense property of graph[MAP_LEX] */
+       pdata->graph[MAP_LEX]->hdense = FALSE;
+
        return pdata;
 }
 
+
+
 /*
  * @-
  * After the RDF document has been shredded into 3 bats and a lexical value
@@ -392,11 +453,15 @@ post_processing (parserData *pdata)
        BAT *ctref= NULL;
 #endif
 #ifdef _TKNZR_H
-       BATiter bi, mi;
-       BUN p, d, r;
-       oid *bt;
+
+       //BATiter bi, mi;
+       //BUN p, d, r;
+       //oid *bt;
 
        /* order MAP_LEX */
+
+       /* Do not order the MAP_LEX BAT */
+       #ifdef ORDER_MAPLEX
        BATorder(BATmirror(graph[MAP_LEX]));
        map_oid = BATmark(graph[MAP_LEX], RDF_MIN_LITERAL);   /* BATmark will create a copy */
        BATorder(map_oid);
@@ -418,6 +483,8 @@ post_processing (parserData *pdata)
        }
        BBPreclaim(map_oid);
 
+       #endif
+
        S = graph[S_sort];
        P = graph[P_sort];
        O = graph[O_sort];
@@ -583,7 +650,7 @@ RDFParser (BAT **graph, str *location, s
                char *buf = (char*) GDKmalloc(RDF_CHUNK_SIZE);
                if (buf == NULL) {
                        throw(RDF, "rdf.rdfShred",
-                               "could not allocate a %dMB file buffer\n", (int) (RDF_CHUNK_SIZE>>20));
+                       "could not allocate a %dMB file buffer\n", (int) (RDF_CHUNK_SIZE>>20));
                }
                uri = raptor_new_uri(raptor_uri_filename_to_uri_string(pdata->location));
                iret = raptor_start_parse(rparser, uri);
diff --git a/monetdb5/modules/mal/tokenizer.h b/monetdb5/modules/mal/tokenizer.h
--- a/monetdb5/modules/mal/tokenizer.h
+++ b/monetdb5/modules/mal/tokenizer.h
@@ -22,7 +22,7 @@
 #include "mal_client.h"
 #include "mal_interpreter.h"
 
-#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?62:30))
+#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?60:28))
 
 #ifdef WIN32
 #if !defined(LIBMAL) && !defined(LIBATOMS) && !defined(LIBKERNEL) && !defined(LIBMAL) && !defined(LIBOPTIMIZER) && !defined(LIBSCHEDULER) && !defined(LIBMONETDB5)
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -7298,13 +7298,13 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb, 
 
                                store_funcs.append_col(m->session->tr,
                                                       mvc_bind_column(m, spo_tbl, "subject"),
-                                                      sbt, TYPE_int);
+                                                      sbt, TYPE_oid);
                                store_funcs.append_col(m->session->tr,
                                                       mvc_bind_column(m, spo_tbl, "property"),
-                                                      pbt, TYPE_int);
+                                                      pbt, TYPE_oid);
                                store_funcs.append_col(m->session->tr,
                                                       mvc_bind_column(m, spo_tbl, "object"),
-                                                      obt, TYPE_int);
+                                                      obt, TYPE_oid);
                                /* Update current value */                     
                                curS = *sbt; 
                                curP = *pbt; 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to