Changeset: 0646f409ff77 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0646f409ff77
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfparser.h
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdftypes.c
        monetdb5/extras/rdf/rdftypes.h
Branch: rdf
Log Message:

Add functions for encoding/decoding numeric values in an oid


diffs (truncated from 563 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c 
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -177,6 +177,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
 
 }
 
+
 /*
 * Get the specific type of the object value in an RDF triple
 * The URI object can be recoginized by raptor parser. 
@@ -186,7 +187,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
 */
 
 static ObjectType 
-getObjectType(unsigned char* objStr, BUN *realNumValue){
+getObjectType_and_Value(unsigned char* objStr, ValPtr vrPtrRealValue){
        ObjectType obType = STRING; 
        unsigned char* endpart;
        char* valuepart; 
@@ -194,7 +195,9 @@ getObjectType(unsigned char* objStr, BUN
        int     len = 0; 
        int     subLen = 0; 
 
-       *realNumValue = BUN_NONE; 
+        double  realDbl;
+        int     realInt;
+
        len = strlen((str)objStr);
 
        if (len > 20){
@@ -206,13 +209,15 @@ getObjectType(unsigned char* objStr, BUN
                        /* printf("%s: DateTime \n", objStr); */
                }
                else if ((pos = strstr((str) endpart, "XMLSchema#int>")) != 
NULL || (pos = strstr((str)endpart, "XMLSchema#integer>")) != NULL){
+                       //TODO: Consider nonNegativeInteger
                        obType = INTEGER;
                        subLen = (int) (pos - (str)objStr - 28);
                        valuepart = substring((char*)objStr, 2 , subLen); 
                        /* printf("%s: Integer \n. Length of value %d ==> value 
%s \n", objStr, (int) (pos - (str)objStr - 28), valuepart); */
                        if (isInt(valuepart, subLen) == 1){     /* Check 
whether the real value is an integer */
-                               *realNumValue = (BUN) atoi(valuepart); 
-                               /* printf("Real value is: " BUNFMT " \n", 
*realNumValue); */
+                               realInt = (BUN) atoi(valuepart); 
+                               VALset(vrPtrRealValue,TYPE_int, &realInt);
+                               printf("Real int value is: %d \n", 
vrPtrRealValue->val.ival);
                        }
                        else 
                                obType = STRING;        
@@ -223,8 +228,16 @@ getObjectType(unsigned char* objStr, BUN
                else if ((pos = strstr((str) endpart, "XMLSchema#float>")) != 
NULL 
                                || (pos = strstr((str) endpart, 
"XMLSchema#double>")) != NULL  
                                || (pos = strstr((str) endpart, 
"XMLSchema#decimal>")) != NULL){
-                       obType = FLOAT;
-                       /* printf("%s: Float \n", objStr); */
+                       obType = DOUBLE;
+                       subLen = (int) (pos - (str)objStr - 28);
+                       valuepart = substring((char*)objStr, 2 , subLen);
+                       if (isDouble(valuepart, subLen) == 1){
+                               realDbl = atof(valuepart);
+                               VALset(vrPtrRealValue,TYPE_dbl, &realDbl);
+                               printf("Real double value is: %.10f \n", 
vrPtrRealValue->val.dval);
+                       }
+                       else
+                               obType = STRING;
                }
                else {
                        obType = STRING;
@@ -280,7 +293,7 @@ tripleHandler(void* user_data, const rap
 #endif
        parserData *pdata = ((parserData *) user_data);
        BUN bun = BUN_NONE;
-       BUN realNumValue = BUN_NONE; 
+       ValRecord vrRealValue; 
 
        BAT **graph = pdata->graph;
 
@@ -389,11 +402,30 @@ tripleHandler(void* user_data, const rap
                        unsigned char* objStr;
                        ObjectType objType = STRING;
                        objStr = raptor_term_to_string(triple->object);
-                       objType = getObjectType(objStr, &realNumValue);
+                       objType = getObjectType_and_Value(objStr, &vrRealValue);
 
-                       rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], 
(str)objStr, objType, &bun);    
+                       if (objType == STRING){
+                               rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], 
(str)objStr, objType, &bun);    
+                       }
+                       else{   //For handling dateTime, Integer, Float values
+                               encodeValueInOid(&vrRealValue, objType, &bun);
+                       }
+
                        rdf_BUNappend(pdata, graph[O_sort], &bun); 
 
+                       VALclear(&vrRealValue);
+                       
+                       /*
+                       if (objType == INTEGER){
+                               decodeValueFromOid(bun, objType, &vrRealValue);
+                               printf("Decoded integer value is: %d \n", 
vrRealValue.val.ival);
+                       }
+                       if (objType == DOUBLE){
+                               decodeValueFromOid(bun, objType, &vrRealValue);
+                               printf("Decoded double value is: %.10f \n", 
vrRealValue.val.dval);
+                       }
+                       */
+
                        //printf("Object string is %s --> object type is %d 
(oid = " BUNFMT " \n",objStr,objType, bun);
 
                        bun = BUN_NONE;
diff --git a/monetdb5/extras/rdf/rdfparser.h b/monetdb5/extras/rdf/rdfparser.h
--- a/monetdb5/extras/rdf/rdfparser.h
+++ b/monetdb5/extras/rdf/rdfparser.h
@@ -29,8 +29,8 @@
 
 #include <raptor2.h>
 
-#define CHECK_NUM_DBPONTOLOGY   1       /* Check how many rdf triples use 
dbpontology */
-#define BUILD_ONTOLOGIES_HISTO 1       /* Check how much percentage each 
ontology has in the dataset */
+#define CHECK_NUM_DBPONTOLOGY   0       /* Check how many rdf triples use 
dbpontology */
+#define BUILD_ONTOLOGIES_HISTO 0       /* Check how much percentage each 
ontology has in the dataset */
 
 typedef struct parserData {
                                      /**PROPERTIES             */
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -6622,11 +6622,12 @@ char getObjTypeFromBATtype(int battype){
                case TYPE_int:
                        return INTEGER;
                        break;
-               case TYPE_flt:
-                       return FLOAT;
+               case TYPE_dbl:
+                       return DOUBLE;
                        break;
                default:
                        return 100;
+                       break; 
        }
 }
 
@@ -6634,7 +6635,7 @@ static
 int getObjValueFromMVBat(ValPtr returnValue, ValPtr castedValue, BUN pos, 
ObjectType objType, BAT *tmpBat, BAT *lmap, BAT *rmap){
        str     tmpStr; 
        str     inputStr; 
-       float   *realFloat; 
+       double  *realDbl; 
        int     *realInt; 
        oid     *tmpUriOid; 
        oid     realUriOid = BUN_NONE;
@@ -6699,11 +6700,11 @@ int getObjValueFromMVBat(ValPtr returnVa
                                return 0;
                        }
                        break; 
-               case FLOAT:
+               case DOUBLE:
                        //printf("Full object value: %s \n",objStr);
-                       realFloat = (float *)BUNtail(tmpi, pos);
-                       if (*realFloat != flt_nil){
-                               VALset(returnValue, TYPE_flt, realFloat);
+                       realDbl = (double *)BUNtail(tmpi, pos);
+                       if (*realDbl != dbl_nil){
+                               VALset(returnValue, TYPE_dbl, realDbl);
                                if (rdfcast(objType, STRING, returnValue, 
castedValue) != 1){
                                        printf("Everything should be able to 
cast to String \n");
                                }
@@ -6740,6 +6741,8 @@ int getObjValueFromMVBat(ValPtr returnVa
                        else{
                                return 0; 
                        }
+
+                       break; 
        }
 
 
@@ -7701,7 +7704,7 @@ str printFullSampleData(CSSampleExtend *
        CSSampleExtend  sample; 
        str     objStr;         
        oid     *objOid = NULL; 
-       float   *objFlt = NULL; 
+       double  *objDbl = NULL; 
        int     *objInt = NULL; 
        str     canStr; 
        char    isTitle = 0; 
@@ -7996,15 +7999,15 @@ str printFullSampleData(CSSampleExtend *
                                                GDKfree(objStr);
                                        }
                                }
-                               else if (tmpBat->ttype == TYPE_flt){
-                                       objFlt = (float *) BUNtail(tmpi, k); 
-                                       if (*objFlt == flt_nil){
+                               else if (tmpBat->ttype == TYPE_dbl){
+                                       objDbl = (double *) BUNtail(tmpi, k); 
+                                       if (*objDbl == dbl_nil){
                                                fprintf(fout,"|NULL");
                                                fprintf(foutis,"|NULL");
                                        } 
                                        else{
-                                               fprintf(fout,"|%f", *objFlt);
-                                               fprintf(foutis,"|%f", *objFlt);
+                                               fprintf(fout,"|%f", *objDbl);
+                                               fprintf(foutis,"|%f", *objDbl);
 
                                        }
                                }
@@ -9620,7 +9623,7 @@ void initCStables(CStableStat* cstablest
        mapObjBATtypes[URI] = TYPE_oid; 
        mapObjBATtypes[DATETIME] = TYPE_str;
        mapObjBATtypes[INTEGER] = TYPE_int; 
-       mapObjBATtypes[FLOAT] = TYPE_flt; 
+       mapObjBATtypes[DOUBLE] = TYPE_dbl; 
        mapObjBATtypes[STRING] = TYPE_str; 
        mapObjBATtypes[BLANKNODE] = TYPE_oid;
        mapObjBATtypes[MULTIVALUES] = TYPE_oid;
@@ -9964,7 +9967,7 @@ void getRealValue(ValPtr returnValue, oi
        str     tmpStr; 
        BUN     bun;    
        BUN     maxObjectURIOid =  ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID - 
1)) - 1; //Base on getTblIdxFromS
-       float   realFloat; 
+       float   realDbl; 
        int     realInt; 
        oid     realUri;
 
@@ -10006,17 +10009,19 @@ void getRealValue(ValPtr returnValue, oi
                        realInt = getIntFromRDFString(objStr);
                        VALset(returnValue,TYPE_int, &realInt);
                        break; 
-               case FLOAT:
+               case DOUBLE:
                        //printf("Full object value: %s \n",objStr);
-                       realFloat = getFloatFromRDFString(objStr);
-                       VALset(returnValue,TYPE_flt, &realFloat);
+                       realDbl = getDoubleFromRDFString(objStr);
+                       VALset(returnValue,TYPE_dbl, &realDbl);
                        break; 
                default: //URI or BLANK NODE            
                        realUri = objOid;
                        VALset(returnValue,TYPE_oid, &realUri);
-       }
-
-}
+                       break; 
+       }
+
+}
+
 static
 void updatePropTypeForRemovedTriple(CSPropTypes *csPropTypes, int* 
tmpTblIdxPropIdxMap, int tblIdx, oid *subjCSMap, int* csTblIdxMapping, oid sbt, 
oid pbt, oid *lastRemovedProp, oid* lastRemovedSubj, char isMultiToSingleProp){
        int tmptblIdx, tmpPropIdx;
diff --git a/monetdb5/extras/rdf/rdftypes.c b/monetdb5/extras/rdf/rdftypes.c
--- a/monetdb5/extras/rdf/rdftypes.c
+++ b/monetdb5/extras/rdf/rdftypes.c
@@ -61,12 +61,27 @@ char* substring(char *string, int positi
        return pointer;
 }
 
+
+/* 
+       Returns 1 if the host architecture is little endian, 0 if it is
+       big endian, by reading the first byte of an unsigned int set to 1.
+*/
+
+int is_little_endian(void)
+{
+       unsigned int x = 1;
+       char *c = (char*) &x;   /* view x through its lowest-addressed byte */
+       return (int)*c;         /* that byte is 1 only if the least significant byte is stored first */
+}
+
 char isInt(char *input, int len){
        
        int     i = 0;
        //int   len = strlen(input);
        //printf("... Checking value %s with len %d \n", input, len);
-       if (input[0] != '-' && isdigit(input[0]) == 0)
+       if (len > 11) return 0;
+
+       if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0)
                return 0;       
 
        for(i = 1; i < len; i++)
@@ -83,6 +98,37 @@ char isInt(char *input, int len){
                return 0;
 }
 
+
+char isDouble(char *input, int len){
+       
+       int     i = 0;
+       int     numE = 0;       //number of E's
+       //int   len = strlen(input);
+       //printf("... Checking value %s with len %d \n", input, len);
+       if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0)
+               return 0;       
+
+       for(i = 1; i < len; i++)
+       {
+               if(isdigit(input[i]) == 0 && input[i] != '.' 
+                  && input[i] != 'e' && input[i] != 'E' 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to