Changeset: 0646f409ff77 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0646f409ff77
Modified Files:
	monetdb5/extras/rdf/rdf_shredder.c
	monetdb5/extras/rdf/rdfparser.h
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdftypes.c
	monetdb5/extras/rdf/rdftypes.h
Branch: rdf
Log Message:
Add function for encoding/decoding numeric value in oid diffs (truncated from 563 to 300 lines): diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c --- a/monetdb5/extras/rdf/rdf_shredder.c +++ b/monetdb5/extras/rdf/rdf_shredder.c @@ -177,6 +177,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda } + /* * Get the specific type of the object value in an RDF triple * The URI object can be recoginized by raptor parser. @@ -186,7 +187,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda */ static ObjectType -getObjectType(unsigned char* objStr, BUN *realNumValue){ +getObjectType_and_Value(unsigned char* objStr, ValPtr vrPtrRealValue){ ObjectType obType = STRING; unsigned char* endpart; char* valuepart; @@ -194,7 +195,9 @@ getObjectType(unsigned char* objStr, BUN int len = 0; int subLen = 0; - *realNumValue = BUN_NONE; + double realDbl; + int realInt; + len = strlen((str)objStr); if (len > 20){ @@ -206,13 +209,15 @@ getObjectType(unsigned char* objStr, BUN /* printf("%s: DateTime \n", objStr); */ } else if ((pos = strstr((str) endpart, "XMLSchema#int>")) != NULL || (pos = strstr((str)endpart, "XMLSchema#integer>")) != NULL){ + //TODO: Consider nonNegativeInteger obType = INTEGER; subLen = (int) (pos - (str)objStr - 28); valuepart = substring((char*)objStr, 2 , subLen); /* printf("%s: Integer \n. 
Length of value %d ==> value %s \n", objStr, (int) (pos - (str)objStr - 28), valuepart); */ if (isInt(valuepart, subLen) == 1){ /* Check whether the real value is an integer */ - *realNumValue = (BUN) atoi(valuepart); - /* printf("Real value is: " BUNFMT " \n", *realNumValue); */ + realInt = (BUN) atoi(valuepart); + VALset(vrPtrRealValue,TYPE_int, &realInt); + printf("Real int value is: %d \n", vrPtrRealValue->val.ival); } else obType = STRING; @@ -223,8 +228,16 @@ getObjectType(unsigned char* objStr, BUN else if ((pos = strstr((str) endpart, "XMLSchema#float>")) != NULL || (pos = strstr((str) endpart, "XMLSchema#double>")) != NULL || (pos = strstr((str) endpart, "XMLSchema#decimal>")) != NULL){ - obType = FLOAT; - /* printf("%s: Float \n", objStr); */ + obType = DOUBLE; + subLen = (int) (pos - (str)objStr - 28); + valuepart = substring((char*)objStr, 2 , subLen); + if (isDouble(valuepart, subLen) == 1){ + realDbl = atof(valuepart); + VALset(vrPtrRealValue,TYPE_dbl, &realDbl); + printf("Real double value is: %.10f \n", vrPtrRealValue->val.dval); + } + else + obType = STRING; } else { obType = STRING; @@ -280,7 +293,7 @@ tripleHandler(void* user_data, const rap #endif parserData *pdata = ((parserData *) user_data); BUN bun = BUN_NONE; - BUN realNumValue = BUN_NONE; + ValRecord vrRealValue; BAT **graph = pdata->graph; @@ -389,11 +402,30 @@ tripleHandler(void* user_data, const rap unsigned char* objStr; ObjectType objType = STRING; objStr = raptor_term_to_string(triple->object); - objType = getObjectType(objStr, &realNumValue); + objType = getObjectType_and_Value(objStr, &vrRealValue); - rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], (str)objStr, objType, &bun); + if (objType == STRING){ + rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], (str)objStr, objType, &bun); + } + else{ //For handling dateTime, Integer, Float values + encodeValueInOid(&vrRealValue, objType, &bun); + } + rdf_BUNappend(pdata, graph[O_sort], &bun); + VALclear(&vrRealValue); + + /* + if (objType 
== INTEGER){ + decodeValueFromOid(bun, objType, &vrRealValue); + printf("Decoded integer value is: %d \n", vrRealValue.val.ival); + } + if (objType == DOUBLE){ + decodeValueFromOid(bun, objType, &vrRealValue); + printf("Decoded double value is: %.10f \n", vrRealValue.val.dval); + } + */ + //printf("Object string is %s --> object type is %d (oid = " BUNFMT " \n",objStr,objType, bun); bun = BUN_NONE; diff --git a/monetdb5/extras/rdf/rdfparser.h b/monetdb5/extras/rdf/rdfparser.h --- a/monetdb5/extras/rdf/rdfparser.h +++ b/monetdb5/extras/rdf/rdfparser.h @@ -29,8 +29,8 @@ #include <raptor2.h> -#define CHECK_NUM_DBPONTOLOGY 1 /* Check how many rdf triples use dbpontology */ -#define BUILD_ONTOLOGIES_HISTO 1 /* Check how much percentage each ontology has in the dataset */ +#define CHECK_NUM_DBPONTOLOGY 0 /* Check how many rdf triples use dbpontology */ +#define BUILD_ONTOLOGIES_HISTO 0 /* Check how much percentage each ontology has in the dataset */ typedef struct parserData { /**PROPERTIES */ diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -6622,11 +6622,12 @@ char getObjTypeFromBATtype(int battype){ case TYPE_int: return INTEGER; break; - case TYPE_flt: - return FLOAT; + case TYPE_dbl: + return DOUBLE; break; default: return 100; + break; } } @@ -6634,7 +6635,7 @@ static int getObjValueFromMVBat(ValPtr returnValue, ValPtr castedValue, BUN pos, ObjectType objType, BAT *tmpBat, BAT *lmap, BAT *rmap){ str tmpStr; str inputStr; - float *realFloat; + double *realDbl; int *realInt; oid *tmpUriOid; oid realUriOid = BUN_NONE; @@ -6699,11 +6700,11 @@ int getObjValueFromMVBat(ValPtr returnVa return 0; } break; - case FLOAT: + case DOUBLE: //printf("Full object value: %s \n",objStr); - realFloat = (float *)BUNtail(tmpi, pos); - if (*realFloat != flt_nil){ - VALset(returnValue, TYPE_flt, realFloat); + realDbl = (double *)BUNtail(tmpi, pos); + if (*realDbl != dbl_nil){ + 
VALset(returnValue, TYPE_dbl, realDbl); if (rdfcast(objType, STRING, returnValue, castedValue) != 1){ printf("Everything should be able to cast to String \n"); } @@ -6740,6 +6741,8 @@ int getObjValueFromMVBat(ValPtr returnVa else{ return 0; } + + break; } @@ -7701,7 +7704,7 @@ str printFullSampleData(CSSampleExtend * CSSampleExtend sample; str objStr; oid *objOid = NULL; - float *objFlt = NULL; + double *objDbl = NULL; int *objInt = NULL; str canStr; char isTitle = 0; @@ -7996,15 +7999,15 @@ str printFullSampleData(CSSampleExtend * GDKfree(objStr); } } - else if (tmpBat->ttype == TYPE_flt){ - objFlt = (float *) BUNtail(tmpi, k); - if (*objFlt == flt_nil){ + else if (tmpBat->ttype == TYPE_dbl){ + objDbl = (double *) BUNtail(tmpi, k); + if (*objDbl == dbl_nil){ fprintf(fout,"|NULL"); fprintf(foutis,"|NULL"); } else{ - fprintf(fout,"|%f", *objFlt); - fprintf(foutis,"|%f", *objFlt); + fprintf(fout,"|%f", *objDbl); + fprintf(foutis,"|%f", *objDbl); } } @@ -9620,7 +9623,7 @@ void initCStables(CStableStat* cstablest mapObjBATtypes[URI] = TYPE_oid; mapObjBATtypes[DATETIME] = TYPE_str; mapObjBATtypes[INTEGER] = TYPE_int; - mapObjBATtypes[FLOAT] = TYPE_flt; + mapObjBATtypes[DOUBLE] = TYPE_dbl; mapObjBATtypes[STRING] = TYPE_str; mapObjBATtypes[BLANKNODE] = TYPE_oid; mapObjBATtypes[MULTIVALUES] = TYPE_oid; @@ -9964,7 +9967,7 @@ void getRealValue(ValPtr returnValue, oi str tmpStr; BUN bun; BUN maxObjectURIOid = ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID - 1)) - 1; //Base on getTblIdxFromS - float realFloat; + float realDbl; int realInt; oid realUri; @@ -10006,17 +10009,19 @@ void getRealValue(ValPtr returnValue, oi realInt = getIntFromRDFString(objStr); VALset(returnValue,TYPE_int, &realInt); break; - case FLOAT: + case DOUBLE: //printf("Full object value: %s \n",objStr); - realFloat = getFloatFromRDFString(objStr); - VALset(returnValue,TYPE_flt, &realFloat); + realDbl = getDoubleFromRDFString(objStr); + VALset(returnValue,TYPE_dbl, &realDbl); break; default: //URI or BLANK 
NODE realUri = objOid; VALset(returnValue,TYPE_oid, &realUri); - } - -} + break; + } + +} + static void updatePropTypeForRemovedTriple(CSPropTypes *csPropTypes, int* tmpTblIdxPropIdxMap, int tblIdx, oid *subjCSMap, int* csTblIdxMapping, oid sbt, oid pbt, oid *lastRemovedProp, oid* lastRemovedSubj, char isMultiToSingleProp){ int tmptblIdx, tmpPropIdx; diff --git a/monetdb5/extras/rdf/rdftypes.c b/monetdb5/extras/rdf/rdftypes.c --- a/monetdb5/extras/rdf/rdftypes.c +++ b/monetdb5/extras/rdf/rdftypes.c @@ -61,12 +61,27 @@ char* substring(char *string, int positi return pointer; } + +/* + This function returns 1 if architecture + is little endian, 0 in case of big endian. +*/ + +int is_little_endian(void) +{ + unsigned int x = 1; + char *c = (char*) &x; + return (int)*c; +} + char isInt(char *input, int len){ int i = 0; //int len = strlen(input); //printf("... Checking value %s with len %d \n", input, len); - if (input[0] != '-' && isdigit(input[0]) == 0) + if (len > 11) return 0; + + if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0) return 0; for(i = 1; i < len; i++) @@ -83,6 +98,37 @@ char isInt(char *input, int len){ return 0; } + +char isDouble(char *input, int len){ + + int i = 0; + int numE = 0; //number of E's + //int len = strlen(input); + //printf("... Checking value %s with len %d \n", input, len); + if (input[0] != '-' && input[0] != '+' && isdigit(input[0]) == 0) + return 0; + + for(i = 1; i < len; i++) + { + if(isdigit(input[i]) == 0 && input[i] != '.' + && input[i] != 'e' && input[i] != 'E' _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list