Changeset: 05f19810601b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=05f19810601b
Modified Files:
	monetdb5/modules/atoms/Tests/json04.mal
	monetdb5/modules/atoms/Tests/json04.stable.out
	monetdb5/modules/atoms/json_atom.c
	monetdb5/modules/atoms/json_atom.h
	monetdb5/modules/atoms/json_atom.mal
Branch: default
Log Message:
Add list unnesting diffs (265 lines): diff --git a/monetdb5/modules/atoms/Tests/json04.mal b/monetdb5/modules/atoms/Tests/json04.mal --- a/monetdb5/modules/atoms/Tests/json04.mal +++ b/monetdb5/modules/atoms/Tests/json04.mal @@ -17,3 +17,10 @@ io.print(j); io.print(s); redo (i,j) := iterator.next(w); exit (i,j); + +js := calc.json("[1,true, [2,false]]"); +io.print(js); +(l1,l2) := json.unnest(js); +io.print(l2); +l3 := json.unnest(js); +io.print(l3); diff --git a/monetdb5/modules/atoms/Tests/json04.stable.out b/monetdb5/modules/atoms/Tests/json04.stable.out --- a/monetdb5/modules/atoms/Tests/json04.stable.out +++ b/monetdb5/modules/atoms/Tests/json04.stable.out @@ -33,6 +33,12 @@ barrier (i,j) := iterator.new(w); io.print(s); redo (i,j) := iterator.next(w); exit (i,j); + js := calc.json("[1,true, [2,false]]"); + io.print(js); + (l1,l2) := json.unnest(js); + io.print(l2); + l3 := json.unnest(js); + io.print(l3); end main; [ "[{ \"category\": \"reference\", \"author\": \"Nigel Rees\", \"title\": \"Sayings of the Century\", \"price\": 8.95 }, { \"category\": \"fiction\", \"author\": \"Evelyn Waugh\", \"title\": \"Sword of Honour\", \"price\": 12.99 }, { \"category\": \"fiction\", \"author\": \"Herman Melville\", \"title\": \"Moby Dick\", \"isbn\": \"0-553-21311-3\", \"price\": 8.99 }, { \"category\": \"fiction\", \"author\": \"J. R. R. Tolkien\", \"title\": \"The Lord of the Rings\", \"isbn\": \"0-395-19395-8\", \"price\": 22.99 }, { \"color\": \"red\", \"price\": 19.95 }]" ] #---------------------------------------------------------# @@ -124,6 +130,21 @@ end main; [ "red" ] [ "19.95 " ] [ "19.95 " ] +[ "[1,true, [2,false]]" ] +#-------------------------# +# h t # name +# void json # type +#-------------------------# +[ 0@0, "1" ] +[ 1@0, "true" ] +[ 2@0, "[2,false]" ] +#-------------------------# +# h t # name +# void json # type +#-------------------------# +[ 0@0, "1" ] +[ 1@0, "true" ] +[ 2@0, "[2,false]" ] # 22:43:20 > # 22:43:20 > "Done." 
diff --git a/monetdb5/modules/atoms/json_atom.c b/monetdb5/modules/atoms/json_atom.c --- a/monetdb5/modules/atoms/json_atom.c +++ b/monetdb5/modules/atoms/json_atom.c @@ -418,7 +418,7 @@ str JSONlength(int *ret, json *js) } //the access functions assume a valid json object or -//single nested array of objects ([[[{object},..]]] +//single nested array of objects ([{object},..] //any structure violation leads to an early abort //The keys should be unique in an object static str @@ -601,7 +601,7 @@ str JSONunnest(int *key, int *val, json char *msg= MAL_SUCCEED; char *result = NULL; size_t l,lim; - int nesting=0; + int nesting=0, valuelist=0; char *j = *js; bk = BATnew(TYPE_void,TYPE_str,64); @@ -629,12 +629,14 @@ str JSONunnest(int *key, int *val, json bv->T->nonil = 1; skipblancs; - while( *j == '['){ + // unnest {} or [{},...] or [val,...] + // in the latter case the name table remains empty + if( *j == '['){ nesting++; j++; + skipblancs; } - if ( *j != '{' ) - throw(MAL,"json.unnest","JSON object expected"); + valuelist = *j != '{'; // the result is an array of values result = (char *) GDKmalloc(BUFSIZ); @@ -645,36 +647,38 @@ str JSONunnest(int *key, int *val, json } lim = BUFSIZ; - for( j++; *j && *j != '}'; j++){ + for( (valuelist?j: j++); *j && *j != '}'; j++){ skipblancs; if (*j == ']'){ break; } if (*j == '}') break; - if (*j != '"'){ - msg = createException(MAL,"json.unnest","Name expected"); - goto wrapup; + if ( !valuelist){ + if (*j != '"'){ + msg = createException(MAL,"json.unnest","Name expected"); + goto wrapup; + } + namebegin = j+1; + msg = JSONstringParser(j+1, &j); + if ( msg) + goto wrapup; + nameend = j-1; + l = nameend - namebegin; + if ( l + 2 > lim ) + result = GDKrealloc(result, lim += BUFSIZ); + strncpy(result,namebegin,nameend-namebegin); + result[l] = 0; + BUNappend(bk, result, FALSE); + + skipblancs; + if ( *j != ':'){ + msg = createException(MAL,"json.unnest","Value expected"); + goto wrapup; + } + j++; + skipblancs; } - namebegin = j+1; 
- msg = JSONstringParser(j+1, &j); - if ( msg) - goto wrapup; - nameend = j-1; - l = nameend - namebegin; - if ( l + 2 > lim ) - result = GDKrealloc(result, lim += BUFSIZ); - strncpy(result,namebegin,nameend-namebegin); - result[l] = 0; - BUNappend(bk, result, FALSE); - - skipblancs; - if ( *j != ':'){ - msg = createException(MAL,"json.unnest","Value expected"); - goto wrapup; - } - j++; - skipblancs; valuebegin = j; msg = JSONvalueParser(j,&j); if ( msg) @@ -689,13 +693,13 @@ str JSONunnest(int *key, int *val, json skipblancs; if (*j == '}'){ - if(nesting ){ - while (*j && *j != '{' && *j != ']') j++; + if(!valuelist ){ + while (*j && *j != '{' ) j++; if ( *j != '{') j--; } continue; } - if (*j != ',') + if (*j != ',' && !(valuelist && *j == ']')) msg = createException(MAL,"json.unnest","',' expected"); } wrapup:; @@ -705,6 +709,69 @@ wrapup:; return msg; } +str JSONunnestOne(int *val, json *js) +{ + BAT *bv; + char *valuebegin,*valueend; + char *msg= MAL_SUCCEED; + char *result = NULL; + size_t l,lim; + char *j = *js; + + bv = BATnew(TYPE_void,TYPE_json,64); + if ( bv == NULL){ + throw(MAL,"json.unnest",MAL_MALLOC_FAIL); + } + BATseqbase(bv,0); + bv->hsorted = 1; + bv->hrevsorted = 0; + bv->H->nonil =1; + bv->tsorted = 1; + bv->trevsorted = 0; + bv->T->nonil = 1; + + skipblancs; + // unnest a list + if( *j != '[') + throw(MAL,"json.unnest","JSON list expected"); + + // the result is an array of values + result = (char *) GDKmalloc(BUFSIZ); + if ( result == 0){ + BBPreleaseref(bv->batCacheid); + throw(MAL,"json.unnest",MAL_MALLOC_FAIL); + } + lim = BUFSIZ; + + for( j++; *j && *j != ']'; j++){ + skipblancs; + if (*j == ']'){ + break; + } + valuebegin = j; + msg = JSONvalueParser(j,&j); + if ( msg) + goto wrapup; + valueend = j; + l= valueend - valuebegin; + if ( l + 2 > lim ) + result = GDKrealloc(result, lim += BUFSIZ); + strncpy(result,valuebegin,l); + result[l] = 0; + BUNappend(bv, result, FALSE); + + skipblancs; + if (*j == ']') + continue; + if ( *j && *j != 
',' ) + msg = createException(MAL,"json.unnest","',' expected"); + } +wrapup:; + BBPkeepref(*val= bv->batCacheid); + GDKfree(result); + return msg; +} + str JSONunnestGrouped(int *grp, int *key, int *val, json *js) { BAT *bk, *bv, *bg; diff --git a/monetdb5/modules/atoms/json_atom.h b/monetdb5/modules/atoms/json_atom.h --- a/monetdb5/modules/atoms/json_atom.h +++ b/monetdb5/modules/atoms/json_atom.h @@ -56,6 +56,7 @@ json_export str JSONisarray(int *ret, js json_export str JSONlength(int *ret, json *j); json_export str JSONunnest(int *key, int *val, json *j); +json_export str JSONunnestOne(int *val, json *j); json_export str JSONunnestGrouped(int *grp, int *key, int *val, json *j); json_export str JSONnest(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); json_export str JSONnames(int *ret, json *j); diff --git a/monetdb5/modules/atoms/json_atom.mal b/monetdb5/modules/atoms/json_atom.mal --- a/monetdb5/modules/atoms/json_atom.mal +++ b/monetdb5/modules/atoms/json_atom.mal @@ -73,6 +73,10 @@ command unnest(val:json)(k:bat[:oid,:str address JSONunnest comment "Expands the outermost JSON object into key-value pairs."; +command unnest(val:json):bat[:oid,:json] +address JSONunnestOne +comment "Expands a JSON list to its elements."; + pattern nest(o:bat[:oid,:oid],k:bat[:oid,:str],v:bat[:oid,:any]):json address JSONnest comment "Nest the key-value pairs with object identity into a list of JSON objects "; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list