Changeset: 15668d66195b for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/15668d66195b Modified Files: monetdb5/modules/atoms/pg_jsonpath/jsonpath.h monetdb5/modules/atoms/pg_jsonpath/jsonpath_internal.h monetdb5/modules/atoms/pg_jsonpath/jsonpath_scan.l monetdb5/modules/atoms/pg_jsonpath/postgres_defines_internal.h Branch: json-extend Log Message:
scanner jsonpath_scan compiles diffs (230 lines): diff --git a/monetdb5/modules/atoms/pg_jsonpath/jsonpath.h b/monetdb5/modules/atoms/pg_jsonpath/jsonpath.h --- a/monetdb5/modules/atoms/pg_jsonpath/jsonpath.h +++ b/monetdb5/modules/atoms/pg_jsonpath/jsonpath.h @@ -273,7 +273,7 @@ typedef struct JsonPathParseResult bool lax; } JsonPathParseResult; -extern struct Node * init_escontext(); +extern struct Node * init_escontext(void); extern JsonPathParseResult *parsejsonpath(const char *str, int len, struct Node *escontext); diff --git a/monetdb5/modules/atoms/pg_jsonpath/jsonpath_internal.h b/monetdb5/modules/atoms/pg_jsonpath/jsonpath_internal.h --- a/monetdb5/modules/atoms/pg_jsonpath/jsonpath_internal.h +++ b/monetdb5/modules/atoms/pg_jsonpath/jsonpath_internal.h @@ -27,6 +27,7 @@ typedef struct JsonPathString #include "postgres_defines_internal.h" #include "jsonpath_gram.h" +#define YY_USER_INIT (void) result; #define YY_DECL extern int jsonpath_yylex(YYSTYPE *yylval_param, \ JsonPathParseResult **result, \ struct Node *escontext) diff --git a/monetdb5/modules/atoms/pg_jsonpath/jsonpath_scan.l b/monetdb5/modules/atoms/pg_jsonpath/jsonpath_scan.l --- a/monetdb5/modules/atoms/pg_jsonpath/jsonpath_scan.l +++ b/monetdb5/modules/atoms/pg_jsonpath/jsonpath_scan.l @@ -428,7 +428,7 @@ static const JsonPathKeyword keywords[] /* Check if current scanstring value is a keyword */ static enum yytokentype -checkKeyword() +checkKeyword(void) { int res = IDENT_P; int diff; @@ -552,9 +552,9 @@ addchar(bool init, char c) /* initial jsonpath parser context */ struct Node * -init_escontext() +init_escontext(void) { - struct Node * escontext = GDKmalloc(struct Node); + struct Node * escontext = GDKmalloc(sizeof(struct Node)); if (!escontext) return NULL; @@ -624,13 +624,8 @@ addUnicodeChar(int ch, struct Node *esco * conversion function. Otherwise call the normal form which provides * more detailed errors. */ - - if (! escontext || ! IsA(escontext, ErrorSaveContext)) - pg_unicode_to_server(ch, (unsigned char *) cbuf); - else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf)) - ereturn(escontext, false, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("could not convert Unicode to server encoding"))); + if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf)) + ereturn(escontext, "could not convert Unicode to server encoding"); addstring(false, cbuf, strlen(cbuf)); } return true; diff --git a/monetdb5/modules/atoms/pg_jsonpath/postgres_defines_internal.h b/monetdb5/modules/atoms/pg_jsonpath/postgres_defines_internal.h --- a/monetdb5/modules/atoms/pg_jsonpath/postgres_defines_internal.h +++ b/monetdb5/modules/atoms/pg_jsonpath/postgres_defines_internal.h @@ -24,20 +24,26 @@ typedef struct StringInfoData typedef StringInfoData *StringInfo; -struct Node { + +typedef struct Node +{ allocator *sa; -}; +} Node; #define SOFT_ERROR_OCCURRED(escontext) (false) #define TODO_ERROR 0 -#define ereturn(context, dummy_value, ...) return TODO_ERROR; +#define ereturn(context, dummy_value, ...) {(void) context; return TODO_ERROR;} #define errcode(X) /* TODO */ #define errmsg(X) /* TODO */ -#define palloc(X) GDKmalloc(X) +#define palloc(X) GDKmalloc(X) +#define pfree(X) GDKfree(X) +#define repalloc(M,NSIZE) GDKrealloc(M, NSIZE) + +#define Max(A,B) MAX(A, B) #define SA (escontext->sa) #define list_make1(X) sa_list_append(SA, NULL, X) @@ -84,4 +90,132 @@ for (;cell;cell = cell->next) #define NIL NULL #define PG_UINT32_MAX ((uint32) UINT32_MAX) +#define ereport(c, m) (void) fmt; (void) msg; +#define ERROR "TODO" +#define errmsg_internal(frmt, msg) "TODO" + +#define errsave(a,b) (void) result; (void) escontext; (void) message; + +// c.h +#define lengthof(array) (sizeof (array) / sizeof ((array)[0])) + +#define pg_strncasecmp(s1, s2, l2) GDKstrncasecmp(s1, s2, strlen(s1), l2) + +// pg_wchar.h +#define MAX_UNICODE_EQUIVALENT_STRING 16 +#define MAX_MULTIBYTE_CHAR_LEN 4 + +// pg_wchar.h +typedef unsigned int pg_wchar; + +// pg_wchar.h +static inline bool +is_valid_unicode_codepoint(pg_wchar c) +{ + return (c > 0 && c <= 0x10FFFF); +} + +// pg_wchar.h +static inline bool +is_utf16_surrogate_first(pg_wchar c) +{ + return (c >= 0xD800 && c <= 0xDBFF); +} + +// pg_wchar.h +static inline bool +is_utf16_surrogate_second(pg_wchar c) +{ + return (c >= 0xDC00 && c <= 0xDFFF); +} + +// pg_wchar.h +static inline pg_wchar +surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) +{ + return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); +} + +// pg_wchar.h +static inline +int +pg_utf_mblen(const unsigned char *s) +{ + int len; + + if ((*s & 0x80) == 0) + len = 1; + else if ((*s & 0xe0) == 0xc0) + len = 2; + else if ((*s & 0xf0) == 0xe0) + len = 3; + else if ((*s & 0xf8) == 0xf0) + len = 4; +#ifdef NOT_USED + else if ((*s & 0xfc) == 0xf8) + len = 5; + else if ((*s & 0xfe) == 0xfc) + len = 6; #endif + else + len = 1; + return len; +} + +// pg_wchar.h +static inline unsigned char * +unicode_to_utf8(pg_wchar c, unsigned char *utf8string) +{ + if (c <= 0x7F) + { + utf8string[0] = c; + } + else if (c <= 0x7FF) + { + utf8string[0] = 0xC0 | ((c >> 6) & 0x1F); + utf8string[1] = 0x80 | (c & 0x3F); + } + else if (c <= 0xFFFF) + { + utf8string[0] = 0xE0 | ((c >> 12) & 0x0F); + utf8string[1] = 0x80 | ((c >> 6) & 0x3F); + utf8string[2] = 0x80 | (c & 0x3F); + } + else + { + utf8string[0] = 0xF0 | ((c >> 18) & 0x07); + utf8string[1] = 0x80 | ((c >> 12) & 0x3F); + utf8string[2] = 0x80 | ((c >> 6) & 0x3F); + utf8string[3] = 0x80 | (c & 0x3F); + } + + return utf8string; +} + +// mbutils.h +static inline +bool +pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s) +{ + /* + * Complain if invalid Unicode code point. The choice of errcode here is + * debatable, but really our caller should have checked this anyway. + */ + if (!is_valid_unicode_codepoint(c)) + return false; + + /* Otherwise, if it's in ASCII range, conversion is trivial */ + if (c <= 0x7F) + { + s[0] = (unsigned char) c; + s[1] = '\0'; + return true; + } + + /* If the server encoding is UTF-8, we just need to reformat the code */ + unicode_to_utf8(c, s); + s[pg_utf_mblen(s)] = '\0'; + return true; +} + +#endif _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org