Changeset: 380efa80f753 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=380efa80f753 Added Files: monetdb5/modules/mal/pcre_pub.h monetdb5/modules/weldudfs/Makefile.ag monetdb5/modules/weldudfs/weld_udfs.c Modified Files: monetdb5/modules/Makefile.ag monetdb5/modules/mal/Makefile.ag monetdb5/modules/mal/pcre.c monetdb5/tools/Makefile.ag sql/backends/monet5/rel_weld.c Branch: rel-weld Log Message:
weld impl for likeselect with re_match_no_ignore udf diffs (truncated from 349 to 300 lines): diff --git a/monetdb5/modules/Makefile.ag b/monetdb5/modules/Makefile.ag --- a/monetdb5/modules/Makefile.ag +++ b/monetdb5/modules/Makefile.ag @@ -4,4 +4,4 @@ # # Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V. -SUBDIRS = atoms kernel mal +SUBDIRS = atoms kernel mal weldudfs diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag --- a/monetdb5/modules/mal/Makefile.ag +++ b/monetdb5/modules/mal/Makefile.ag @@ -38,7 +38,7 @@ lib_mal = { manifold.c manifold.h \ oltp.c oltp.h \ wlc.c wlc.h \ - pcre.c \ + pcre.c pcre_pub.h \ profiler.c profiler.h \ querylog.c querylog.h \ remote.c remote.h \ diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c --- a/monetdb5/modules/mal/pcre.c +++ b/monetdb5/modules/mal/pcre.c @@ -23,6 +23,7 @@ #include "mal.h" #include "mal_exception.h" +#include "pcre_pub.h" #ifdef HAVE_LIBPCRE #include <pcre.h> @@ -79,15 +80,6 @@ mal_export str LIKEjoin1(bat *r1, bat *r mal_export str ILIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate); mal_export str ILIKEjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate); -/* current implementation assumes simple %keyword% [keyw%]* */ -typedef struct RE { - char *k; - int search; - int skip; - int len; - struct RE *n; -} RE; - #ifndef HAVE_STRCASESTR static const char * strcasestr(const char *haystack, const char *needle) @@ -113,7 +105,7 @@ strcasestr(const char *haystack, const c } #endif -static int +int re_simple(const char *pat) { int nr = 0; @@ -156,7 +148,7 @@ re_match_ignore(const char *s, RE *patte return 1; } -static int +int re_match_no_ignore(const char *s, RE *pattern) { RE *r; @@ -183,7 +175,7 @@ re_destroy(RE *p) } } -static RE * +RE * re_create(const char *pat, int 
nr) { char *x = GDKstrdup(pat); diff --git a/monetdb5/modules/mal/pcre_pub.h b/monetdb5/modules/mal/pcre_pub.h new file mode 100644 --- /dev/null +++ b/monetdb5/modules/mal/pcre_pub.h @@ -0,0 +1,20 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V. + */ + +/* current implementation assumes simple %keyword% [keyw%]* */ +typedef struct RE { + char *k; + int search; + int skip; + int len; + struct RE *n; +} RE; + +int re_simple(const char *pat); +RE *re_create(const char *pat, int nr); +int re_match_no_ignore(const char *s, RE *pattern); diff --git a/monetdb5/modules/weldudfs/Makefile.ag b/monetdb5/modules/weldudfs/Makefile.ag new file mode 100644 --- /dev/null +++ b/monetdb5/modules/weldudfs/Makefile.ag @@ -0,0 +1,19 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V. + +INCLUDES = ../../mal ../atoms ../kernel ../mal \ + ../../../common/options \ + ../../../common/stream \ + ../../../common/utils \ + ../../../gdk + +MTSAFE + +lib_weldudfs = { + NOINST + SOURCES = \ + weld_udfs.c +} diff --git a/monetdb5/modules/weldudfs/weld_udfs.c b/monetdb5/modules/weldudfs/weld_udfs.c new file mode 100644 --- /dev/null +++ b/monetdb5/modules/weldudfs/weld_udfs.c @@ -0,0 +1,45 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V. 
+ */ + +#include "monetdb_config.h" +#include "gdk.h" +#include "mal.h" +#include "pcre_pub.h" + +typedef struct { + char *data; + int64_t len; +} i8vec; + +MT_Lock initLock MT_LOCK_INITIALIZER("udfs_init"); + +mal_export void state_init(i8vec *op, int64_t *state_ptr); +mal_export void like(int64_t *state_ptr, i8vec *col, i8vec *pattern, i8vec *exc, int8_t *result); + +void state_init(i8vec *op, int64_t *state_ptr) { /* allocates one zeroed int64_t slot and stores its address in *state_ptr; op is unused */ + (void)op; + void *ptr = calloc(1, sizeof(int64_t)); /* exactly one slot: like() reads and writes an int64_t through this pointer, so a zero-element allocation would be accessed out of bounds */ + *state_ptr = (int64_t)ptr; +} + +void like(int64_t *state_ptr, i8vec *col, i8vec *pattern, i8vec *exc, int8_t *result) { /* builds the RE for pattern->data on first use, keeps it in the slot addressed by *state_ptr, and stores the re_match_no_ignore() result for col->data in *result; exc is unused */ + (void)exc; + int64_t *adr = (void*)*state_ptr; + RE *re = (RE*)(*adr); + if (re == NULL) { + MT_lock_set(&initLock); + if ((re = (RE*)(*adr)) == NULL) { + /* The slot was re-read above while holding initLock: another thread may have stored a RE since the unlocked check. Only if it is still empty, create a RE struct and save it in the given mem location */ + int nr = re_simple(pattern->data); + re = re_create(pattern->data, nr); + *adr = (int64_t)re; + } + MT_lock_unset(&initLock); + } + *result = (int8_t)re_match_no_ignore(col->data, re); +} diff --git a/monetdb5/tools/Makefile.ag b/monetdb5/tools/Makefile.ag --- a/monetdb5/tools/Makefile.ag +++ b/monetdb5/tools/Makefile.ag @@ -15,7 +15,7 @@ lib_monetdb5 = { DIR = libdir SOURCES = libmonetdb5.rc LIBS = ../mal/libmal ../modules/atoms/libatoms \ - ../modules/kernel/libkernel ../modules/mal/libmal \ + ../modules/kernel/libkernel ../modules/mal/libmal ../modules/weldudfs/libweldudfs \ ../optimizer/liboptimizer ../scheduler/libscheduler \ ../../gdk/libbat \ HAVE_MAPI?../../clients/mapilib/libmapi \ diff --git a/sql/backends/monet5/rel_weld.c b/sql/backends/monet5/rel_weld.c --- a/sql/backends/monet5/rel_weld.c +++ b/sql/backends/monet5/rel_weld.c @@ -69,7 +69,8 @@ typedef struct { str program; unsigned long program_len; unsigned long program_max_len; - char str_cols[STR_BUF_SIZE * 3]; /* global string cols renaming */ + char global_init[STR_BUF_SIZE * 3]; /* global stmts such as vheap.base renaming and udfs inits */ + char global_cleanup[STR_BUF_SIZE * 3]; list *stmt_list; sql_allocator 
*sa; int error; @@ -137,16 +138,17 @@ get_weld_cmp(int cmp) { static str get_weld_func(sql_subfunc *f) { - if (strcmp(f->func->imp, "+") == 0 || strcmp(f->func->imp, "sum") == 0 || - strcmp(f->func->imp, "count") == 0) + str name = f->func->imp ? f->func->imp : f->func->base.name; + if (strcmp(name, "+") == 0 || strcmp(name, "sum") == 0 || strcmp(name, "count") == 0) return "+"; - else if (strcmp(f->func->imp, "-") == 0) + else if (strcmp(name, "-") == 0) return "-"; - else if (strcmp(f->func->imp, "*") == 0 || strcmp(f->func->imp, "prod") == 0) + else if (strcmp(name, "*") == 0 || strcmp(name, "prod") == 0) return "*"; - else if (strcmp(f->func->imp, "/") == 0) + else if (strcmp(name, "/") == 0) return "/"; - /* TODO check for others that we might support through UDFs */ + else if (strcmp(name, "like") == 0) + return "like"; return NULL; } @@ -208,8 +210,20 @@ exp_has_column(sql_exp *exp) { ret = exp_has_column(exp->l); break; case e_cmp: - if (exp->l) ret |= exp_has_column(exp->l); - if (exp->r) ret |= exp_has_column(exp->r); + if (exp->flag == cmp_filter || exp->flag == cmp_or) { + for (en = ((list*)exp->l)->h; en; en = en->next) { + ret |= exp_has_column(en->data); + } + } else if (exp->l) { + ret |= exp_has_column(exp->l); + } + if (exp->flag == cmp_filter || exp->flag == cmp_or || exp->flag == cmp_in || exp->flag == cmp_notin) { + for (en = ((list*)exp->r)->h; en; en = en->next) { + ret |= exp_has_column(en->data); + } + } else if (exp->r) { + ret |= exp_has_column(exp->r); + } if (exp->f) ret |= exp_has_column(exp->f); break; case e_func: @@ -234,16 +248,45 @@ exp_to_weld(backend *be, weld_state *wst } switch (exp->type) { case e_convert: { - wprintf(wstate, "%s(", getWeldType(exp->tpe.type->localtype)); - exp_to_weld(be, wstate, exp->l); - wprintf(wstate, ")"); + str conv_to = getWeldType(exp->tpe.type->localtype); + if (strcmp(conv_to, "vec[i8]") == 0) { + /* Do nothing */ + exp_to_weld(be, wstate, exp->l); + } else { + wprintf(wstate, "%s(", conv_to); + 
exp_to_weld(be, wstate, exp->l); + wprintf(wstate, ")"); + } break; } case e_cmp: { if (is_anti(exp)) { wprintf(wstate, "("); } - if (exp->f) { + if (exp->flag == cmp_in || exp->flag == cmp_notin) { + /* TODO implement this */ + wstate->error = 1; + return; + } else if (get_cmp(exp) == cmp_or) { + wprintf(wstate, "("); + exps_to_weld(be, wstate, exp->l, ""); + wprintf(wstate, ") || ( "); + exps_to_weld(be, wstate, exp->r, ""); + wprintf(wstate, ")"); + } else if (get_cmp(exp) == cmp_filter) { + /* Must be an udf */ + str udf = get_weld_func(exp->f); + int state_ptr = wstate->next_var++; + sprintf(wstate->global_init + strlen(wstate->global_init), + "let v%d = cudf[state_init, i64](\"%s\");", state_ptr, udf); + sprintf(wstate->global_cleanup + strlen(wstate->global_cleanup), + "let v%d = cudf[state_cleanup, i64](\"%s\", v%d);", state_ptr, udf, state_ptr); + wprintf(wstate, "cudf[%s, bool](v%d,", udf, state_ptr); + exps_to_weld(be, wstate, exp->l, ", "); + wprintf(wstate, ", "); + exps_to_weld(be, wstate, exp->r, ", "); + wprintf(wstate, ")"); + } else if (exp->f) { if (get_weld_cmp(swap_compare(range2lcompare(exp->flag))) == NULL) { wstate->error = 1; return; @@ -342,9 +385,9 @@ base_table_produce(backend *be, sql_rel } if (exp_subtype(exp)->type->localtype == TYPE_str) { /* Save the vheap and stroffset names */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list