Changeset: bfcd5e0e842f for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bfcd5e0e842f Modified Files: sql/backends/monet5/UDF/capi/Tests/capi04.sql sql/backends/monet5/UDF/capi/capi.c Branch: jitudf Log Message:
Copy data that is not page aligned because we can only protect entire pages. diffs (truncated from 301 to 300 lines): diff --git a/sql/backends/monet5/UDF/capi/Tests/capi04.sql b/sql/backends/monet5/UDF/capi/Tests/capi04.sql --- a/sql/backends/monet5/UDF/capi/Tests/capi04.sql +++ b/sql/backends/monet5/UDF/capi/Tests/capi04.sql @@ -49,3 +49,4 @@ INSERT INTO strings VALUES ('ello'), ('o SELECT capi04(i) FROM strings; ROLLBACK; + diff --git a/sql/backends/monet5/UDF/capi/capi.c b/sql/backends/monet5/UDF/capi/capi.c --- a/sql/backends/monet5/UDF/capi/capi.c +++ b/sql/backends/monet5/UDF/capi/capi.c @@ -32,9 +32,6 @@ typedef struct _mprotected_region { void *addr; size_t len; - void *actual_addr; - size_t actual_len; - struct _mprotected_region *next; } mprotected_region; @@ -96,58 +93,18 @@ static bool WriteTextToFile(FILE *f, con static void handler(int sig, siginfo_t *si, void *unused) { - int actually_protected_area = false; - mprotected_region *found_region = NULL; int tid = THRgettid(); (void)sig; (void)unused; - // we caught a segfault or bus error - // this can be either because - // (1) the function accessed a protected piece of memory - // (2) the function caused a segfault by e.g. dereferencing a NULL pointer - // in the first case, this *might* be a valid memory access - // this is because we are required to align our mprotects on page boundaries - // thus sometimes we mprotect a page where only part of the page - // should actually be protected. Thus for this case we check if the access - // was actually an error - if (actual_mprotected_regions[tid]) { - mprotected_region *region = *actual_mprotected_regions[tid]; - while (region) { - if (si->si_addr >= region->addr && - (char *)si->si_addr <= (char *)region->addr + region->len) { - // the address belongs to this mprotected region - found_region = region; - if (si->si_addr >= region->actual_addr && - (char *)si->si_addr <= - (char *)region->actual_addr + region->actual_len) { - // and the address is actually supposed to be protected - actually_protected_area = true; - break; - } - } - region = region->next; - } - } - if (found_region && !actually_protected_area) { - // this is NOT an actually protected area - // thus the segfault/bus error is invalid - // the nasty part here is that we have to unprotect the entire page now - // thus opening us up to future modifications of the data + longjmp(jump_buffer[tid], 1); +} - mprotected_region *region = *actual_mprotected_regions[tid]; - while (region) { - if (si->si_addr >= region->addr && - (char *)si->si_addr <= (char *)region->addr + region->len) { - // the address belongs to this mprotected region - clear_mprotect(found_region->addr, found_region->len); - } - region = region->next; - } - } else { - longjmp(jump_buffer[tid], 1); - } +static bool can_mprotect_region(void* addr) { + int pagesize = getpagesize(); + void* page_begin = (void *)((size_t)addr - (size_t)addr % pagesize); + return page_begin == addr; } static char *mprotect_region(void *addr, size_t len, @@ -156,23 +113,10 @@ static char *mprotect_region(void *addr, mprotected_region *region; int pagesize; void *page_begin; - void *actual_addr = addr; - size_t actual_len = len; if (len == 0) return NULL; - // we must mprotect an entire page - // thus here we check if the region is page-aligned - // and if it is not, we page-align it - pagesize = getpagesize(); - page_begin = (void *)((size_t)addr - (size_t)addr % pagesize); - if (page_begin != addr) { - // data is not page-aligned - len += ((size_t)addr - (size_t)page_begin); - addr = page_begin; - } - // page align len - len = len % pagesize == 0 ? len : len - len % pagesize + pagesize; + assert(can_mprotect_region(addr)); region = GDKmalloc(sizeof(mprotected_region)); if (!region) { @@ -181,8 +125,6 @@ static char *mprotect_region(void *addr, region->addr = addr; region->len = len; region->next = *regions; - region->actual_addr = actual_addr; - region->actual_len = actual_len; *regions = region; return NULL; } @@ -280,7 +222,9 @@ GENERATE_BASE_HEADERS(char *, str); GENERATE_BASE_HEADERS(cudf_data_date, date); GENERATE_BASE_HEADERS(cudf_data_time, time); GENERATE_BASE_HEADERS(cudf_data_timestamp, timestamp); -GENERATE_BASE_HEADERS(cudf_data_blob, blob); +static int blob_is_null(cudf_data_blob value); +static void blob_initialize(struct cudf_data_struct_blob *self, + size_t count); #define GENERATE_BAT_INPUT_BASE(tpe) \ struct cudf_data_struct_##tpe *bat_data = \ @@ -307,10 +251,14 @@ GENERATE_BASE_HEADERS(cudf_data_blob, bl /* bat is dense, materialize it */ \ bat_data->data = wrapped_GDK_malloc_nojump( \ bat_data->count * sizeof(bat_data->null_value)); \ + if (!bat_data->data) { \ + msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \ + goto wrapup; \ + } \ for (it = 0; it < bat_data->count; it++) { \ bat_data->data[it] = val++; \ } \ - } else { \ + } else if (can_mprotect_region(Tloc(b, 0))) { \ bat_data->data = (tpe *)Tloc(b, 0); \ mprotect_retval = mprotect_region( \ bat_data->data, \ @@ -321,6 +269,16 @@ GENERATE_BASE_HEADERS(cudf_data_blob, bl mprotect_retval); \ goto wrapup; \ } \ + } else { \ + /* cannot mprotect bat region, copy data */ \ + bat_data->data = wrapped_GDK_malloc_nojump( \ + bat_data->count * sizeof(bat_data->null_value)); \ + if (!bat_data->data) { \ + msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); \ + goto wrapup; \ + } \ + memcpy(bat_data->data, Tloc(b, 0), \ + bat_data->count * sizeof(bat_data->null_value)); \ } \ } @@ -974,6 +932,7 @@ static str CUDFeval(Client cntxt, MalBlk } else if (bat_type == TYPE_str) { BATiter li; BUN p = 0, q = 0; + bool can_mprotect_varheap = false; str mprotect_retval; GENERATE_BAT_INPUT_BASE(str); bat_data->count = BATcount(input_bats[index]); @@ -985,6 +944,11 @@ static str CUDFeval(Client cntxt, MalBlk } j = 0; + // check if we can mprotect the varheap + // if we can't mprotect, copy the strings instead + assert(input_bats[index]->tvheap); + can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base); + li = bat_iterator(input_bats[index]); BATloop(input_bats[index], p, q) { @@ -992,20 +956,30 @@ static str CUDFeval(Client cntxt, MalBlk if (strcmp(t, str_nil) == 0) { bat_data->data[j] = NULL; } else { - bat_data->data[j] = t; + if (can_mprotect_varheap) { + bat_data->data[j] = t; + } else { + bat_data->data[j] = wrapped_GDK_malloc_nojump(strlen(t) + 1); + if (!bat_data->data[j]) { + msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); + goto wrapup; + } + strcpy(bat_data->data[j], t); + } } j++; } - // for string columns, mprotect the varheap of the BAT - assert(input_bats[index]->tvheap); - mprotect_retval = - mprotect_region(input_bats[index]->tvheap->base, - input_bats[index]->tvheap->size, ®ions); - if (mprotect_retval) { - msg = createException(MAL, "cudf.eval", - "Failed to mprotect region: %s", - mprotect_retval); - goto wrapup; + if (can_mprotect_varheap) { + // mprotect the varheap of the BAT to prevent modification of input strings + mprotect_retval = + mprotect_region(input_bats[index]->tvheap->base, + input_bats[index]->tvheap->size, ®ions); + if (mprotect_retval) { + msg = createException(MAL, "cudf.eval", + "Failed to mprotect region: %s", + mprotect_retval); + goto wrapup; + } } } else if (bat_type == TYPE_date) { date *baseptr; @@ -1059,6 +1033,7 @@ static str CUDFeval(Client cntxt, MalBlk BATiter li; BUN p = 0, q = 0; str mprotect_retval; + bool can_mprotect_varheap = false; GENERATE_BAT_INPUT_BASE(blob); bat_data->count = BATcount(input_bats[index]); bat_data->data = @@ -1069,6 +1044,11 @@ static str CUDFeval(Client cntxt, MalBlk } j = 0; + // check if we can mprotect the varheap + // if we can't mprotect, copy the strings instead + assert(input_bats[index]->tvheap); + can_mprotect_varheap = can_mprotect_region(input_bats[index]->tvheap->base); + li = bat_iterator(input_bats[index]); BATloop(input_bats[index], p, q) { @@ -1078,22 +1058,32 @@ static str CUDFeval(Client cntxt, MalBlk bat_data->data[j].data = NULL; } else { bat_data->data[j].size = t->nitems; - bat_data->data[j].data = &t->data[0]; + if (can_mprotect_varheap) { + bat_data->data[j].data = &t->data[0]; + } else { + bat_data->data[j].data = wrapped_GDK_malloc_nojump(t->nitems); + if (!bat_data->data[j].data) { + msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL); + goto wrapup; + } + memcpy(bat_data->data[j].data, &t->data[0], t->nitems); + } } j++; } bat_data->null_value.size = 0; bat_data->null_value.data = NULL; - // for blob columns, mprotect the varheap of the BAT - assert(input_bats[index]->tvheap); - mprotect_retval = - mprotect_region(input_bats[index]->tvheap->base, - input_bats[index]->tvheap->size, ®ions); - if (mprotect_retval) { - msg = createException(MAL, "cudf.eval", - "Failed to mprotect region: %s", - mprotect_retval); - goto wrapup; + if (can_mprotect_varheap) { + // for blob columns, mprotect the varheap of the BAT + mprotect_retval = + mprotect_region(input_bats[index]->tvheap->base, + input_bats[index]->tvheap->size, ®ions); + if (mprotect_retval) { + msg = createException(MAL, "cudf.eval", + "Failed to mprotect region: %s", + mprotect_retval); + goto wrapup; + } } } else { // unsupported type: convert to string @@ -1755,3 +1745,10 @@ int timestamp_is_null(cudf_data_timestam int str_is_null(char *value) { return value == NULL; } int blob_is_null(cudf_data_blob value) { return value.data == NULL; } + +void blob_initialize(struct cudf_data_struct_blob *self, + size_t count) { + self->count = count; + self->data = jump_GDK_malloc(count * sizeof(self->null_value)); + memset(self->data, 0, count * sizeof(self->null_value)); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list