Hi Jakub, Thanks for the fix. I've tested this patch with the HSA PRM suite and no bri{ck,g}ing, so please go ahead and commit.
BR, Pekka On Mon, Jan 30, 2017 at 10:17 PM, Jakub Jelinek <ja...@redhat.com> wrote: > On Mon, Jan 30, 2017 at 08:53:18PM +0100, Bernhard Reutner-Fischer wrote: >> As this will likely bri{ck,g} horribly I'll leave these to Martin and Pekka. >> Thanks for fixing but note that you attached the wrong patch below. > > Oops, here is the right one (bootstrapped/regtested on x86_64-linux and > i686-linux again): > > 2017-01-30 Jakub Jelinek <ja...@redhat.com> > > * configure.tgt: Fix i?86-*-linux* entry. > * rt/sat_arithmetic.c (__hsail_sat_add_u32, __hsail_sat_add_u64, > __hsail_sat_add_s32, __hsail_sat_add_s64): Use __builtin_add_overflow. > (__hsail_sat_sub_u8, __hsail_sat_sub_u16): Remove pointless for > overflow > over maximum. > (__hsail_sat_sub_u32, __hsail_sat_sub_u64, __hsail_sat_sub_s32, > __hsail_sat_sub_s64): Use __builtin_sub_overflow. > (__hsail_sat_mul_u32, __hsail_sat_mul_u64, __hsail_sat_mul_s32, > __hsail_sat_mul_s64): Use __builtin_mul_overflow. > * rt/arithmetic.c (__hsail_borrow_u32, __hsail_borrow_u64): Use > __builtin_sub_overflow_p. > (__hsail_carry_u32, __hsail_carry_u64): Use __builtin_add_overflow_p. > * rt/misc.c (__hsail_groupbaseptr, __hsail_kernargbaseptr_u64): > Cast pointers to uintptr_t first before casting to some other integral > type. > * rt/segment.c (__hsail_segmentp_private, __hsail_segmentp_group): > Likewise. > * rt/queue.c (__hsail_ldqueuereadindex, __hsail_ldqueuewriteindex, > __hsail_addqueuewriteindex, __hsail_casqueuewriteindex, > __hsail_stqueuereadindex, __hsail_stqueuewriteindex): Cast integral > value > to uintptr_t first before casting to pointer. > * rt/workitems.c (__hsail_alloca_pop_frame): Cast memcpy first > argument to > void * to avoid warning. > > --- libhsail-rt/configure.tgt.jj 2017-01-30 09:31:51.000000000 +0100 > +++ libhsail-rt/configure.tgt 2017-01-30 09:35:04.402926654 +0100 > @@ -28,9 +28,7 @@ > # broken systems. Currently it has been tested only on x86_64 Linux > # of the upstream gcc targets. More targets shall be added after testing. > case "${target}" in > - i[[3456789]]86-*linux*) > - ;; > - x86_64-*-linux*) > + x86_64-*-linux* | i?86-*-linux*) > ;; > *) > UNSUPPORTED=1 > --- libhsail-rt/rt/sat_arithmetic.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/sat_arithmetic.c 2017-01-30 10:27:27.861325330 +0100 > @@ -49,21 +49,19 @@ __hsail_sat_add_u16 (uint16_t a, uint16_ > uint32_t > __hsail_sat_add_u32 (uint32_t a, uint32_t b) > { > - uint64_t c = (uint64_t) a + (uint64_t) b; > - if (c > UINT32_MAX) > + uint32_t c; > + if (__builtin_add_overflow (a, b, &c)) > return UINT32_MAX; > - else > - return c; > + return c; > } > > uint64_t > __hsail_sat_add_u64 (uint64_t a, uint64_t b) > { > - __uint128_t c = (__uint128_t) a + (__uint128_t) b; > - if (c > UINT64_MAX) > + uint64_t c; > + if (__builtin_add_overflow (a, b, &c)) > return UINT64_MAX; > - else > - return c; > + return c; > } > > int8_t > @@ -93,25 +91,19 @@ __hsail_sat_add_s16 (int16_t a, int16_t > int32_t > __hsail_sat_add_s32 (int32_t a, int32_t b) > { > - int64_t c = (int64_t) a + (int64_t) b; > - if (c > INT32_MAX) > - return INT32_MAX; > - else if (c < INT32_MIN) > - return INT32_MIN; > - else > - return c; > + int32_t c; > + if (__builtin_add_overflow (a, b, &c)) > + return b < 0 ? INT32_MIN : INT32_MAX; > + return c; > } > > int64_t > __hsail_sat_add_s64 (int64_t a, int64_t b) > { > - __int128_t c = (__int128_t) a + (__int128_t) b; > - if (c > INT64_MAX) > - return INT64_MAX; > - else if (c < INT64_MIN) > - return INT64_MIN; > - else > - return c; > + int64_t c; > + if (__builtin_add_overflow (a, b, &c)) > + return b < 0 ? INT64_MIN : INT64_MAX; > + return c; > } > > uint8_t > @@ -120,8 +112,6 @@ __hsail_sat_sub_u8 (uint8_t a, uint8_t b > int16_t c = (uint16_t) a - (uint16_t) b; > if (c < 0) > return 0; > - else if (c > UINT8_MAX) > - return UINT8_MAX; > else > return c; > } > @@ -132,8 +122,6 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_ > int32_t c = (uint32_t) a - (uint32_t) b; > if (c < 0) > return 0; > - else if (c > UINT16_MAX) > - return UINT16_MAX; > else > return c; > } > @@ -141,25 +129,19 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_ > uint32_t > __hsail_sat_sub_u32 (uint32_t a, uint32_t b) > { > - int64_t c = (uint64_t) a - (uint64_t) b; > - if (c < 0) > + uint32_t c; > + if (__builtin_sub_overflow (a, b, &c)) > return 0; > - else if (c > UINT32_MAX) > - return UINT32_MAX; > - else > - return c; > + return c; > } > > uint64_t > __hsail_sat_sub_u64 (uint64_t a, uint64_t b) > { > - __int128_t c = (__uint128_t) a - (__uint128_t) b; > - if (c < 0) > + uint64_t c; > + if (__builtin_sub_overflow (a, b, &c)) > return 0; > - else if (c > UINT64_MAX) > - return UINT64_MAX; > - else > - return c; > + return c; > } > > int8_t > @@ -189,25 +171,19 @@ __hsail_sat_sub_s16 (int16_t a, int16_t > int32_t > __hsail_sat_sub_s32 (int32_t a, int32_t b) > { > - int64_t c = (int64_t) a - (int64_t) b; > - if (c > INT32_MAX) > - return INT32_MAX; > - else if (c < INT32_MIN) > - return INT32_MIN; > - else > - return c; > + int32_t c; > + if (__builtin_sub_overflow (a, b, &c)) > + return b < 0 ? INT32_MAX : INT32_MIN; > + return c; > } > > int64_t > __hsail_sat_sub_s64 (int64_t a, int64_t b) > { > - __int128_t c = (__int128_t) a - (__int128_t) b; > - if (c > INT64_MAX) > - return INT64_MAX; > - else if (c < INT64_MIN) > - return INT64_MIN; > - else > - return c; > + int64_t c; > + if (__builtin_sub_overflow (a, b, &c)) > + return b < 0 ? INT64_MAX : INT64_MIN; > + return c; > } > > uint8_t > @@ -233,21 +209,19 @@ __hsail_sat_mul_u16 (uint16_t a, uint16_ > uint32_t > __hsail_sat_mul_u32 (uint32_t a, uint32_t b) > { > - uint64_t c = (uint64_t) a * (uint64_t) b; > - if (c > UINT32_MAX) > + uint32_t c; > + if (__builtin_mul_overflow (a, b, &c)) > return UINT32_MAX; > - else > - return c; > + return c; > } > > uint64_t > __hsail_sat_mul_u64 (uint64_t a, uint64_t b) > { > - __uint128_t c = (__uint128_t) a * (__uint128_t) b; > - if (c > UINT64_MAX) > + uint64_t c; > + if (__builtin_mul_overflow (a, b, &c)) > return UINT64_MAX; > - else > - return c; > + return c; > } > > int8_t > @@ -277,23 +251,17 @@ __hsail_sat_mul_s16 (int16_t a, int16_t > int32_t > __hsail_sat_mul_s32 (int32_t a, int32_t b) > { > - int64_t c = (int64_t) a * (int64_t) b; > - if (c > INT32_MAX) > - return INT32_MAX; > - else if (c < INT32_MIN) > - return INT32_MIN; > - else > - return c; > + int32_t c; > + if (__builtin_mul_overflow (a, b, &c)) > + return ((a > 0) ^ (b > 0)) ? INT32_MIN : INT32_MAX; > + return c; > } > > int64_t > __hsail_sat_mul_s64 (int64_t a, int64_t b) > { > - __int128_t c = (__int128_t) a * (__int128_t) b; > - if (c > INT64_MAX) > - return INT64_MAX; > - else if (c < INT64_MIN) > - return INT64_MIN; > - else > - return c; > + int64_t c; > + if (__builtin_mul_overflow (a, b, &c)) > + return ((a > 0) ^ (b > 0)) ? INT64_MIN : INT64_MAX; > + return c; > } > --- libhsail-rt/rt/arithmetic.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/arithmetic.c 2017-01-30 10:35:44.935726056 +0100 > @@ -376,41 +376,25 @@ __hsail_ftz_f64 (double a) > uint32_t > __hsail_borrow_u32 (uint32_t a, uint32_t b) > { > - uint64_t c = (uint64_t) a - (uint64_t) b; > - if (c > UINT32_MAX) > - return 1; > - else > - return 0; > + return __builtin_sub_overflow_p (a, b, a); > } > > uint64_t > __hsail_borrow_u64 (uint64_t a, uint64_t b) > { > - __uint128_t c = (__uint128_t) a - (__uint128_t) b; > - if (c > UINT64_MAX) > - return 1; > - else > - return 0; > + return __builtin_sub_overflow_p (a, b, a); > } > > uint32_t > __hsail_carry_u32 (uint32_t a, uint32_t b) > { > - uint64_t c = (uint64_t) a + (uint64_t) b; > - if (c > UINT32_MAX) > - return 1; > - else > - return 0; > + return __builtin_add_overflow_p (a, b, a); > } > > uint64_t > __hsail_carry_u64 (uint64_t a, uint64_t b) > { > - __uint128_t c = (__uint128_t) a + (__uint128_t) b; > - if (c > UINT64_MAX) > - return 1; > - else > - return 0; > + return __builtin_add_overflow_p (a, b, a); > } > > float > --- libhsail-rt/rt/misc.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/misc.c 2017-01-30 10:09:36.340473728 +0100 > @@ -68,8 +68,8 @@ __hsail_debugtrap (uint32_t src, PHSAWor > uint32_t > __hsail_groupbaseptr (PHSAWorkItem *wi) > { > - return (uint32_t) (uint64_t) (wi->wg->group_base_ptr > - - wi->launch_data->group_segment_start_addr); > + return (uint32_t) (uintptr_t) (wi->wg->group_base_ptr > + - wi->launch_data->group_segment_start_addr); > } > > uint64_t > @@ -77,7 +77,7 @@ __hsail_kernargbaseptr_u64 (PHSAWorkItem > { > /* For now assume only a single kernarg allocation at a time. > Proper kernarg memory management to do. */ > - return (uint64_t) wi->launch_data->kernarg_addr; > + return (uint64_t) (uintptr_t) wi->launch_data->kernarg_addr; > } > > uint32_t > --- libhsail-rt/rt/segment.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/segment.c 2017-01-30 10:12:15.430371604 +0100 > @@ -32,9 +32,10 @@ __hsail_segmentp_private (uint64_t flat_ > if (flat_addr == 0) > return 1; > else > - return (void *) flat_addr >= wi->wg->private_base_ptr > - && (void *) flat_addr > - < wi->wg->private_base_ptr + > wi->wg->private_segment_total_size; > + return ((void *) (uintptr_t) flat_addr >= wi->wg->private_base_ptr > + && ((void *) (uintptr_t) flat_addr > + < (wi->wg->private_base_ptr > + + wi->wg->private_segment_total_size))); > } > > uint32_t > @@ -43,9 +44,10 @@ __hsail_segmentp_group (uint64_t flat_ad > if (flat_addr == 0) > return 1; > else > - return (void *) flat_addr >= wi->wg->group_base_ptr > - && (void *) flat_addr < wi->wg->group_base_ptr > - + > wi->launch_data->dp->group_segment_size; > + return ((void *) (uintptr_t) flat_addr >= wi->wg->group_base_ptr > + && ((void *) (uintptr_t) flat_addr > + < (wi->wg->group_base_ptr > + + wi->launch_data->dp->group_segment_size))); > } > > uint32_t > --- libhsail-rt/rt/queue.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/queue.c 2017-01-30 10:13:16.903560011 +0100 > @@ -29,21 +29,21 @@ > uint64_t > __hsail_ldqueuereadindex (uint64_t queue_addr) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > return queue->read_index; > } > > uint64_t > __hsail_ldqueuewriteindex (uint64_t queue_addr) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > return queue->write_index; > } > > uint64_t > __hsail_addqueuewriteindex (uint64_t queue_addr, uint64_t value) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > return __sync_fetch_and_add (&queue->write_index, value); > } > > @@ -51,7 +51,7 @@ uint64_t > __hsail_casqueuewriteindex (uint64_t queue_addr, uint64_t cmp_value, > uint64_t new_value) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > return __sync_val_compare_and_swap (&queue->write_index, cmp_value, > new_value); > } > @@ -59,13 +59,13 @@ __hsail_casqueuewriteindex (uint64_t que > void > __hsail_stqueuereadindex (uint64_t queue_addr, uint64_t value) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > queue->read_index = value; > } > > void > __hsail_stqueuewriteindex (uint64_t queue_addr, uint64_t value) > { > - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; > + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; > queue->write_index = value; > } > --- libhsail-rt/rt/workitems.c.jj 2017-01-26 09:17:46.000000000 +0100 > +++ libhsail-rt/rt/workitems.c 2017-01-30 10:14:53.901279408 +0100 > @@ -938,7 +938,7 @@ __hsail_alloca_pop_frame (PHSAWorkItem * > volatile PHSAWorkGroup *wg = wi->wg; > > wg->alloca_stack_p = wg->alloca_frame_p; > - memcpy (&wg->alloca_frame_p, > + memcpy ((void *) &wg->alloca_frame_p, > (const void *) (wg->private_base_ptr + wg->alloca_frame_p), 4); > /* Now frame_p points to the beginning of the previous function's > frame and stack_p to its end. */ > > > Jakub