Hi! I've noticed a bug in libhsail-rt/configure.tgt - those files are actually pure shell script, not autoconf templates (they are just sourced from the autoconf templates as well as configure scripts generated by autoconf), so [[ ]] doesn't really work there.
After fixing that I've noticed that libhsail-rt actually doesn't build at all as 32-bit. The most important reason is that the code uses __int128_t and __uint128_t, which is not supported on ILP32. While computing the saturating arithmetics that way is probably efficient for 8-bit and 16-bit, I believe even on x86_64 it is unnecessarily slow for 32-bit and very slow for 64-bit SAT computations, so I rewrote those to use the overflow builtins, which should emit efficient code. Do you have some sufficient testsuite to test those? I also don't understand the uint8_t __hsail_sat_sub_u8 (uint8_t a, uint8_t b) { int16_t c = (uint16_t) a - (uint16_t) b; if (c < 0) return 0; else if (c > UINT8_MAX) return UINT8_MAX; else return c; } How could subtraction of unsigned values ever yield a value larger than the unsigned maximum? For a and b in [0, UINT8_MAX] a - b with infinite precision is in the range [ -UINT8_MAX, UINT8_MAX ], so only the if (c < 0) return 0; else return c; makes sense there to me. The rest of the patch are workaround for pointer to int and int to pointer conversion warning and the last one is for stripping away volatile qualifier. Haven't tested whether it actually works then (but I only see dg-do compile tests for brig anyway). OT, should we ask for a DWARF language code for BRIG? Shall it be DW_LANG_hsail or DW_LANG_brig, something else? 2017-01-30 Jakub Jelinek <ja...@redhat.com> * configure.tgt: Fix i?86-*-linux* entry. * rt/sat_arithmetic.c (__hsail_sat_add_u32, __hsail_sat_add_u64, __hsail_sat_add_s32, __hsail_sat_add_s64): Use __builtin_add_overflow. (__hsail_sat_sub_u8, __hsail_sat_sub_u16): Remove pointless for overflow over maximum. (__hsail_sat_sub_u32, __hsail_sat_sub_u64, __hsail_sat_sub_s32, __hsail_sat_sub_s64): Use __builtin_sub_overflow. (__hsail_sat_mul_u32, __hsail_sat_mul_u64, __hsail_sat_mul_s32, __hsail_sat_mul_s64): Use __builtin_mul_overflow. * rt/arithmetic.c (__hsail_borrow_u32, __hsail_borrow_u64): Use __builtin_sub_overflow_p. (__hsail_carry_u32, __hsail_carry_u64): Use __builtin_add_overflow_p. * rt/misc.c (__hsail_groupbaseptr, __hsail_kernargbaseptr_u64): Cast pointers to uintptr_t first before casting to some other integral type. * rt/segment.c (__hsail_segmentp_private, __hsail_segmentp_group): Likewise. * rt/queue.c (__hsail_ldqueuereadindex, __hsail_ldqueuewriteindex, __hsail_addqueuewriteindex, __hsail_casqueuewriteindex, __hsail_stqueuereadindex, __hsail_stqueuewriteindex): Cast integral value to uintptr_t first before casting to pointer. * rt/workitems.c (__hsail_alloca_pop_frame): Cast memcpy first argument to void * to avoid warning. --- libhsail-rt/configure.tgt.jj 2017-01-30 09:31:51.000000000 +0100 +++ libhsail-rt/configure.tgt 2017-01-30 09:35:04.402926654 +0100 @@ -28,9 +28,7 @@ # broken systems. Currently it has been tested only on x86_64 Linux # of the upstream gcc targets. More targets shall be added after testing. case "${target}" in - i[[3456789]]86-*linux*) - ;; - x86_64-*-linux*) + x86_64-*-linux* | i?86-*-linux*) ;; *) UNSUPPORTED=1 --- libhsail-rt/rt/sat_arithmetic.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/sat_arithmetic.c 2017-01-30 10:27:27.861325330 +0100 @@ -49,21 +49,18 @@ __hsail_sat_add_u16 (uint16_t a, uint16_ uint32_t __hsail_sat_add_u32 (uint32_t a, uint32_t b) { - uint64_t c = (uint64_t) a + (uint64_t) b; - if (c > UINT32_MAX) + uint32_t c; + if (__builtin_add_overflow (a, b, &c)) return UINT32_MAX; - else - return c; + return c; } uint64_t __hsail_sat_add_u64 (uint64_t a, uint64_t b) { - __uint128_t c = (__uint128_t) a + (__uint128_t) b; - if (c > UINT64_MAX) + uint64_t c; + if (__builtin_add_overflow (a, b, &c)) return UINT64_MAX; - else - return c; } int8_t @@ -93,25 +90,19 @@ __hsail_sat_add_s16 (int16_t a, int16_t int32_t __hsail_sat_add_s32 (int32_t a, int32_t b) { - int64_t c = (int64_t) a + (int64_t) b; - if (c > INT32_MAX) - return INT32_MAX; - else if (c < INT32_MIN) - return INT32_MIN; - else - return c; + int32_t c; + if (__builtin_add_overflow (a, b, &c)) + return b < 0 ? INT32_MIN : INT32_MAX; + return c; } int64_t __hsail_sat_add_s64 (int64_t a, int64_t b) { - __int128_t c = (__int128_t) a + (__int128_t) b; - if (c > INT64_MAX) - return INT64_MAX; - else if (c < INT64_MIN) - return INT64_MIN; - else - return c; + int64_t c; + if (__builtin_add_overflow (a, b, &c)) + return b < 0 ? INT64_MIN : INT64_MAX; + return c; } uint8_t @@ -120,8 +111,6 @@ __hsail_sat_sub_u8 (uint8_t a, uint8_t b int16_t c = (uint16_t) a - (uint16_t) b; if (c < 0) return 0; - else if (c > UINT8_MAX) - return UINT8_MAX; else return c; } @@ -132,8 +121,6 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_ int32_t c = (uint32_t) a - (uint32_t) b; if (c < 0) return 0; - else if (c > UINT16_MAX) - return UINT16_MAX; else return c; } @@ -141,25 +128,19 @@ __hsail_sat_sub_u16 (uint16_t a, uint16_ uint32_t __hsail_sat_sub_u32 (uint32_t a, uint32_t b) { - int64_t c = (uint64_t) a - (uint64_t) b; - if (c < 0) + uint32_t c; + if (__builtin_sub_overflow (a, b, &c)) return 0; - else if (c > UINT32_MAX) - return UINT32_MAX; - else - return c; + return c; } uint64_t __hsail_sat_sub_u64 (uint64_t a, uint64_t b) { - __int128_t c = (__uint128_t) a - (__uint128_t) b; - if (c < 0) + uint64_t c; + if (__builtin_sub_overflow (a, b, &c)) return 0; - else if (c > UINT64_MAX) - return UINT64_MAX; - else - return c; + return c; } int8_t @@ -189,25 +170,19 @@ __hsail_sat_sub_s16 (int16_t a, int16_t int32_t __hsail_sat_sub_s32 (int32_t a, int32_t b) { - int64_t c = (int64_t) a - (int64_t) b; - if (c > INT32_MAX) - return INT32_MAX; - else if (c < INT32_MIN) - return INT32_MIN; - else - return c; + int32_t c; + if (__builtin_sub_overflow (a, b, &c)) + return b < 0 ? INT32_MAX : INT32_MIN; + return c; } int64_t __hsail_sat_sub_s64 (int64_t a, int64_t b) { - __int128_t c = (__int128_t) a - (__int128_t) b; - if (c > INT64_MAX) - return INT64_MAX; - else if (c < INT64_MIN) - return INT64_MIN; - else - return c; + int64_t c; + if (__builtin_sub_overflow (a, b, &c)) + return b < 0 ? INT64_MAX : INT64_MIN; + return c; } uint8_t @@ -233,21 +208,19 @@ __hsail_sat_mul_u16 (uint16_t a, uint16_ uint32_t __hsail_sat_mul_u32 (uint32_t a, uint32_t b) { - uint64_t c = (uint64_t) a * (uint64_t) b; - if (c > UINT32_MAX) + uint32_t c; + if (__builtin_mul_overflow (a, b, &c)) return UINT32_MAX; - else - return c; + return c; } uint64_t __hsail_sat_mul_u64 (uint64_t a, uint64_t b) { - __uint128_t c = (__uint128_t) a * (__uint128_t) b; - if (c > UINT64_MAX) + uint64_t c; + if (__builtin_mul_overflow (a, b, &c)) return UINT64_MAX; - else - return c; + return c; } int8_t @@ -277,23 +250,17 @@ __hsail_sat_mul_s16 (int16_t a, int16_t int32_t __hsail_sat_mul_s32 (int32_t a, int32_t b) { - int64_t c = (int64_t) a * (int64_t) b; - if (c > INT32_MAX) - return INT32_MAX; - else if (c < INT32_MIN) - return INT32_MIN; - else - return c; + int32_t c; + if (__builtin_mul_overflow (a, b, &c)) + return ((a > 0) ^ (b > 0)) ? INT32_MIN : INT32_MAX; + return c; } int64_t __hsail_sat_mul_s64 (int64_t a, int64_t b) { - __int128_t c = (__int128_t) a * (__int128_t) b; - if (c > INT64_MAX) - return INT64_MAX; - else if (c < INT64_MIN) - return INT64_MIN; - else - return c; + int64_t c; + if (__builtin_mul_overflow (a, b, &c)) + return ((a > 0) ^ (b > 0)) ? INT64_MIN : INT64_MAX; + return c; } --- libhsail-rt/rt/arithmetic.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/arithmetic.c 2017-01-30 10:35:44.935726056 +0100 @@ -376,41 +376,25 @@ __hsail_ftz_f64 (double a) uint32_t __hsail_borrow_u32 (uint32_t a, uint32_t b) { - uint64_t c = (uint64_t) a - (uint64_t) b; - if (c > UINT32_MAX) - return 1; - else - return 0; + return __builtin_sub_overflow_p (a, b, a); } uint64_t __hsail_borrow_u64 (uint64_t a, uint64_t b) { - __uint128_t c = (__uint128_t) a - (__uint128_t) b; - if (c > UINT64_MAX) - return 1; - else - return 0; + return __builtin_sub_overflow_p (a, b, a); } uint32_t __hsail_carry_u32 (uint32_t a, uint32_t b) { - uint64_t c = (uint64_t) a + (uint64_t) b; - if (c > UINT32_MAX) - return 1; - else - return 0; + return __builtin_add_overflow_p (a, b, a); } uint64_t __hsail_carry_u64 (uint64_t a, uint64_t b) { - __uint128_t c = (__uint128_t) a + (__uint128_t) b; - if (c > UINT64_MAX) - return 1; - else - return 0; + return __builtin_add_overflow_p (a, b, a); } float --- libhsail-rt/rt/misc.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/misc.c 2017-01-30 10:09:36.340473728 +0100 @@ -68,8 +68,8 @@ __hsail_debugtrap (uint32_t src, PHSAWor uint32_t __hsail_groupbaseptr (PHSAWorkItem *wi) { - return (uint32_t) (uint64_t) (wi->wg->group_base_ptr - - wi->launch_data->group_segment_start_addr); + return (uint32_t) (uintptr_t) (wi->wg->group_base_ptr + - wi->launch_data->group_segment_start_addr); } uint64_t @@ -77,7 +77,7 @@ __hsail_kernargbaseptr_u64 (PHSAWorkItem { /* For now assume only a single kernarg allocation at a time. Proper kernarg memory management to do. */ - return (uint64_t) wi->launch_data->kernarg_addr; + return (uint64_t) (uintptr_t) wi->launch_data->kernarg_addr; } uint32_t --- libhsail-rt/rt/segment.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/segment.c 2017-01-30 10:12:15.430371604 +0100 @@ -32,9 +32,10 @@ __hsail_segmentp_private (uint64_t flat_ if (flat_addr == 0) return 1; else - return (void *) flat_addr >= wi->wg->private_base_ptr - && (void *) flat_addr - < wi->wg->private_base_ptr + wi->wg->private_segment_total_size; + return ((void *) (uintptr_t) flat_addr >= wi->wg->private_base_ptr + && ((void *) (uintptr_t) flat_addr + < (wi->wg->private_base_ptr + + wi->wg->private_segment_total_size))); } uint32_t @@ -43,9 +44,10 @@ __hsail_segmentp_group (uint64_t flat_ad if (flat_addr == 0) return 1; else - return (void *) flat_addr >= wi->wg->group_base_ptr - && (void *) flat_addr < wi->wg->group_base_ptr - + wi->launch_data->dp->group_segment_size; + return ((void *) (uintptr_t) flat_addr >= wi->wg->group_base_ptr + && ((void *) (uintptr_t) flat_addr + < (wi->wg->group_base_ptr + + wi->launch_data->dp->group_segment_size))); } uint32_t --- libhsail-rt/rt/queue.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/queue.c 2017-01-30 10:13:16.903560011 +0100 @@ -29,21 +29,21 @@ uint64_t __hsail_ldqueuereadindex (uint64_t queue_addr) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; return queue->read_index; } uint64_t __hsail_ldqueuewriteindex (uint64_t queue_addr) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; return queue->write_index; } uint64_t __hsail_addqueuewriteindex (uint64_t queue_addr, uint64_t value) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; return __sync_fetch_and_add (&queue->write_index, value); } @@ -51,7 +51,7 @@ uint64_t __hsail_casqueuewriteindex (uint64_t queue_addr, uint64_t cmp_value, uint64_t new_value) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; return __sync_val_compare_and_swap (&queue->write_index, cmp_value, new_value); } @@ -59,13 +59,13 @@ __hsail_casqueuewriteindex (uint64_t que void __hsail_stqueuereadindex (uint64_t queue_addr, uint64_t value) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; queue->read_index = value; } void __hsail_stqueuewriteindex (uint64_t queue_addr, uint64_t value) { - phsa_queue_t *queue = (phsa_queue_t *) queue_addr; + phsa_queue_t *queue = (phsa_queue_t *) (uintptr_t) queue_addr; queue->write_index = value; } --- libhsail-rt/rt/workitems.c.jj 2017-01-26 09:17:46.000000000 +0100 +++ libhsail-rt/rt/workitems.c 2017-01-30 10:14:53.901279408 +0100 @@ -938,7 +938,7 @@ __hsail_alloca_pop_frame (PHSAWorkItem * volatile PHSAWorkGroup *wg = wi->wg; wg->alloca_stack_p = wg->alloca_frame_p; - memcpy (&wg->alloca_frame_p, + memcpy ((void *) &wg->alloca_frame_p, (const void *) (wg->private_base_ptr + wg->alloca_frame_p), 4); /* Now frame_p points to the beginning of the previous function's frame and stack_p to its end. */ Jakub