Author: jvesely
Date: Tue Feb 6 10:44:50 2018
New Revision: 324376

URL: http://llvm.org/viewvc/llvm-project?rev=324376&view=rev
Log:
Add vstore_half_rte implementation
Passes CTS on carrizo
Reviewer: Jeroen Ketema <j.ket...@xs4all.nl>
Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>

Modified:
    libclc/trunk/generic/include/clc/shared/vstore.h
    libclc/trunk/generic/lib/shared/vstore.cl

Modified: libclc/trunk/generic/include/clc/shared/vstore.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/shared/vstore.h?rev=324376&r1=324375&r2=324376&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/shared/vstore.h (original)
+++ libclc/trunk/generic/include/clc/shared/vstore.h Tue Feb 6 10:44:50 2018
@@ -40,6 +40,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
+_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)

 #ifdef cl_khr_fp64
   _CLC_VECTOR_VSTORE_PRIM1(double)
@@ -47,6 +48,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rt
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
+  _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
 #endif

 #ifdef cl_khr_fp16

Modified: libclc/trunk/generic/lib/shared/vstore.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vstore.cl?rev=324376&r1=324375&r2=324376&view=diff
==============================================================================
--- libclc/trunk/generic/lib/shared/vstore.cl (original)
+++ libclc/trunk/generic/lib/shared/vstore.cl Tue Feb 6 10:44:50 2018
@@ -147,6 +147,27 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtp(f
 {
   return ((as_uint(x) & 0x80000000) == 0) ? __clc_rti(x) : __clc_rtz(x);
 }
+_CLC_DEF _CLC_OVERLOAD float __clc_rte(float x)
+{
+  /* Mantisa + implicit bit */
+  const uint mantissa = (as_uint(x) & 0x7fffff) | (1u << 23);
+  const int exp = (as_uint(x) >> 23 & 0xff) - 127;
+  int shift = 13;
+  if (exp < -14) {
+    /* The default assumes lower 13 bits are rounded,
+     * but it might be more for denormals.
+     * Shifting beyond last == 0b, and qr == 00b is not necessary */
+    shift += min(-(exp + 14), 15);
+  }
+  int mask = (1 << shift) - 1;
+  const uint grs = mantissa & mask;
+  const uint last = mantissa & (1 << shift);
+  /* IEEE round up rule is: grs > 101b or grs == 100b and last == 1.
+   * exp > 15 should round to inf. */
+  bool roundup = (grs > (1 << (shift - 1))) ||
+    (grs == (1 << (shift - 1)) && last != 0) || (exp > 15);
+  return roundup ? __clc_rti(x) : __clc_rtz(x);
+}

 #ifdef cl_khr_fp64
 _CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
@@ -192,13 +213,35 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rtp(
 {
   return ((as_ulong(x) & 0x8000000000000000UL) == 0) ? __clc_rti(x) : __clc_rtz(x);
 }
+_CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
+{
+  /* Mantisa + implicit bit */
+  const ulong mantissa = (as_ulong(x) & 0xfffffffffffff) | (1UL << 52);
+  const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023;
+  int shift = 42;
+  if (exp < -14) {
+    /* The default assumes lower 13 bits are rounded,
+     * but it might be more for denormals.
+     * Shifting beyond last == 0b, and qr == 00b is not necessary */
+    shift += min(-(exp + 14), 15);
+  }
+  ulong mask = (1UL << shift) - 1UL;
+  const ulong grs = mantissa & mask;
+  const ulong last = mantissa & (1UL << shift);
+  /* IEEE round up rule is: grs > 101b or grs == 100b and last == 1.
+   * exp > 15 should round to inf. */
+  bool roundup = (grs > (1UL << (shift - 1UL))) ||
+    (grs == (1UL << (shift - 1UL)) && last != 0) || (exp > 15);
+  return roundup ? __clc_rti(x) : __clc_rtz(x);
+}
 #endif

 #define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
   __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
   __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
   __FUNC(SUFFIX ## _rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \
-  __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp)
+  __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \
+  __FUNC(SUFFIX ## _rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte)

 #define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
   __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits