Hi, bootstrapped and regtested on x86_64-linux. I also tested that all the new calls can be linked with the AOCL LibM library. OK to push?
Thanks, Filip Kastl -- 8< -- We currently support generating vectorized math calls to the AMD core math library (ACML) (-mveclibabi=acml). That library is end-of-life and its successor is the math library from AMD Optimizing CPU Libraries (AOCL). This patch adds support for AOCL (-mveclibabi=aocl). That significantly broadens the range of vectorized math functions optimized for AMD CPUs that GCC can generate calls to. See the edit to invoke.texi for a complete list of added functions. Compared to the list of functions in AOCL LibM docs I left out the sincos, linearfrac, powx, sqrt and fabs operations. I also left out all the functions working with arrays and amd_vrd2_expm1() (the AMD docs list the function but I wasn't able to link calls to it with the current version of the library). gcc/ChangeLog: PR target/56504 * config/i386/i386-options.cc (ix86_option_override_internal): Add ix86_veclibabi_type_aocl case. * config/i386/i386-options.h (ix86_veclibabi_aocl): Add extern ix86_veclibabi_aocl(). * config/i386/i386-opts.h (enum ix86_veclibabi): Add ix86_veclibabi_type_aocl into the ix86_veclibabi enum. * config/i386/i386.cc (ix86_veclibabi_aocl): New function. * config/i386/i386.opt: Add the 'aocl' type. * doc/invoke.texi: Document -mveclibabi=aocl. gcc/testsuite/ChangeLog: PR target/56504 * gcc.target/i386/vectorize-aocl1.c: New test. 
Signed-off-by: Filip Kastl <fka...@suse.cz> --- gcc/config/i386/i386-options.cc | 4 + gcc/config/i386/i386-options.h | 1 + gcc/config/i386/i386-opts.h | 3 +- gcc/config/i386/i386.cc | 143 +++++++++++ gcc/config/i386/i386.opt | 3 + gcc/doc/invoke.texi | 57 +++-- .../gcc.target/i386/vectorize-aocl1.c | 224 ++++++++++++++++++ 7 files changed, 419 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vectorize-aocl1.c diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 603166d249c..76a20179a36 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2877,6 +2877,10 @@ ix86_option_override_internal (bool main_args_p, ix86_veclib_handler = &ix86_veclibabi_acml; break; + case ix86_veclibabi_type_aocl: + ix86_veclib_handler = &ix86_veclibabi_aocl; + break; + default: gcc_unreachable (); } diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h index 0d448ef9f15..591a6152c01 100644 --- a/gcc/config/i386/i386-options.h +++ b/gcc/config/i386/i386-options.h @@ -60,6 +60,7 @@ void ix86_simd_clone_adjust (struct cgraph_node *node); extern tree (*ix86_veclib_handler) (combined_fn, tree, tree); extern tree ix86_veclibabi_svml (combined_fn, tree, tree); extern tree ix86_veclibabi_acml (combined_fn, tree, tree); +extern tree ix86_veclibabi_aocl (combined_fn, tree, tree); enum ix86_function_specific_strings { diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index 35542b28936..69fcd82bf47 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -87,7 +87,8 @@ enum asm_dialect { enum ix86_veclibabi { ix86_veclibabi_type_none, ix86_veclibabi_type_svml, - ix86_veclibabi_type_acml + ix86_veclibabi_type_acml, + ix86_veclibabi_type_aocl }; enum stack_protector_guard { diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 6ac3a5d55f2..8ccbc8bbc07 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -19882,6 
+19882,149 @@ ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) return new_fndecl; } +/* Handler for an AOCL-LibM-style interface to + a library with vectorized intrinsics. */ + +tree +ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in) +{ + char name[20] = "amd_vr"; + int name_len = 6; + tree fntype, new_fndecl, args; + unsigned arity; + const char *bname; + machine_mode el_mode, in_mode; + int n, in_n; + + /* AOCL-LibM is 64bits only. It is also only suitable for unsafe math only + as it trades off some accuracy for increased performance. */ + if (!TARGET_64BIT + || !flag_unsafe_math_optimizations) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + gcc_checking_assert (n > 0); + + /* Decide whether there exists a function for the combination of FN, the mode + and the vector width. Return early if it doesn't. */ + + if (el_mode != DFmode && el_mode != SFmode) + return NULL_TREE; + + /* Supported vector widths for given FN and single/double precision. Zeros + are used to fill out unused positions in the arrays. */ + static const int supported_n[][2][3] = { + /* Single prec. , Double prec. */ + { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */ + { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */ + { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */ + { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */ + { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */ + { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. 
*/ + { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */ + { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */ + { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */ + { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */ + }; + + /* We cannot simply index the supported_n array with FN since multiple FNs + may correspond to a single operation (see the definitions of these + CASE_CFN_* macros). */ + int i; + switch (fn) + { + CASE_CFN_TAN : i = 0; break; + CASE_CFN_EXP : i = 1; break; + CASE_CFN_EXP2 : i = 2; break; + CASE_CFN_LOG : i = 3; break; + CASE_CFN_LOG2 : i = 4; break; + CASE_CFN_COS : i = 5; break; + CASE_CFN_SIN : i = 6; break; + CASE_CFN_POW : i = 7; break; + CASE_CFN_ERF : i = 8; break; + CASE_CFN_ATAN : i = 9; break; + CASE_CFN_LOG10 : i = 10; break; + CASE_CFN_EXP10 : i = 11; break; + CASE_CFN_LOG1P : i = 12; break; + CASE_CFN_ASIN : i = 13; break; + CASE_CFN_ACOS : i = 14; break; + CASE_CFN_TANH : i = 15; break; + CASE_CFN_EXPM1 : i = 16; break; + CASE_CFN_COSH : i = 17; break; + default: return NULL_TREE; + } + + int j = el_mode == DFmode; + bool n_is_supported = false; + for (unsigned k = 0; k < 3; k++) + if (supported_n[i][j][k] == n) + { + n_is_supported = true; + break; + } + if (!n_is_supported) + return NULL_TREE; + + /* Append the precision and the vector width to the function name we are + constructing. */ + name[name_len++] = el_mode == DFmode ? 'd' : 's'; + switch (n) + { + case 2: + case 4: + case 8: + name[name_len++] = '0' + n; + break; + case 16: + name[name_len++] = '1'; + name[name_len++] = '6'; + break; + default: + gcc_unreachable (); + } + name[name_len++] = '_'; + + /* Append the operation name (steal it from the name of a builtin). */ + tree fndecl = mathfn_built_in (el_mode == DFmode + ? 
double_type_node : float_type_node, fn); + bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + sprintf (name + name_len, "%s", bname + 10); + + arity = 0; + for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) + arity++; + + if (arity == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + /* Returns a decl of a function that implements scatter store with register type VECTYPE and index type INDEX_TYPE and SCALE. Return NULL_TREE if it is not available. */ diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 99e86f545e8..ea292650ed1 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -588,6 +588,9 @@ Enum(ix86_veclibabi) String(svml) Value(ix86_veclibabi_type_svml) EnumValue Enum(ix86_veclibabi) String(acml) Value(ix86_veclibabi_type_acml) +EnumValue +Enum(ix86_veclibabi) String(aocl) Value(ix86_veclibabi_type_aocl) + mvect8-ret-in-mem Target Mask(VECT8_RETURNS) Save Return 8-byte vectors in memory. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 859af06036f..3c63fa8f737 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -35943,25 +35943,52 @@ all of the reciprocal approximations, except for square root. @opindex mveclibabi @item -mveclibabi=@var{type} -Specifies the ABI type to use for vectorizing intrinsics using an -external library. Supported values for @var{type} are @samp{svml} -for the Intel short -vector math library and @samp{acml} for the AMD math core library. +Specifies the ABI type to use for vectorizing intrinsics using an external +library. 
Supported values for @var{type} are @samp{svml} for the Intel short +vector math library, @samp{aocl} for the math library (LibM) from AMD +Optimizing CPU Libraries (AOCL) and @samp{acml} for the end-of-life AMD core +math library (to which AOCL-LibM is the successor). To use this option, both @option{-ftree-vectorize} and @option{-funsafe-math-optimizations} have to be enabled, and an SVML or ACML ABI-compatible library must be specified at link time. -GCC currently emits calls to @code{vmldExp2}, -@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2}, -@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2}, -@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2}, -@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2}, -@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, -@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4}, -@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4}, -@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4}, -@code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding -function type when @option{-mveclibabi=svml} is used, and @code{__vrd2_sin}, +GCC currently emits calls to @code{vmldExp2}, @code{vmldLn2}, +@code{vmldLog102}, @code{vmldPow2}, @code{vmldTanh2}, @code{vmldTan2}, +@code{vmldAtan2}, @code{vmldAtanh2}, @code{vmldCbrt2}, @code{vmldSinh2}, +@code{vmldSin2}, @code{vmldAsinh2}, @code{vmldAsin2}, @code{vmldCosh2}, +@code{vmldCos2}, @code{vmldAcosh2}, @code{vmldAcos2}, @code{vmlsExp4}, +@code{vmlsLn4}, @code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, +@code{vmlsTan4}, @code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, +@code{vmlsSinh4}, @code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, +@code{vmlsCosh4}, @code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for +corresponding function type when @option{-mveclibabi=svml} is used, +@code{amd_vrs4_acosf}, @code{amd_vrs16_acosf}, @code{amd_vrd8_asin}, 
+@code{amd_vrs4_asinf}, @code{amd_vrs8_asinf}, @code{amd_vrs16_asinf}, +@code{amd_vrd2_atan}, @code{amd_vrd8_atan}, @code{amd_vrs4_atanf}, +@code{amd_vrs8_atanf}, @code{amd_vrs16_atanf}, @code{amd_vrd2_cos}, +@code{amd_vrd4_cos}, @code{amd_vrd8_cos}, @code{amd_vrs4_cosf}, +@code{amd_vrs8_cosf}, @code{amd_vrs16_cosf}, @code{amd_vrs4_coshf}, +@code{amd_vrs8_coshf}, @code{amd_vrd2_erf}, @code{amd_vrd4_erf}, +@code{amd_vrd8_erf}, @code{amd_vrs4_erff}, @code{amd_vrs8_erff}, +@code{amd_vrs16_erff}, @code{amd_vrd2_exp}, @code{amd_vrd4_exp}, +@code{amd_vrd8_exp}, @code{amd_vrs4_expf}, @code{amd_vrs8_expf}, +@code{amd_vrs16_expf}, @code{amd_vrd2_exp10}, @code{amd_vrs4_exp10f}, +@code{amd_vrd2_exp2}, @code{amd_vrd4_exp2}, @code{amd_vrd8_exp2}, +@code{amd_vrs4_exp2f}, @code{amd_vrs8_exp2f}, @code{amd_vrs16_exp2f}, +@code{amd_vrs4_expm1f}, @code{amd_vrd2_log}, @code{amd_vrd4_log}, +@code{amd_vrd8_log}, @code{amd_vrs4_logf}, @code{amd_vrs8_logf}, +@code{amd_vrs16_logf}, @code{amd_vrd2_log10}, @code{amd_vrs4_log10f}, +@code{amd_vrs8_log10f}, @code{amd_vrs16_log10f}, @code{amd_vrd2_log1p}, +@code{amd_vrs4_log1pf}, @code{amd_vrd2_log2}, @code{amd_vrd4_log2}, +@code{amd_vrd8_log2}, @code{amd_vrs4_log2f}, @code{amd_vrs8_log2f}, +@code{amd_vrs16_log2f}, @code{amd_vrd2_pow}, @code{amd_vrd4_pow}, +@code{amd_vrd8_pow}, @code{amd_vrs4_powf}, @code{amd_vrs8_powf}, +@code{amd_vrs16_powf}, @code{amd_vrd2_sin}, @code{amd_vrd4_sin}, +@code{amd_vrd8_sin}, @code{amd_vrs4_sinf}, @code{amd_vrs8_sinf}, +@code{amd_vrs16_sinf}, @code{amd_vrd2_tan}, @code{amd_vrd4_tan}, +@code{amd_vrd8_tan}, @code{amd_vrs16_tanf}, @code{amd_vrs4_tanhf}, +@code{amd_vrs8_tanhf}, @code{amd_vrs16_tanhf} for the corresponding function +type when @option{-mveclibabi=aocl} is used, and @code{__vrd2_sin}, @code{__vrd2_cos}, @code{__vrd2_exp}, @code{__vrd2_log}, @code{__vrd2_log2}, @code{__vrd2_log10}, @code{__vrs4_sinf}, @code{__vrs4_cosf}, @code{__vrs4_expf}, @code{__vrs4_logf}, @code{__vrs4_log2f}, diff --git 
a/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c b/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c new file mode 100644 index 00000000000..5ffb04a7b5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c @@ -0,0 +1,224 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=znver5 -mveclibabi=aocl" } */ + + +/* Declare glibc math functions we need since this testcase may be run on + systems that don't have glibc. */ +float tanf(float); +float expf(float); +float exp2f(float); +float logf(float); +float log2f(float); +float cosf(float); +float sinf(float); +float powf(float, float); +float erff(float); +float atanf(float); +float log10f(float); +float exp10f(float); +float expm1f(float); +float log1pf(float); +float asinf(float); +float acosf(float); +float tanhf(float); +float coshf(float); + +double tan(double); +double exp(double); +double exp2(double); +double log(double); +double log2(double); +double cos(double); +double sin(double); +double pow(double, double); +double erf(double); +double atan(double); +double log10(double); +double exp10(double); +double expm1(double); +double log1p(double); +double asin(double); +double acos(double); +double tanh(double); +double cosh(double); + +#define gentest1(FUN, BASE, VF) \ + extern BASE s_##FUN##_##BASE##_##VF[VF]; \ + extern BASE d_##FUN##_##BASE##_##VF[VF]; \ + void test_##FUN##_##BASE##_##VF (void) \ + { \ + for (int i = 0; i < VF; i++) \ + d_##FUN##_##BASE##_##VF[i] \ + = FUN (s_##FUN##_##BASE##_##VF[i]); \ + } \ + + +#define gentest2(FUN, BASE, VF) \ + extern BASE s1_##FUN##_##BASE##_##VF[VF]; \ + extern BASE s2_##FUN##_##BASE##_##VF[VF]; \ + extern BASE d_##FUN##_##BASE##_##VF[VF]; \ + void test_##FUN##_##BASE##_##VF (void) \ + { \ + for (int i = 0; i < VF; i++) \ + d_##FUN##_##BASE##_##VF[i] \ + = FUN (s1_##FUN##_##BASE##_##VF[i], \ + s2_##FUN##_##BASE##_##VF[i]); \ + } \ + + +gentest1(tan, float, 16) + +#define COMMON_FLOAT_TESTS1(FUN) \ + 
gentest1(FUN, float, 4) \ + gentest1(FUN, float, 8) \ + gentest1(FUN, float, 16) + +COMMON_FLOAT_TESTS1(exp) +COMMON_FLOAT_TESTS1(exp2) +COMMON_FLOAT_TESTS1(log) +COMMON_FLOAT_TESTS1(log2) +COMMON_FLOAT_TESTS1(cos) +COMMON_FLOAT_TESTS1(sin) + +gentest2(powf, float, 4) +gentest2(powf, float, 8) +gentest2(powf, float, 16) + +//COMMON_FLOAT_TESTS1(sqrt) provided by an instruction +COMMON_FLOAT_TESTS1(erf) + +//gentest1(fabsf, float, 4) provided by an instruction +//gentest1(fabsf, float, 8) provided by an instruction + +COMMON_FLOAT_TESTS1(atan) +COMMON_FLOAT_TESTS1(log10) + +gentest1(exp10f, float, 4) +gentest1(expm1f, float, 4) +gentest1(log1pf, float, 4) + +COMMON_FLOAT_TESTS1(asinf) + +gentest1(acosf, float, 4) +gentest1(acosf, float, 16) + +COMMON_FLOAT_TESTS1(tanhf) + +gentest1(coshf, float, 4) +gentest1(coshf, float, 8) + +#define COMMON_DOUBLE_TESTS1(FUN) \ + gentest1(FUN, double, 2) \ + gentest1(FUN, double, 4) \ + gentest1(FUN, double, 8) + + +COMMON_DOUBLE_TESTS1(tan) +COMMON_DOUBLE_TESTS1(exp) +COMMON_DOUBLE_TESTS1(exp2) +COMMON_DOUBLE_TESTS1(log) +COMMON_DOUBLE_TESTS1(log2) +COMMON_DOUBLE_TESTS1(cos) +COMMON_DOUBLE_TESTS1(sin) + +gentest2(pow, double, 2) +gentest2(pow, double, 4) +gentest2(pow, double, 8) + +//COMMON_DOUBLE_TESTS1(sqrt) provided by an instruction +COMMON_DOUBLE_TESTS1(erf) + +//gentest1(fabs, double, 2) provided by an instruction +//gentest1(fabs, double, 4) provided by an instruction + +gentest1(atan, double, 2) +gentest1(atan, double, 8) + +gentest1(log10, double, 2) +gentest1(exp10, double, 2) +gentest1(expm1, double, 2) +gentest1(log1p, double, 2) + +gentest1(asin, double, 8) + + +/* { dg-final { scan-assembler-times "amd_vrs8_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_exp2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_exp2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_exp10f" 1 } } */ +/* { dg-final { scan-assembler-times 
"amd_vrs4_expm1f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd2_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_exp10" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd4_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_exp2f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd8_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_log10f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log10f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log1pf" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd4_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_log2" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd2_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log10" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log1p" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_log10f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd8_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_log2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_sin" 
1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_sin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_tanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_sin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_acosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_atan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_acosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_atan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_asin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_coshf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_coshf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_powf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_powf" 1 } } */ +/* { dg-final { 
scan-assembler-times "amd_vrs16_powf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_erf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_erf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_erf" 1 } } */ + + + -- 2.46.0