Hi, bootstrapped and regtested on x86_64-linux. I also tested that all the new calls can be linked with the AOCL LibM library. OK to push?
Thanks, Filip Kastl -- 8< -- We currently support generating vectorized math calls to the AMD core math library (ACML) (-mveclibabi=acml). That library is end-of-life and its successor is the math library from AMD Optimizing CPU Libraries (AOCL). This patch adds support for AOCL (-mveclibabi=aocl). That significantly broadens the range of vectorized math functions optimized for AMD CPUs that GCC can generate calls to. See the edit to invoke.texi for a complete list of added functions. Compared to the list of functions in AOCL LibM docs I left out the sincos, linearfrac, powx, sqrt and fabs operations. I also left out all the functions working with arrays and amd_vrd2_expm1() (the AMD docs list the function but I wasn't able to link calls to it with the current version of the library). gcc/ChangeLog: PR target/56504 * config/i386/i386-options.cc (ix86_option_override_internal): Add ix86_veclibabi_type_aocl case. * config/i386/i386-options.h (ix86_veclibabi_aocl): Add extern ix86_veclibabi_aocl(). * config/i386/i386-opts.h (enum ix86_veclibabi): Add ix86_veclibabi_type_aocl into the ix86_veclibabi enum. * config/i386/i386.cc (ix86_veclibabi_aocl): New function. * config/i386/i386.opt: Add the 'aocl' type. * doc/invoke.texi: Document -mveclibabi=aocl. gcc/testsuite/ChangeLog: PR target/56504 * gcc.target/i386/vectorize-aocl1.c: New test. 
Signed-off-by: Filip Kastl <fka...@suse.cz> --- gcc/config/i386/i386-options.cc | 4 + gcc/config/i386/i386-options.h | 1 + gcc/config/i386/i386-opts.h | 3 +- gcc/config/i386/i386.cc | 143 +++++++++++ gcc/config/i386/i386.opt | 3 + gcc/doc/invoke.texi | 57 +++-- .../gcc.target/i386/vectorize-aocl1.c | 224 ++++++++++++++++++ 7 files changed, 419 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vectorize-aocl1.c diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 603166d249c..76a20179a36 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2877,6 +2877,10 @@ ix86_option_override_internal (bool main_args_p, ix86_veclib_handler = &ix86_veclibabi_acml; break; + case ix86_veclibabi_type_aocl: + ix86_veclib_handler = &ix86_veclibabi_aocl; + break; + default: gcc_unreachable (); } diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h index 0d448ef9f15..591a6152c01 100644 --- a/gcc/config/i386/i386-options.h +++ b/gcc/config/i386/i386-options.h @@ -60,6 +60,7 @@ void ix86_simd_clone_adjust (struct cgraph_node *node); extern tree (*ix86_veclib_handler) (combined_fn, tree, tree); extern tree ix86_veclibabi_svml (combined_fn, tree, tree); extern tree ix86_veclibabi_acml (combined_fn, tree, tree); +extern tree ix86_veclibabi_aocl (combined_fn, tree, tree); enum ix86_function_specific_strings { diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index 35542b28936..69fcd82bf47 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -87,7 +87,8 @@ enum asm_dialect { enum ix86_veclibabi { ix86_veclibabi_type_none, ix86_veclibabi_type_svml, - ix86_veclibabi_type_acml + ix86_veclibabi_type_acml, + ix86_veclibabi_type_aocl }; enum stack_protector_guard { diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 6ac3a5d55f2..8ccbc8bbc07 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -19882,6 
+19882,149 @@ ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) return new_fndecl; } +/* Handler for an AOCL-LibM-style interface to + a library with vectorized intrinsics. */ + +tree +ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in) +{ + char name[20] = "amd_vr"; + int name_len = 6; + tree fntype, new_fndecl, args; + unsigned arity; + const char *bname; + machine_mode el_mode, in_mode; + int n, in_n; + + /* AOCL-LibM is 64bits only. It is also only suitable for unsafe math only + as it trades off some accuracy for increased performance. */ + if (!TARGET_64BIT + || !flag_unsafe_math_optimizations) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + gcc_checking_assert (n > 0); + + /* Decide whether there exists a function for the combination of FN, the mode + and the vector width. Return early if it doesn't. */ + + if (el_mode != DFmode && el_mode != SFmode) + return NULL_TREE; + + /* Supported vector widths for given FN and single/double precision. Zeros + are used to fill out unused positions in the arrays. */ + static const int supported_n[][2][3] = { + /* Single prec. , Double prec. */ + { { 16, 0, 0 }, { 2, 4, 8 } }, /* TAN. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* EXP2. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* LOG2. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* COS. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* SIN. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* POW. */ + { { 4, 8, 16 }, { 2, 4, 8 } }, /* ERF. */ + { { 4, 8, 16 }, { 2, 8, 0 } }, /* ATAN. */ + { { 4, 8, 16 }, { 2, 0, 0 } }, /* LOG10. */ + { { 4, 0, 0 }, { 2, 0, 0 } }, /* EXP10. */ + { { 4, 0, 0 }, { 2, 0, 0 } }, /* LOG1P. */ + { { 4, 8, 16 }, { 8, 0, 0 } }, /* ASIN. 
*/ + { { 4, 16, 0 }, { 0, 0, 0 } }, /* ACOS. */ + { { 4, 8, 16 }, { 0, 0, 0 } }, /* TANH. */ + { { 4, 0, 0 }, { 0, 0, 0 } }, /* EXPM1. */ + { { 4, 8, 0 }, { 0, 0, 0 } }, /* COSH. */ + }; + + /* We cannot simply index the supported_n array with FN since multiple FNs + may correspond to a single operation (see the definitions of these + CASE_CFN_* macros). */ + int i; + switch (fn) + { + CASE_CFN_TAN : i = 0; break; + CASE_CFN_EXP : i = 1; break; + CASE_CFN_EXP2 : i = 2; break; + CASE_CFN_LOG : i = 3; break; + CASE_CFN_LOG2 : i = 4; break; + CASE_CFN_COS : i = 5; break; + CASE_CFN_SIN : i = 6; break; + CASE_CFN_POW : i = 7; break; + CASE_CFN_ERF : i = 8; break; + CASE_CFN_ATAN : i = 9; break; + CASE_CFN_LOG10 : i = 10; break; + CASE_CFN_EXP10 : i = 11; break; + CASE_CFN_LOG1P : i = 12; break; + CASE_CFN_ASIN : i = 13; break; + CASE_CFN_ACOS : i = 14; break; + CASE_CFN_TANH : i = 15; break; + CASE_CFN_EXPM1 : i = 16; break; + CASE_CFN_COSH : i = 17; break; + default: return NULL_TREE; + } + + int j = el_mode == DFmode; + bool n_is_supported = false; + for (unsigned k = 0; k < 3; k++) + if (supported_n[i][j][k] == n) + { + n_is_supported = true; + break; + } + if (!n_is_supported) + return NULL_TREE; + + /* Append the precision and the vector width to the function name we are + constructing. */ + name[name_len++] = el_mode == DFmode ? 'd' : 's'; + switch (n) + { + case 2: + case 4: + case 8: + name[name_len++] = '0' + n; + break; + case 16: + name[name_len++] = '1'; + name[name_len++] = '6'; + break; + default: + gcc_unreachable (); + } + name[name_len++] = '_'; + + /* Append the operation name (steal it from the name of a builtin). */ + tree fndecl = mathfn_built_in (el_mode == DFmode + ? 
double_type_node : float_type_node, fn); + bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + sprintf (name + name_len, "%s", bname + 10); + + arity = 0; + for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) + arity++; + + if (arity == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (BUILTINS_LOCATION, + FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + /* Returns a decl of a function that implements scatter store with register type VECTYPE and index type INDEX_TYPE and SCALE. Return NULL_TREE if it is not available. */ diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 99e86f545e8..ea292650ed1 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -588,6 +588,9 @@ Enum(ix86_veclibabi) String(svml) Value(ix86_veclibabi_type_svml) EnumValue Enum(ix86_veclibabi) String(acml) Value(ix86_veclibabi_type_acml) +EnumValue +Enum(ix86_veclibabi) String(aocl) Value(ix86_veclibabi_type_aocl) + mvect8-ret-in-mem Target Mask(VECT8_RETURNS) Save Return 8-byte vectors in memory. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 859af06036f..3c63fa8f737 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -35943,25 +35943,52 @@ all of the reciprocal approximations, except for square root. @opindex mveclibabi @item -mveclibabi=@var{type} -Specifies the ABI type to use for vectorizing intrinsics using an -external library. Supported values for @var{type} are @samp{svml} -for the Intel short -vector math library and @samp{acml} for the AMD math core library. +Specifies the ABI type to use for vectorizing intrinsics using an external +library. 
Supported values for @var{type} are @samp{svml} for the Intel short +vector math library, @samp{aocl} for the math library (LibM) from AMD +Optimizing CPU Libraries (AOCL) and @samp{acml} for the end-of-life AMD core +math library (to which AOCL-LibM is the successor). To use this option, both @option{-ftree-vectorize} and @option{-funsafe-math-optimizations} have to be enabled, and an SVML or ACML ABI-compatible library must be specified at link time. -GCC currently emits calls to @code{vmldExp2}, -@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2}, -@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2}, -@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2}, -@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2}, -@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, -@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4}, -@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4}, -@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4}, -@code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding -function type when @option{-mveclibabi=svml} is used, and @code{__vrd2_sin}, +GCC currently emits calls to @code{vmldExp2}, @code{vmldLn2}, +@code{vmldLog102}, @code{vmldPow2}, @code{vmldTanh2}, @code{vmldTan2}, +@code{vmldAtan2}, @code{vmldAtanh2}, @code{vmldCbrt2}, @code{vmldSinh2}, +@code{vmldSin2}, @code{vmldAsinh2}, @code{vmldAsin2}, @code{vmldCosh2}, +@code{vmldCos2}, @code{vmldAcosh2}, @code{vmldAcos2}, @code{vmlsExp4}, +@code{vmlsLn4}, @code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, +@code{vmlsTan4}, @code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, +@code{vmlsSinh4}, @code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, +@code{vmlsCosh4}, @code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for +corresponding function type when @option{-mveclibabi=svml} is used, +@code{amd_vrs4_acosf}, @code{amd_vrs16_acosf}, @code{amd_vrd8_asin}, 
+@code{amd_vrs4_asinf}, @code{amd_vrs8_asinf}, @code{amd_vrs16_asinf}, +@code{amd_vrd2_atan}, @code{amd_vrd8_atan}, @code{amd_vrs4_atanf}, +@code{amd_vrs8_atanf}, @code{amd_vrs16_atanf}, @code{amd_vrd2_cos}, +@code{amd_vrd4_cos}, @code{amd_vrd8_cos}, @code{amd_vrs4_cosf}, +@code{amd_vrs8_cosf}, @code{amd_vrs16_cosf}, @code{amd_vrs4_coshf}, +@code{amd_vrs8_coshf}, @code{amd_vrd2_erf}, @code{amd_vrd4_erf}, +@code{amd_vrd8_erf}, @code{amd_vrs4_erff}, @code{amd_vrs8_erff}, +@code{amd_vrs16_erff}, @code{amd_vrd2_exp}, @code{amd_vrd4_exp}, +@code{amd_vrd8_exp}, @code{amd_vrs4_expf}, @code{amd_vrs8_expf}, +@code{amd_vrs16_expf}, @code{amd_vrd2_exp10}, @code{amd_vrs4_exp10f}, +@code{amd_vrd2_exp2}, @code{amd_vrd4_exp2}, @code{amd_vrd8_exp2}, +@code{amd_vrs4_exp2f}, @code{amd_vrs8_exp2f}, @code{amd_vrs16_exp2f}, +@code{amd_vrs4_expm1f}, @code{amd_vrd2_log}, @code{amd_vrd4_log}, +@code{amd_vrd8_log}, @code{amd_vrs4_logf}, @code{amd_vrs8_logf}, +@code{amd_vrs16_logf}, @code{amd_vrd2_log10}, @code{amd_vrs4_log10f}, +@code{amd_vrs8_log10f}, @code{amd_vrs16_log10f}, @code{amd_vrd2_log1p}, +@code{amd_vrs4_log1pf}, @code{amd_vrd2_log2}, @code{amd_vrd4_log2}, +@code{amd_vrd8_log2}, @code{amd_vrs4_log2f}, @code{amd_vrs8_log2f}, +@code{amd_vrs16_log2f}, @code{amd_vrd2_pow}, @code{amd_vrd4_pow}, +@code{amd_vrd8_pow}, @code{amd_vrs4_powf}, @code{amd_vrs8_powf}, +@code{amd_vrs16_powf}, @code{amd_vrd2_sin}, @code{amd_vrd4_sin}, +@code{amd_vrd8_sin}, @code{amd_vrs4_sinf}, @code{amd_vrs8_sinf}, +@code{amd_vrs16_sinf}, @code{amd_vrd2_tan}, @code{amd_vrd4_tan}, +@code{amd_vrd8_tan}, @code{amd_vrs16_tanf}, @code{amd_vrs4_tanhf}, +@code{amd_vrs8_tanhf}, @code{amd_vrs16_tanhf} for the corresponding function +type when @option{-mveclibabi=aocl} is used, and @code{__vrd2_sin}, @code{__vrd2_cos}, @code{__vrd2_exp}, @code{__vrd2_log}, @code{__vrd2_log2}, @code{__vrd2_log10}, @code{__vrs4_sinf}, @code{__vrs4_cosf}, @code{__vrs4_expf}, @code{__vrs4_logf}, @code{__vrs4_log2f}, diff --git 
a/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c b/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c new file mode 100644 index 00000000000..5ffb04a7b5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize-aocl1.c @@ -0,0 +1,224 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=znver5 -mveclibabi=aocl" } */ + + +/* Declare glibc math functions we need since this testcase may be run on + systems that don't have glibc. */ +float tanf(float); +float expf(float); +float exp2f(float); +float logf(float); +float log2f(float); +float cosf(float); +float sinf(float); +float powf(float, float); +float erff(float); +float atanf(float); +float log10f(float); +float exp10f(float); +float expm1f(float); +float log1pf(float); +float asinf(float); +float acosf(float); +float tanhf(float); +float coshf(float); + +double tan(double); +double exp(double); +double exp2(double); +double log(double); +double log2(double); +double cos(double); +double sin(double); +double pow(double, double); +double erf(double); +double atan(double); +double log10(double); +double exp10(double); +double expm1(double); +double log1p(double); +double asin(double); +double acos(double); +double tanh(double); +double cosh(double); + +#define gentest1(FUN, BASE, VF) \ + extern BASE s_##FUN##_##BASE##_##VF[VF]; \ + extern BASE d_##FUN##_##BASE##_##VF[VF]; \ + void test_##FUN##_##BASE##_##VF (void) \ + { \ + for (int i = 0; i < VF; i++) \ + d_##FUN##_##BASE##_##VF[i] \ + = FUN (s_##FUN##_##BASE##_##VF[i]); \ + } \ + + +#define gentest2(FUN, BASE, VF) \ + extern BASE s1_##FUN##_##BASE##_##VF[VF]; \ + extern BASE s2_##FUN##_##BASE##_##VF[VF]; \ + extern BASE d_##FUN##_##BASE##_##VF[VF]; \ + void test_##FUN##_##BASE##_##VF (void) \ + { \ + for (int i = 0; i < VF; i++) \ + d_##FUN##_##BASE##_##VF[i] \ + = FUN (s1_##FUN##_##BASE##_##VF[i], \ + s2_##FUN##_##BASE##_##VF[i]); \ + } \ + + +gentest1(tan, float, 16) + +#define COMMON_FLOAT_TESTS1(FUN) \ + 
gentest1(FUN, float, 4) \ + gentest1(FUN, float, 8) \ + gentest1(FUN, float, 16) + +COMMON_FLOAT_TESTS1(exp) +COMMON_FLOAT_TESTS1(exp2) +COMMON_FLOAT_TESTS1(log) +COMMON_FLOAT_TESTS1(log2) +COMMON_FLOAT_TESTS1(cos) +COMMON_FLOAT_TESTS1(sin) + +gentest2(powf, float, 4) +gentest2(powf, float, 8) +gentest2(powf, float, 16) + +//COMMON_FLOAT_TESTS1(sqrt) provided by an instruction +COMMON_FLOAT_TESTS1(erf) + +//gentest1(fabsf, float, 4) provided by an instruction +//gentest1(fabsf, float, 8) provided by an instruction + +COMMON_FLOAT_TESTS1(atan) +COMMON_FLOAT_TESTS1(log10) + +gentest1(exp10f, float, 4) +gentest1(expm1f, float, 4) +gentest1(log1pf, float, 4) + +COMMON_FLOAT_TESTS1(asinf) + +gentest1(acosf, float, 4) +gentest1(acosf, float, 16) + +COMMON_FLOAT_TESTS1(tanhf) + +gentest1(coshf, float, 4) +gentest1(coshf, float, 8) + +#define COMMON_DOUBLE_TESTS1(FUN) \ + gentest1(FUN, double, 2) \ + gentest1(FUN, double, 4) \ + gentest1(FUN, double, 8) + + +COMMON_DOUBLE_TESTS1(tan) +COMMON_DOUBLE_TESTS1(exp) +COMMON_DOUBLE_TESTS1(exp2) +COMMON_DOUBLE_TESTS1(log) +COMMON_DOUBLE_TESTS1(log2) +COMMON_DOUBLE_TESTS1(cos) +COMMON_DOUBLE_TESTS1(sin) + +gentest2(pow, double, 2) +gentest2(pow, double, 4) +gentest2(pow, double, 8) + +//COMMON_DOUBLE_TESTS1(sqrt) provided by an instruction +COMMON_DOUBLE_TESTS1(erf) + +//gentest1(fabs, double, 2) provided by an instruction +//gentest1(fabs, double, 4) provided by an instruction + +gentest1(atan, double, 2) +gentest1(atan, double, 8) + +gentest1(log10, double, 2) +gentest1(exp10, double, 2) +gentest1(expm1, double, 2) +gentest1(log1p, double, 2) + +gentest1(asin, double, 8) + + +/* { dg-final { scan-assembler-times "amd_vrs8_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_exp2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_exp2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_exp10f" 1 } } */ +/* { dg-final { scan-assembler-times 
"amd_vrs4_expm1f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd2_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_exp10" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd4_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_expf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_exp2f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd8_exp" } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_exp2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_log10f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log10f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_log1pf" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd4_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_log2" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd2_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log10" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_log1p" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_logf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_log2f" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_log10f" 1 } } */ +/* { dg-final { scan-assembler "amd_vrd8_log" } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_log2" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_sin" 
1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_sin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_cosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_sinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_tanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_cos" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_sin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_tan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_acosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_atan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_atanf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_asinf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_acosf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_atan" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_asin" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_coshf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_coshf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_tanhf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_powf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_powf" 1 } } */ +/* { dg-final { 
scan-assembler-times "amd_vrs16_powf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_pow" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs4_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd2_erf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs8_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd4_erf" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrs16_erff" 1 } } */ +/* { dg-final { scan-assembler-times "amd_vrd8_erf" 1 } } */ + + + -- 2.46.0