From: Matthew Malcomson <mmalcom...@nvidia.com>

N.b. we match which function to use based on the MODE of the type, for
two reasons:
1) We can't match directly on the type, as otherwise `typedef float x`
   would mean that `x` could no longer be used with the intrinsic (see
   the sketch after this list).
2) MODE (i.e. the type's ABI) is the thing we need to distinguish
   between when deciding which fundamental operation needs to be
   applied.

We use a wrapper in builtins.cc to check whether a given floating point
type has a corresponding builtin; it lives in builtins.cc so it can be
shared with code added later in this patch series.  The static array has
to go inside a function so that it is initialised after the
`global_trees` (a sketch of the idiom follows).
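
For reference, this is the usual function-local static idiom (a generic
illustrative sketch, not code from this patch): a namespace-scope array
built from the tree globals could be dynamically initialised before those
globals are set up, whereas a function-local static is initialised on the
first call, by which point the trees exist.

    #include <cstddef>

    /* Hypothetical stand-in for a global that only gets its value during
       start-up (the real code reads float_type_node and friends).  */
    static int global_value;

    static const int *
    lazy_table (size_t *len)
    {
      /* Dynamically initialised on the first call to lazy_table, i.e.
         after global_value has been assigned its real value.  */
      static const int table[] = { global_value, global_value + 1 };
      *len = sizeof (table) / sizeof (table[0]);
      return table;
    }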

N.b. floating point types that are unavailable, or that are available but
not as arithmetic types, get handled by the sync_resolve_size check
against the available types.
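
As a user-level sketch of the overall effect (hedged: which floating
point types are accepted depends on the target, and the expansion of the
new builtins comes later in the series), __atomic_fetch_add and
__atomic_fetch_sub now resolve for scalar floating point operands:

    double
    bump (double *counter, double delta)
    {
      /* Resolves to the double (DFmode) fetch_add specialisation.  */
      return __atomic_fetch_add (counter, delta, __ATOMIC_RELAXED);
    }

    float
    drop (float *counter)
    {
      /* Resolves to the float (SFmode) fetch_sub specialisation.  */
      return __atomic_fetch_sub (counter, 1.0f, __ATOMIC_RELAXED);
    }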

gcc/ChangeLog:

        * builtins.cc (struct type_to_repr_builtin): New.
        (fp_type_mappings): New.  Hard-coded mappings between types and a
        representative builtin from which the offset can be determined.
        (get_builtin_fp_offset): New.
        * builtins.h (get_builtin_fp_offset): New.

gcc/c-family/ChangeLog:

        * c-common.cc (sync_resolve_size): Determine the offset to the
        floating point builtin and pass it back through a new out
        parameter when we have a floating point type.
        (sync_resolve_params): Handle casting to floating point types.
        (resolve_overloaded_builtin): Resolve floating point fetch_add
        and fetch_sub.

Signed-off-by: Matthew Malcomson <mmalcom...@nvidia.com>
---
 gcc/builtins.cc          | 57 ++++++++++++++++++++++++++++++++++++++++
 gcc/builtins.h           |  1 +
 gcc/c-family/c-common.cc | 52 +++++++++++++++++++++++++++---------
 3 files changed, 97 insertions(+), 13 deletions(-)

diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 0d90c2ad5f8..480d38db058 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -6394,6 +6394,63 @@ get_builtin_sync_mode (int fcode_diff)
   return int_mode_for_size (BITS_PER_UNIT << fcode_diff, 0).require ();
 }
 
+/* Mappings between floating point types and builtin offsets.
+   fp_type_mappings gives a mapping between types and a representative builtin.
+   Can convert from that representative builtin to an offset from base builtin
+   by subtracting BUILT_IN_ATOMIC_FETCH_ADD_N.
+
+   Only works for floating point atomic builtins.  */
+struct type_to_repr_builtin
+{
+  tree type;
+  enum built_in_function example;
+};
+static size_t
+fp_type_mappings (const struct type_to_repr_builtin **x)
+{
+  static const struct type_to_repr_builtin fp_type_mappings[]
+    = {{float_type_node, BUILT_IN_ATOMIC_FETCH_ADD_FPF},
+       {double_type_node, BUILT_IN_ATOMIC_FETCH_ADD_FP},
+       {long_double_type_node, BUILT_IN_ATOMIC_FETCH_ADD_FPL},
+       {bfloat16_type_node ? bfloat16_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF16B},
+       {float16_type_node ? float16_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF16},
+       {float32_type_node ? float32_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF32},
+       {float64_type_node ? float64_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF64},
+       {float128_type_node ? float128_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF128},
+       {float32x_type_node ? float32x_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF32X},
+       {float64x_type_node ? float64x_type_node : error_mark_node,
+       BUILT_IN_ATOMIC_FETCH_ADD_FPF64X}};
+  *x = fp_type_mappings;
+  return sizeof (fp_type_mappings) / sizeof (fp_type_mappings[0]);
+}
+
+static inline size_t
+get_builtin_fp_offset (struct type_to_repr_builtin x)
+{
+  return x.example - BUILT_IN_ATOMIC_FETCH_ADD_N;
+}
+
+int
+get_builtin_fp_offset (tree type)
+{
+  const struct type_to_repr_builtin *tto_p;
+  size_t len = fp_type_mappings (&tto_p);
+  for (size_t i = 0; i < len; ++i)
+    {
+      struct type_to_repr_builtin tto = tto_p[i];
+      if (tto.type != error_mark_node
+         && TYPE_MODE (tto.type) == TYPE_MODE (type))
+       return get_builtin_fp_offset (tto);
+    }
+  return 0;
+}
+
 /* Expand the memory expression LOC and return the appropriate memory operand
    for the builtin_sync operations.  */
 
diff --git a/gcc/builtins.h b/gcc/builtins.h
index 8d93f75a9a4..7ac9981442d 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -130,6 +130,7 @@ extern tree std_fn_abi_va_list (tree);
 extern tree std_canonical_va_list_type (tree);
 extern void std_expand_builtin_va_start (tree, rtx);
 extern void expand_builtin_trap (void);
+extern int get_builtin_fp_offset (tree);
 extern void expand_ifn_atomic_bit_test_and (gcall *);
 extern void expand_ifn_atomic_compare_exchange (gcall *);
 extern void expand_ifn_atomic_op_fetch_cmp_0 (gcall *);
diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 156d20dfd5d..5dc7fc10db3 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "c-spellcheck.h"
 #include "selftest.h"
 #include "debug.h"
+#include "builtins.h"
 #include "tree-vector-builder.h"
 #include "vec-perm-indices.h"
 #include "tree-pretty-print-markup.h"
@@ -7501,13 +7502,15 @@ speculation_safe_value_resolve_return (tree first_param, tree result)
 
 static int
 sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
-                  bool orig_format, bool complain)
+                  bool orig_format, int *fp_specialisation_offset,
+                  bool complain)
 {
   /* Type of the argument.  */
   tree argtype;
   /* Type the argument points to.  */
   tree type;
   int size;
+  bool valid_float = false;
 
   if (vec_safe_is_empty (params))
     {
@@ -7527,7 +7530,8 @@ sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
     goto incompatible;
 
   type = TREE_TYPE (type);
-  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
+  valid_float = fp_specialisation_offset && fetch && SCALAR_FLOAT_TYPE_P (type);
+  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type) && !valid_float)
     goto incompatible;
 
   if (!COMPLETE_TYPE_P (type))
@@ -7544,6 +7548,15 @@ sync_resolve_size (tree function, vec<tree, va_gc> *params, bool fetch,
       && !targetm.scalar_mode_supported_p (TImode))
     return -1;
 
+  if (valid_float)
+    {
+      size_t offset = get_builtin_fp_offset (type);
+      if (offset == 0)
+       goto incompatible;
+      *fp_specialisation_offset = offset;
+      return -1;
+    }
+
   if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
     return size;
 
@@ -7600,12 +7613,13 @@ sync_resolve_params (location_t loc, tree orig_function, tree function,
          return false;
        }
 
-      /* Only convert parameters if arg_type is unsigned integer type with
-        new format sync routines, i.e. don't attempt to convert pointer
-        arguments (e.g. EXPECTED argument of __atomic_compare_exchange_n),
-        bool arguments (e.g. WEAK argument) or signed int arguments (memmodel
-        kinds).  */
-      if (TREE_CODE (arg_type) == INTEGER_TYPE && TYPE_UNSIGNED (arg_type))
+      /* Only convert parameters if arg_type is unsigned integer type or
+        floating point type with new format sync routines, i.e. don't attempt
+        to convert pointer arguments (e.g. EXPECTED argument of
+        __atomic_compare_exchange_n), bool arguments (e.g. WEAK argument) or
+        signed int arguments (memmodel kinds).  */
+      if ((TREE_CODE (arg_type) == INTEGER_TYPE && TYPE_UNSIGNED (arg_type))
+         || SCALAR_FLOAT_TYPE_P (arg_type))
        {
          /* Ideally for the first conversion we'd use convert_for_assignment
             so that we get warnings for anything that doesn't match the pointer
@@ -8441,6 +8455,9 @@ resolve_overloaded_builtin (location_t loc, tree function,
      and so must be rejected.  */
   bool fetch_op = true;
   bool orig_format = true;
+  /* Is this function one of the builtins that has floating point
+     specializations.  */
+  bool fetch_maybe_float = false;
   tree new_return = NULL_TREE;
 
   switch (DECL_BUILT_IN_CLASS (function))
@@ -8577,12 +8594,14 @@ resolve_overloaded_builtin (location_t loc, tree function,
       /* FALLTHRU */
     case BUILT_IN_ATOMIC_ADD_FETCH_N:
     case BUILT_IN_ATOMIC_SUB_FETCH_N:
+    case BUILT_IN_ATOMIC_FETCH_SUB_N:
+    case BUILT_IN_ATOMIC_FETCH_ADD_N:
+      fetch_maybe_float = true;
+      /* FALLTHRU */
     case BUILT_IN_ATOMIC_AND_FETCH_N:
     case BUILT_IN_ATOMIC_NAND_FETCH_N:
     case BUILT_IN_ATOMIC_XOR_FETCH_N:
     case BUILT_IN_ATOMIC_OR_FETCH_N:
-    case BUILT_IN_ATOMIC_FETCH_ADD_N:
-    case BUILT_IN_ATOMIC_FETCH_SUB_N:
     case BUILT_IN_ATOMIC_FETCH_AND_N:
     case BUILT_IN_ATOMIC_FETCH_NAND_N:
     case BUILT_IN_ATOMIC_FETCH_XOR_N:
@@ -8614,7 +8633,10 @@ resolve_overloaded_builtin (location_t loc, tree function,
                      && orig_code != BUILT_IN_SYNC_LOCK_TEST_AND_SET_N
                      && orig_code != BUILT_IN_SYNC_LOCK_RELEASE_N);
 
+       int fp_specialisation_offset = 0;
        int n = sync_resolve_size (function, params, fetch_op, orig_format,
+                                  fetch_maybe_float ? &fp_specialisation_offset
+                                                    : NULL,
                                   complain);
        tree new_function, first_param, result;
        enum built_in_function fncode;
@@ -8622,7 +8644,10 @@ resolve_overloaded_builtin (location_t loc, tree function,
        if (n == 0)
          return error_mark_node;
 
-       if (n == -1)
+       if (fp_specialisation_offset != 0)
+         fncode = (enum built_in_function) ((int) orig_code
+                                            + fp_specialisation_offset);
+       else if (n == -1)
          {
            /* complain is related to SFINAE context.
               _BitInt is not defined in C++, hence can't enter this clause
@@ -8636,13 +8661,14 @@ resolve_overloaded_builtin (location_t loc, tree function,
            return atomic_bitint_fetch_using_cas_loop (loc, orig_code, function,
                                                       params);
          }
+       else
+         fncode
+           = (enum built_in_function) ((int) orig_code + exact_log2 (n) + 1);
 
-       fncode = (enum built_in_function)((int)orig_code + exact_log2 (n) + 1);
        new_function = builtin_decl_explicit (fncode);
        if (!sync_resolve_params (loc, function, new_function, params,
                                  orig_format, complain))
          return error_mark_node;
-
        first_param = (*params)[0];
        result = build_function_call_vec (loc, vNULL, new_function, params,
                                          NULL);
-- 
2.43.0
