This is the modified version of the second of my Aarch64 SIMD ABI patches. While implementing this functionality I found I wanted targetm.simd_clone.adjust to be called when creating SIMD clone definitions and also when creating SIMD clone declarations. The current implementation (used only by x86) only called this target function when creating clone definitions. I added a second argument to the target function to say if it was creating a definition or a declaration and modified the i386 code to do nothing on declarations, thus maintaining its current behaviour.
This allowed me to add the aarch64_vector_pcs attribute to SIMD clone declarations and definitions on Aarch64. I considered comparing node->decl and cfun->decl to differentiate between definitions and declarations instead of using a new argument but having an argument seemed cleaner and clearer. Tested on x86 and aarch64. Steve Ellcey sell...@marvell.com 2018-12-11 Steve Ellcey <sell...@cavium.com> * config/aarch64/aarch64.c (cgraph.h): New include. (aarch64_simd_clone_compute_vecsize_and_simdlen): New function. (aarch64_simd_clone_adjust): Ditto. (aarch64_simd_clone_usable): Ditto. (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): New macro. (TARGET_SIMD_CLONE_ADJUST): Ditto. (TARGET_SIMD_CLONE_USABLE): Ditto. * config/i386/i386.c (ix86_simd_clone_adjust): Add new argument. * omp-simd-clone.c (simd_clone_adjust): Add new argument to targetm.simd_clone.adjust call. (expand_simd_clones): Add new targetm.simd_clone.adjust call. * target.def (simd_clone_adjust): Add new argument.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ea7e79f..40f18ef 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -40,6 +40,7 @@ #include "regs.h" #include "emit-rtl.h" #include "recog.h" +#include "cgraph.h" #include "diagnostic.h" #include "insn-attr.h" #include "alias.h" @@ -17936,6 +17937,135 @@ aarch64_estimated_poly_value (poly_int64 val) return val.coeffs[0] + val.coeffs[1] * over_128 / 128; } +/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, + CLONEI->vecsize_float and if CLONEI->simdlen is 0, also + CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, + or number of vecsize_mangle variants that should be emitted. */ + +static int +aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + struct cgraph_simd_clone *clonei, + tree base_type, + int num ATTRIBUTE_UNUSED) +{ + int ret = 0; + + if (clonei->simdlen + && (clonei->simdlen < 2 + || clonei->simdlen > 1024 + || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d", clonei->simdlen); + return 0; + } + + tree ret_type = TREE_TYPE (TREE_TYPE (node->decl)); + if (TREE_CODE (ret_type) != VOID_TYPE) + switch (TYPE_MODE (ret_type)) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_SFmode: + case E_DFmode: + /* case E_SCmode: */ + /* case E_DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported return type %qT for simd\n", ret_type); + return 0; + } + + tree t; + for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t)) + /* FIXME: Shouldn't we allow such arguments if they are uniform? 
*/ + switch (TYPE_MODE (TREE_TYPE (t))) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_SFmode: + case E_DFmode: + /* case E_SCmode: */ + /* case E_DCmode: */ + break; + default: + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported argument type %qT for simd\n", TREE_TYPE (t)); + return 0; + } + + if (TARGET_SIMD) + { + clonei->vecsize_mangle = 'n'; + clonei->mask_mode = VOIDmode; + clonei->vecsize_int = 128; + clonei->vecsize_float = 128; + + if (clonei->simdlen == 0) + { + if (SCALAR_INT_MODE_P (TYPE_MODE (base_type))) + clonei->simdlen = clonei->vecsize_int; + else + clonei->simdlen = clonei->vecsize_float; + clonei->simdlen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + } + else if (clonei->simdlen > 16) + { + /* If it is possible for given SIMDLEN to pass CTYPE value in + registers (v0-v7) accept that SIMDLEN, otherwise warn and don't + emit corresponding clone. */ + int cnt = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)) * clonei->simdlen; + if (SCALAR_INT_MODE_P (TYPE_MODE (base_type))) + cnt /= clonei->vecsize_int; + else + cnt /= clonei->vecsize_float; + if (cnt > 8) + { + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %d", clonei->simdlen); + return 0; + } + } + ret = 1; + } + return ret; +} + +/* Add aarch64_vector_pcs target attribute to SIMD clones so they use the + correct ABI. */ + +static void +aarch64_simd_clone_adjust (struct cgraph_node *node, bool defn ATTRIBUTE_UNUSED) +{ + tree t = TREE_TYPE (node->decl); + TYPE_ATTRIBUTES (t) = + make_attribute ("aarch64_vector_pcs", "default", TYPE_ATTRIBUTES (t)); +} + +/* If SIMD clone NODE can't be used in a vectorized loop + in current function, return -1, otherwise return a badness of using it + (0 if it is most desirable from vecsize_mangle point of view, 1 + slightly less desirable, etc.). 
*/ + +static int +aarch64_simd_clone_usable (struct cgraph_node *node) +{ + switch (node->simdclone->vecsize_mangle) + { + case 'n': + if (!TARGET_SIMD) + return -1; + return 0; + default: + gcc_unreachable (); + } +} + /* Target-specific selftests. */ #if CHECKING_P @@ -18418,6 +18548,16 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_ESTIMATED_POLY_VALUE #define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value +#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN +#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \ + aarch64_simd_clone_compute_vecsize_and_simdlen + +#undef TARGET_SIMD_CLONE_ADJUST +#define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust + +#undef TARGET_SIMD_CLONE_USABLE +#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable + #if CHECKING_P #undef TARGET_RUN_TARGET_SELFTESTS #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e86c39e..d9ad6e7 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -50011,9 +50011,14 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, /* Add target attribute to SIMD clone NODE if needed. */ static void -ix86_simd_clone_adjust (struct cgraph_node *node) +ix86_simd_clone_adjust (struct cgraph_node *node, bool defn) { const char *str = NULL; + + /* Attributes need to be adjusted for definitions, not declarations. 
*/ + if (!defn) + return; + gcc_assert (node->decl == cfun->decl); switch (node->simdclone->vecsize_mangle) { diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c index ba03bd5..09d7f86 100644 --- a/gcc/omp-simd-clone.c +++ b/gcc/omp-simd-clone.c @@ -1112,7 +1112,7 @@ simd_clone_adjust (struct cgraph_node *node) { push_cfun (DECL_STRUCT_FUNCTION (node->decl)); - targetm.simd_clone.adjust (node); + targetm.simd_clone.adjust (node, true); tree retval = simd_clone_adjust_return_type (node); ipa_parm_adjustment_vec adjustments @@ -1685,6 +1685,7 @@ expand_simd_clones (struct cgraph_node *node) simd_clone_adjust (n); else { + targetm.simd_clone.adjust (n, false); simd_clone_adjust_return_type (n); simd_clone_adjust_argument_types (n); } diff --git a/gcc/target.def b/gcc/target.def index 96f37e0..ffc3787 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1632,8 +1632,10 @@ int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL) DEFHOOK (adjust, "This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\ -to SIMD clone @var{node} if needed.", -void, (struct cgraph_node *), NULL) +to SIMD clone @var{node} if needed. If the @var{defn} bool argument is true\n\ +then this function is being called for a function definition, if false it is\n\ +a function declaration.", +void, (struct cgraph_node *, bool), NULL) DEFHOOK (usable,