https://gcc.gnu.org/g:fc39a29f75cb181dcefaebf4489303847ae4fe9d
commit r14-11462-gfc39a29f75cb181dcefaebf4489303847ae4fe9d Author: Jakub Jelinek <ja...@redhat.com> Date: Sat Mar 1 09:15:57 2025 +0100 openmp: Fix up simd clone mask argument creation on x86 [PR115871] The following testcase ICEs since r14-5057. The Intel vector ABI says that in the ZMM case the mask is passed in unsigned int or unsigned long long arguments, and how many bits are in them and how many of those arguments there are is determined by the characteristic data type of the function. In the testcase simdlen is 32 and the characteristic data type is double, so the return value as well as the first argument is passed in 4 V8DFmode arguments and the mask is supposed to be passed in 4 unsigned int arguments (8 bits in each). Before the r14-5057 change there was sc->args[i].orig_type = parm_type; ... case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: case SIMD_CLONE_ARG_TYPE_VECTOR: if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type)) veclen = sc->vecsize_int; else veclen = sc->vecsize_float; if (known_eq (veclen, 0U)) veclen = sc->simdlen; else veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type))); for the argument handling and if (sc->inbranch) { tree base_type = simd_clone_compute_base_data_type (sc->origin, sc); ... if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type)) veclen = sc->vecsize_int; else veclen = sc->vecsize_float; if (known_eq (veclen, 0U)) veclen = sc->simdlen; else veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))); for the mask handling.
r14-5057 moved this argument creation later and unified that: case SIMD_CLONE_ARG_TYPE_MASK: case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: case SIMD_CLONE_ARG_TYPE_VECTOR: if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK && sc->mask_mode != VOIDmode) elem_type = boolean_type_node; else elem_type = TREE_TYPE (sc->args[i].vector_type); if (INTEGRAL_TYPE_P (elem_type) || POINTER_TYPE_P (elem_type)) veclen = sc->vecsize_int; else veclen = sc->vecsize_float; if (known_eq (veclen, 0U)) veclen = sc->simdlen; else veclen = exact_div (veclen, GET_MODE_BITSIZE (SCALAR_TYPE_MODE (elem_type))); This is correct for the argument cases (so linear or vector) (though POINTER_TYPE_P will never appear as TREE_TYPE of a vector), but the boolean_type_node in there is completely bogus: when using AVX512 integer masks, as I wrote above, we need the characteristic data type, not bool. Besides, bool is strange in that it has a bitsize of 8 (or 32 on darwin), while the masks are 1 bit per lane anyway. Fixed thusly. 2025-03-01 Jakub Jelinek <ja...@redhat.com> PR middle-end/115871 * omp-simd-clone.cc (simd_clone_adjust): For SIMD_CLONE_ARG_TYPE_MASK and sc->mask_mode not VOIDmode, set elem_type to the characteristic type rather than boolean_type_node. * gcc.dg/gomp/simd-clones-8.c: New test.
(cherry picked from commit 6589eb4efe39545ec7f7e641e1d302c89b260350) Diff: --- gcc/omp-simd-clone.cc | 2 +- gcc/testsuite/gcc.dg/gomp/simd-clones-8.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 864586207ee8..eb46ff97be26 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -1423,7 +1423,7 @@ simd_clone_adjust (struct cgraph_node *node) case SIMD_CLONE_ARG_TYPE_VECTOR: if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK && sc->mask_mode != VOIDmode) - elem_type = boolean_type_node; + elem_type = simd_clone_compute_base_data_type (sc->origin, sc); else elem_type = TREE_TYPE (sc->args[i].vector_type); if (INTEGRAL_TYPE_P (elem_type) || POINTER_TYPE_P (elem_type)) diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-8.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-8.c new file mode 100644 index 000000000000..a6ddb04388c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-8.c @@ -0,0 +1,10 @@ +/* PR middle-end/115871 */ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-fopenmp -w" } */ + +#pragma omp declare simd inbranch simdlen(32) +double +foo (double x) +{ + return x * 4.0; +}