PR96125 shows a bug when we try to use an MMA built-in within a function that uses #pragma target/attribute target to enable power10 code generation and the -mcpu=<CPU> command line option is pre-power10.
The problem is that we only initialize built-ins once, fairly early, when the command line options are in force. If the -mcpu=<CPU> is pre-power10, then we fail to initialize the MMA built-ins at all and so they are not available to call in a #pragma target/attribute target function. The patch below basically always (on server type cpus) initializes the MMA built-ins so we can use them in #pragma target/attribute target functions. The patch below fixes the bug and is currently in the middle of testing. Is this ok for trunk assuming the bootstrap and regression testing show no regressions? This also affects GCC10, so I'd like to backport this before the release. Ok there too after it sits on trunk a day or two? Peter gcc/ PR target/96125 * config/rs6000/rs6000-call.c (rs6000_init_builtins): Define the MMA specific types __vector_quad and __vector_pair, and initialize the MMA built-ins if TARGET_EXTRA_BUILTINS is set. (mma_init_builtins): Don't test for mask set in rs6000_builtin_mask. Remove now unneeded mask variable. * config/rs6000/rs6000.c (rs6000_option_override_internal): Add the OPTION_MASK_MMA flag for power10 if not already set. gcc/testsuite/ PR target/96125 * gcc.target/powerpc/pr96125.c: New test. diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 8e7bb54c73d..883c66810e6 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -12572,7 +12572,7 @@ rs6000_init_builtins (void) ieee128_float_type_node = ibm128_float_type_node = long_double_type_node; /* Vector pair and vector quad support. */ - if (TARGET_MMA) + if (TARGET_EXTRA_BUILTINS) { tree oi_uns_type = make_unsigned_type (256); vector_pair_type_node = build_distinct_type_copy (oi_uns_type); @@ -12648,13 +12648,14 @@ rs6000_init_builtins (void) pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", pixel_type_node, 8); - /* Create Altivec and VSX builtins on machines with at least the + /* Create Altivec, VSX and MMA builtins on machines with at least the general purpose extensions (970 and newer) to allow the use of the target attribute. */ if (TARGET_EXTRA_BUILTINS) - altivec_init_builtins (); - if (TARGET_MMA) - mma_init_builtins (); + { + altivec_init_builtins (); + mma_init_builtins (); + } if (TARGET_HTM) htm_init_builtins (); @@ -13388,20 +13389,12 @@ mma_init_builtins (void) for (unsigned i = 0; i < ARRAY_SIZE (bdesc_mma); i++, d++) { tree op[MAX_MMA_OPERANDS], type; - HOST_WIDE_INT mask = d->mask; unsigned icode = (unsigned) d->icode; unsigned attr = rs6000_builtin_info[d->code].attr; int attr_args = (attr & RS6000_BTC_OPND_MASK); bool gimple_func = (attr & RS6000_BTC_GIMPLE); unsigned nopnds = 0; - if ((mask & rs6000_builtin_mask) != mask) - { - if (TARGET_DEBUG_BUILTIN) - fprintf (stderr, "mma_builtin, skip binary %s\n", d->name); - continue; - } - if (d->name == 0) { if (TARGET_DEBUG_BUILTIN) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index fef72884b31..15af9b230e6 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4264,8 +4264,12 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_PCREL; } + /* Enable -mmma by default on power10 systems. */ + if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) + rs6000_isa_flags |= OPTION_MASK_MMA; + /* Turn off vector pair/mma options on non-power10 systems. */ - if (!TARGET_POWER10 && TARGET_MMA) + else if (!TARGET_POWER10 && TARGET_MMA) { if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) error ("%qs requires %qs", "-mmma", "-mcpu=power10"); diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index bbd8060e143..ea2c89eb6b3 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -577,7 +577,8 @@ extern int rs6000_vector_align[]; || TARGET_POPCNTD /* ISA 2.06 */ \ || TARGET_ALTIVEC \ || TARGET_VSX \ - || TARGET_HARD_FLOAT) + || TARGET_HARD_FLOAT \ + || TARGET_MMA) /* E500 cores only support plain "sync", not lwsync. */ #define TARGET_NO_LWSYNC (rs6000_cpu == PROCESSOR_PPC8540 \ diff --git a/gcc/testsuite/gcc.target/powerpc/pr96125.c b/gcc/testsuite/gcc.target/powerpc/pr96125.c new file mode 100644 index 00000000000..1fcc78aec11 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96125.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mdejagnu-cpu=power8 -O2" } */ + +void +__attribute__((target("cpu=power10"))) +test0 (__vector_quad *dst) +{ + __vector_quad acc; + __builtin_mma_xxsetaccz (&acc); + *dst = acc; +} + +void +test1 (__vector_quad *dst) +{ + __vector_quad acc; + __builtin_mma_xxsetaccz (&acc); /* { dg-error "'__builtin_mma_xxsetaccz' requires the '-mmma' option" } */ + *dst = acc; +} + +#pragma GCC target("cpu=power10") +void +test2 (__vector_quad *dst) +{ + __vector_quad acc; + __builtin_mma_xxsetaccz (&acc); + *dst = acc; +} + +void +test3 (__vector_quad *dst) +{ + __vector_quad acc; + __builtin_mma_xxsetaccz (&acc); + *dst = acc; +} + +#pragma GCC reset_options +void +test4 (__vector_quad *dst) +{ + __vector_quad acc; + __builtin_mma_xxmfacc (&acc); /* { dg-error "'__builtin_mma_xxmfacc' requires the '-mmma' option" } */ + *dst = acc; +} +