Hi! The following patch changes the folding of memcpy etc. to use bitwise vector types rather than huge INTEGER_TYPEs when copying lengths larger than MAX_FIXED_MODE_SIZE. The problem with the huge INTEGER_TYPEs is that they are poorly supported: usually there are just optabs to handle moves of them, perhaps misaligned moves, and that is it, so they pose problems e.g. for BITINT_TYPE lowering.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2024-02-28 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/113988 * stor-layout.h (bitwise_mode_for_size): Declare. * stor-layout.cc (bitwise_mode_for_size): New function. * gimple-fold.cc (gimple_fold_builtin_memory_op): Use it. Use bitwise_type_for_mode instead of build_nonstandard_integer_type. Use BITS_PER_UNIT instead of 8. * gcc.dg/bitint-91.c: New test. --- gcc/stor-layout.h.jj 2024-01-03 11:51:28.103778767 +0100 +++ gcc/stor-layout.h 2024-02-27 12:32:17.716535079 +0100 @@ -102,6 +102,8 @@ extern opt_machine_mode mode_for_size_tr extern tree bitwise_type_for_mode (machine_mode); +extern opt_machine_mode bitwise_mode_for_size (poly_uint64); + /* Given a VAR_DECL, PARM_DECL or RESULT_DECL, clears the results of a previous call to layout_decl and calls it again. */ extern void relayout_decl (tree); --- gcc/stor-layout.cc.jj 2024-01-17 13:53:13.160176498 +0100 +++ gcc/stor-layout.cc 2024-02-27 12:27:20.876647298 +0100 @@ -476,6 +476,32 @@ bitwise_type_for_mode (machine_mode mode return inner_type; } +/* Find a mode that can be used for efficient bitwise operations on SIZE + bits, if one exists. */ + +opt_machine_mode +bitwise_mode_for_size (poly_uint64 size) +{ + if (known_le (size, (unsigned int) MAX_FIXED_MODE_SIZE)) + return mode_for_size (size, MODE_INT, true); + + machine_mode mode, ret = VOIDmode; + FOR_EACH_MODE_FROM (mode, MIN_MODE_VECTOR_INT) + if (known_eq (GET_MODE_BITSIZE (mode), size) + && (ret == VOIDmode || GET_MODE_INNER (mode) == QImode) + && have_regs_of_mode[mode] + && targetm.vector_mode_supported_p (mode)) + { + if (GET_MODE_INNER (mode) == QImode) + return mode; + else if (ret == VOIDmode) + ret = mode; + } + if (ret != VOIDmode) + return ret; + return opt_machine_mode (); +} + /* Find a mode that is suitable for representing a vector with NUNITS elements of mode INNERMODE, if one exists. The returned mode can be either an integer mode or a vector mode. 
*/ --- gcc/gimple-fold.cc.jj 2024-02-20 10:25:26.297760979 +0100 +++ gcc/gimple-fold.cc 2024-02-27 12:42:38.338925573 +0100 @@ -995,9 +995,12 @@ gimple_fold_builtin_memory_op (gimple_st if (warning != OPT_Wrestrict) return false; - scalar_int_mode mode; - if (int_mode_for_size (ilen * 8, 0).exists (&mode) - && GET_MODE_SIZE (mode) * BITS_PER_UNIT == ilen * 8 + scalar_int_mode imode; + machine_mode mode; + if (int_mode_for_size (ilen * BITS_PER_UNIT, 0).exists (&imode) + && bitwise_mode_for_size (ilen + * BITS_PER_UNIT).exists (&mode) + && known_eq (GET_MODE_BITSIZE (mode), ilen * BITS_PER_UNIT) /* If the destination pointer is not aligned we must be able to emit an unaligned store. */ && (dest_align >= GET_MODE_ALIGNMENT (mode) @@ -1005,7 +1008,7 @@ gimple_fold_builtin_memory_op (gimple_st || (optab_handler (movmisalign_optab, mode) != CODE_FOR_nothing))) { - tree type = build_nonstandard_integer_type (ilen * 8, 1); + tree type = bitwise_type_for_mode (mode); tree srctype = type; tree desttype = type; if (src_align < GET_MODE_ALIGNMENT (mode)) --- gcc/testsuite/gcc.dg/bitint-91.c.jj 2024-02-27 12:08:15.230481756 +0100 +++ gcc/testsuite/gcc.dg/bitint-91.c 2024-02-27 12:08:15.230481756 +0100 @@ -0,0 +1,38 @@ +/* PR tree-optimization/113988 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-mavx512f" { target i?86-*-* x86_64-*-* } } */ + +int i; + +#if __BITINT_MAXWIDTH__ >= 256 +void +foo (void *p, _BitInt(256) x) +{ + __builtin_memcpy (p, &x, sizeof x); +} + +_BitInt(256) +bar (void *p, _BitInt(256) x) +{ + _BitInt(246) y = x + 1; + __builtin_memcpy (p, &y, sizeof y); + return x; +} +#endif + +#if __BITINT_MAXWIDTH__ >= 512 +void +baz (void *p, _BitInt(512) x) +{ + __builtin_memcpy (p, &x, sizeof x); +} + +_BitInt(512) +qux (void *p, _BitInt(512) x) +{ + _BitInt(512) y = x + 1; + __builtin_memcpy (p, &y, sizeof y); + return x; +} +#endif Jakub