commit:     490956af07fb29586060d60f6fe9a3eeeb12e9b5
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 26 00:55:51 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Aug 26 00:56:17 2025 +0000
URL:        https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=490956af
16.0.0: fix GFNI changes The change is approved already but I'm about to do a rebuild so may as well have this in. Signed-off-by: Sam James <sam <AT> gentoo.org> ...-recent-changes-to-use-GFNI-for-rotates-s.patch | 134 +++++++++++++++++++++ 16.0.0/gentoo/README.history | 1 + 2 files changed, 135 insertions(+) diff --git a/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch b/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch new file mode 100644 index 0000000..c5bd764 --- /dev/null +++ b/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch @@ -0,0 +1,134 @@ +From 830dea69de876322ec39823073163f1957bfec9a Mon Sep 17 00:00:00 2001 +Message-ID: <830dea69de876322ec39823073163f1957bfec9a.1756169719.git....@gentoo.org> +From: Jakub Jelinek <[email protected]> +Date: Tue, 26 Aug 2025 00:39:47 +0200 +Subject: [PATCH] i386: Fix up recent changes to use GFNI for rotates/shifts + [PR121658] + +Hi! + +The vgf2p8affineqb_<mode><mask_name> pattern uses "register_operand" +predicate for the first input operand, so using "general_operand" +for the rotate operand passed to it leads to ICEs, and so does +the "nonimmediate_operand" in the <insn>v16qi3 define_expand. +The following patch fixes it by using "register_operand" in the former +case (that pattern is TARGET_GFNI only) and using force_reg in +the latter case (the pattern is TARGET_XOP || TARGET_GFNI and for XOP +we can handle MEM operand). + +The rest of the changes are small formatting tweaks or use of const0_rtx +instead of GEN_INT (0). + +Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? + +2025-08-25 Jakub Jelinek <[email protected]> + + PR target/121658 + * config/i386/sse.md (<insn><mode>3 any_shift): Use const0_rtx + instead of GEN_INT (0). + (cond_<insn><mode> any_shift): Likewise. Formatting fix. 
+ (<insn><mode>3 any_rotate): Use register_operand predicate instead of + general_operand for match_operand 1. Use const0_rtx instead of + GEN_INT (0). + (<insn>v16qi3 any_rotate): Use force_reg on operands[1]. Formatting + fix. + * config/i386/i386.cc (ix86_shift_rotate_cost): Comment formatting + fixes. + + * gcc.target/i386/pr121658.c: New test. +--- + gcc/config/i386/i386.cc | 6 +++--- + gcc/config/i386/sse.md | 14 ++++++++------ + gcc/testsuite/gcc.target/i386/pr121658.c | 11 +++++++++++ + 3 files changed, 22 insertions(+), 9 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/i386/pr121658.c + +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 9093f2077346..b2c1acd12dac 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -22104,9 +22104,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, + case V32QImode: + if (TARGET_GFNI && constant_op1) + { +- /* Use vgf2p8affine. One extra load for the mask, but in a loop +- with enough registers it will be moved out. So for now don't +- account the constant mask load. This is not quite right ++ /* Use vgf2p8affine. One extra load for the mask, but in a loop ++ with enough registers it will be moved out. So for now don't ++ account the constant mask load. This is not quite right + for non loop vectorization. 
*/ + extra = 0; + return ix86_vec_cost (mode, cost->sse_op) + extra; +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 951ee54589f3..505095040f75 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -26994,7 +26994,7 @@ (define_expand "<insn><mode>3" + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], + <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, +- GEN_INT (0))); ++ const0_rtx)); + } + else + ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); +@@ -27014,20 +27014,21 @@ (define_expand "cond_<insn><mode>" + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix, +- GEN_INT (0), operands[4], operands[1])); ++ const0_rtx, operands[4], ++ operands[1])); + DONE; + }) + + (define_expand "<insn><mode>3" + [(set (match_operand:VI1_AVX512_3264 0 "register_operand") + (any_rotate:VI1_AVX512_3264 +- (match_operand:VI1_AVX512_3264 1 "general_operand") ++ (match_operand:VI1_AVX512_3264 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_GFNI" + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, +- GEN_INT (0))); ++ const0_rtx)); + DONE; + }) + +@@ -27073,8 +27074,9 @@ (define_expand "<insn>v16qi3" + else if (TARGET_GFNI && CONST_INT_P (operands[2])) + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); +- emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], operands[1], matrix, +- GEN_INT (0))); ++ emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], ++ force_reg (V16QImode, operands[1]), ++ matrix, const0_rtx)); + DONE; + } + else +diff --git a/gcc/testsuite/gcc.target/i386/pr121658.c b/gcc/testsuite/gcc.target/i386/pr121658.c +new file mode 100644 +index 000000000000..04373161e688 +--- /dev/null 
++++ b/gcc/testsuite/gcc.target/i386/pr121658.c +@@ -0,0 +1,11 @@ ++/* PR target/121658 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512f -mgfni" } */ ++ ++__attribute__((__vector_size__(64))) unsigned char v; ++ ++void ++foo (void) ++{ ++ v = (v << 7) | (v >> 1); ++} + +base-commit: 2dfd2779e373dffaae9532d45267497a6246f661 +-- +2.51.0 + diff --git a/16.0.0/gentoo/README.history b/16.0.0/gentoo/README.history index 44873fe..2caadb6 100644 --- a/16.0.0/gentoo/README.history +++ b/16.0.0/gentoo/README.history @@ -1,6 +1,7 @@ 13 ???? U 86_all_PR120933-i386-default-to-mtls-dialect-gnu2-if-appropriate.patch + + 87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch 12 24 August 2025
