commit:     490956af07fb29586060d60f6fe9a3eeeb12e9b5
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 26 00:55:51 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Aug 26 00:56:17 2025 +0000
URL:        https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=490956af

16.0.0: fix GFNI changes

The change is approved already but I'm about to do a rebuild so
may as well have this in.

Signed-off-by: Sam James <sam <AT> gentoo.org>

 ...-recent-changes-to-use-GFNI-for-rotates-s.patch | 134 +++++++++++++++++++++
 16.0.0/gentoo/README.history                       |   1 +
 2 files changed, 135 insertions(+)

diff --git a/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch b/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch
new file mode 100644
index 0000000..c5bd764
--- /dev/null
+++ b/16.0.0/gentoo/87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch
@@ -0,0 +1,134 @@
+From 830dea69de876322ec39823073163f1957bfec9a Mon Sep 17 00:00:00 2001
+Message-ID: <830dea69de876322ec39823073163f1957bfec9a.1756169719.git....@gentoo.org>
+From: Jakub Jelinek <[email protected]>
+Date: Tue, 26 Aug 2025 00:39:47 +0200
+Subject: [PATCH] i386: Fix up recent changes to use GFNI for rotates/shifts
+ [PR121658]
+
+Hi!
+
+The vgf2p8affineqb_<mode><mask_name> pattern uses "register_operand"
+predicate for the first input operand, so using "general_operand"
+for the rotate operand passed to it leads to ICEs, and so does
+the "nonimmediate_operand" in the <insn>v16qi3 define_expand.
+The following patch fixes it by using "register_operand" in the former
+case (that pattern is TARGET_GFNI only) and using force_reg in
+the latter case (the pattern is TARGET_XOP || TARGET_GFNI and for XOP
+we can handle MEM operand).
+
+The rest of the changes are small formatting tweaks or use of const0_rtx
+instead of GEN_INT (0).
+
+Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
+
+2025-08-25  Jakub Jelinek  <[email protected]>
+
+       PR target/121658
+       * config/i386/sse.md (<insn><mode>3 any_shift): Use const0_rtx
+       instead of GEN_INT (0).
+       (cond_<insn><mode> any_shift): Likewise.  Formatting fix.
+       (<insn><mode>3 any_rotate): Use register_operand predicate instead of
+       general_operand for match_operand 1.  Use const0_rtx instead of
+       GEN_INT (0).
+       (<insn>v16qi3 any_rotate): Use force_reg on operands[1].  Formatting
+       fix.
+       * config/i386/i386.cc (ix86_shift_rotate_cost): Comment formatting
+       fixes.
+
+       * gcc.target/i386/pr121658.c: New test.
+---
+ gcc/config/i386/i386.cc                  |  6 +++---
+ gcc/config/i386/sse.md                   | 14 ++++++++------
+ gcc/testsuite/gcc.target/i386/pr121658.c | 11 +++++++++++
+ 3 files changed, 22 insertions(+), 9 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr121658.c
+
+diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
+index 9093f2077346..b2c1acd12dac 100644
+--- a/gcc/config/i386/i386.cc
++++ b/gcc/config/i386/i386.cc
+@@ -22104,9 +22104,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
+       case V32QImode:
+         if (TARGET_GFNI && constant_op1)
+           {
+-            /* Use vgf2p8affine. One extra load for the mask, but in a loop
+-               with enough registers it will be moved out. So for now don't
+-               account the constant mask load. This is not quite right
++            /* Use vgf2p8affine.  One extra load for the mask, but in a loop
++               with enough registers it will be moved out.  So for now don't
++               account the constant mask load.  This is not quite right
+                for non loop vectorization.  */
+             extra = 0;
+             return ix86_vec_cost (mode, cost->sse_op) + extra;
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index 951ee54589f3..505095040f75 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -26994,7 +26994,7 @@ (define_expand "<insn><mode>3"
+       rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
+                                                  <CODE>);
+       emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+-                                          GEN_INT (0)));
++                                          const0_rtx));
+     }
+   else
+     ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
+@@ -27014,20 +27014,21 @@ (define_expand "cond_<insn><mode>"
+ {
+  rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+   emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[1], matrix,
+-              GEN_INT (0), operands[4], operands[1]));
++                                           const0_rtx, operands[4],
++                                           operands[1]));
+   DONE;
+ })
+ 
+ (define_expand "<insn><mode>3"
+   [(set (match_operand:VI1_AVX512_3264 0 "register_operand")
+       (any_rotate:VI1_AVX512_3264
+-        (match_operand:VI1_AVX512_3264 1 "general_operand")
++        (match_operand:VI1_AVX512_3264 1 "register_operand")
+         (match_operand:SI 2 "const_int_operand")))]
+   "TARGET_GFNI"
+ {
+  rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+   emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+-             GEN_INT (0)));
++             const0_rtx));
+   DONE;
+ })
+ 
+@@ -27073,8 +27074,9 @@ (define_expand "<insn>v16qi3"
+   else if (TARGET_GFNI && CONST_INT_P (operands[2]))
+     {
+      rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+-      emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], operands[1], matrix,
+-                 GEN_INT (0)));
++      emit_insn (gen_vgf2p8affineqb_v16qi (operands[0],
++                                         force_reg (V16QImode, operands[1]),
++                                         matrix, const0_rtx));
+       DONE;
+     }
+   else
+diff --git a/gcc/testsuite/gcc.target/i386/pr121658.c b/gcc/testsuite/gcc.target/i386/pr121658.c
+new file mode 100644
+index 000000000000..04373161e688
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr121658.c
+@@ -0,0 +1,11 @@
++/* PR target/121658 */
++/* { dg-do compile } */
++/* { dg-options "-O2 -mavx512f -mgfni" } */
++
++__attribute__((__vector_size__(64))) unsigned char v;
++
++void
++foo (void)
++{
++  v = (v << 7) | (v >> 1);
++}
+
+base-commit: 2dfd2779e373dffaae9532d45267497a6246f661
+-- 
+2.51.0
+

diff --git a/16.0.0/gentoo/README.history b/16.0.0/gentoo/README.history
index 44873fe..2caadb6 100644
--- a/16.0.0/gentoo/README.history
+++ b/16.0.0/gentoo/README.history
@@ -1,6 +1,7 @@
 13     ????
 
        U 86_all_PR120933-i386-default-to-mtls-dialect-gnu2-if-appropriate.patch
+       + 87_all_PR121658-i386-Fix-up-recent-changes-to-use-GFNI-for-rotates-s.patch
 
 12     24 August 2025
 

Reply via email to