https://gcc.gnu.org/g:c8eb4fcd40c2faef5dadbaa83abfcc6e058ee9f6

commit r14-11923-gc8eb4fcd40c2faef5dadbaa83abfcc6e058ee9f6
Author: liuhongt <hongtao....@intel.com>
Date:   Tue Jul 29 00:01:37 2025 -0700

    Eliminate redundant vpextrq/vpinsrq when move TI to V4SI.
    
    r14-1902-g96c3539f2a3813 split a TImode move into 2 DImode moves; it's
    supposed to optimize TImode in parameter/return, since according to
    the psABI it's stored in 2 general registers.
    
    But when TImode is not in parameter/return, it could create redundancy
    in the PR.
    
    The patch adds a splitter to handle that.
    
    i.e.
    (insn 10 9 14 2 (set (subreg:V2DI (reg:V4SI 98 [ <retval> ]) 0)
                         (vec_concat:V2DI (subreg:DI (reg:TI 101) 0)
                             (subreg:DI (reg:TI 101) 8)))
                             8442 {vec_concatv2di}
                            (expr_list:REG_DEAD (reg:TI 101)
    
    gcc/ChangeLog:
    
            PR target/121274
            * config/i386/sse.md (*vec_concatv2di_0): Add a splitter
            before it.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr121274.c: New test.
    
    (cherry picked from commit 6a466839340dce3b596b3ae5ce85bd05a067ae00)

Diff:
---
 gcc/config/i386/sse.md                   | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr121274.c | 24 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 67313d518530..477e18b112f6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20793,6 +20793,19 @@
           (const_string "orig")))
    (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
 
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI.  */
+;; in ix86_expand_vector_move
+
+(define_split
+  [(set (match_operand:V2DI 0 "register_operand")
+       (vec_concat:V2DI
+         (subreg:DI (match_operand:TI 1 "register_operand") 0)
+         (subreg:DI (match_dup 1) 8)))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  [(set (match_dup 0)
+       (subreg:V2DI (match_dup 1) 0))])
+
 (define_insn "*vec_concatv2di_0"
   [(set (match_operand:V2DI 0 "register_operand"     "=v,v ,x")
        (vec_concat:V2DI
diff --git a/gcc/testsuite/gcc.target/i386/pr121274.c b/gcc/testsuite/gcc.target/i386/pr121274.c
new file mode 100644
index 000000000000..16760cfcbace
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121274.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpextrq" } } */
+/* { dg-final { scan-assembler-not "vpinsrq" } } */
+
+typedef int v16si __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+
+v4si f(v16si x)
+{
+  return __builtin_shufflevector(x, x, 0, 1, 2, 3);
+}
+
+v4si g(v16si x)
+{
+return __builtin_shufflevector(x, x, 4, 5, 6, 7);
+}
+
+v4si f1(__int128 *x)
+{
+  __int128 t = *x;
+  asm("":"+x"(t));
+  return (v4si)t;
+}

Reply via email to