[PATCH] s390: Floating point vector lane handling

Juergen Christ Wed, 14 May 2025 07:30:59 -0700

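Since floating point and vector registers overlap on s390, a scalar
float or double can be extracted from a vector register directly into
a floating point register (FPR) instead of going through a general
register or memory.  Additionally, for V2DF vectors, more efficient
code can be generated to set a single lane from a value held in an FPR.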


gcc/ChangeLog:

        * config/s390/vector.md (VF): New mode iterator.
        (VEC_SET_NONFLOAT): New mode iterator.
        (VEC_SET_SINGLEFLOAT): New mode iterator.
        (*vec_set<mode>): Split pattern in two.
        (*vec_setv2df): Extract special handling for V2DF mode.
        (*vec_extract<mode>): Split pattern in two.
        (*vec_splat<mode>): Use V instead of V_128_NOSINGLE.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/vector/vec-extract-1.c: New test.
        * gcc.target/s390/vector/vec-set-1.c: New test.

Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
---
 gcc/config/s390/vector.md                     | 135 ++++++++++++-
 .../gcc.target/s390/vector/vec-extract-1.c    | 190 ++++++++++++++++++
 .../gcc.target/s390/vector/vec-set-1.c        |  67 ++++++
 3 files changed, 381 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-set-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index e29255fe1116..580cf6fc71f6 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -75,6 +75,8 @@
                           V1DF V2DF
                           (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 
+(define_mode_iterator VF [(V2SF "TARGET_VXE") (V4SF "TARGET_VXE") V2DF])
+
 ; All modes present in V_HW1 and VFT.
 (define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF
                               V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
@@ -506,26 +508,90 @@
                   UNSPEC_VEC_SET))]
   "TARGET_VX")
 
+; Iterator for vec_set that does not use special float/vect overlay tricks
+(define_mode_iterator VEC_SET_NONFLOAT
+  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF V4SF])
+; Iterator for single element float vectors
+(define_mode_iterator VEC_SET_SINGLEFLOAT [(V1SF "TARGET_VXE") V1DF (V1TF "TARGET_VXE")])
+
 ; FIXME: Support also vector mode operands for 1
 ; FIXME: A target memory operand seems to be useful otherwise we end
 ; up with vl vlvgg vst.  Shouldn't the middle-end be able to handle
 ; that itself?
 ; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih, vleif, vleig
 (define_insn "*vec_set<mode>"
-  [(set (match_operand:V                    0 "register_operand"  "=v,v,v")
-       (unspec:V [(match_operand:<non_vec> 1 "general_operand"    "d,R,K")
-                  (match_operand:SI        2 "nonmemory_operand" "an,I,I")
-                  (match_operand:V         3 "register_operand"   "0,0,0")]
-                 UNSPEC_VEC_SET))]
+  [(set (match_operand:VEC_SET_NONFLOAT      0 "register_operand"  "=v,v,v")
+       (unspec:VEC_SET_NONFLOAT
+         [(match_operand:<non_vec>          1 "general_operand"    "d,R,K")
+          (match_operand:SI                 2 "nonmemory_operand" "an,I,I")
+          (match_operand:VEC_SET_NONFLOAT   3 "register_operand"   "0,0,0")]
+         UNSPEC_VEC_SET))]
   "TARGET_VX
    && (!CONST_INT_P (operands[2])
-       || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
+       || UINTVAL (operands[2]) < GET_MODE_NUNITS (<VEC_SET_NONFLOAT:MODE>mode))"
   "@
    vlvg<bhfgq>\t%v0,%1,%Y2
    vle<bhfgq>\t%v0,%1,%2
    vlei<bhfgq>\t%v0,%1,%2"
   [(set_attr "op_type" "VRS,VRX,VRI")])
 
+(define_insn "*vec_set<mode>"
+  [(set (match_operand:VEC_SET_SINGLEFLOAT     0 "register_operand"  "=v,v")
+       (unspec:VEC_SET_SINGLEFLOAT
+         [(match_operand:<non_vec>            1 "general_operand"    "f,R")
+          (match_operand:SI                   2 "nonmemory_operand" "an,I")
+          (match_operand:VEC_SET_SINGLEFLOAT  3 "register_operand"   "0,0")]
+         UNSPEC_VEC_SET))]
+  "TARGET_VX"
+  "@
+  vlr\t%v0,%v1
+  vle<bhfgq>\t%v0,%1,0"
+ [(set_attr "op_type" "VRR,VRX")])
+
+(define_insn "*vec_setv2df"
+  [(set (match_operand:V2DF                    0 "register_operand"  "=v,v,v,v")
+       (unspec:V2DF [(match_operand:DF        1 "general_operand"    "d,R,K,f")
+                     (match_operand:SI        2 "nonmemory_operand" "an,I,I,n")
+                     (match_operand:V2DF      3 "register_operand"   "0,0,0,0")]
+                    UNSPEC_VEC_SET))]
+  "TARGET_VX
+   && (!CONST_INT_P (operands[2])
+       || UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))"
+  "@
+   vlvgg\t%v0,%1,%Y2
+   vleg\t%v0,%1,%2
+   vleig\t%v0,%1,%2
+   #"
+  [(set_attr "op_type" "VRS,VRX,VRI,*")])
+
+(define_split
+  [(set (match_operand:V2DF                 0 "register_operand"  "")
+       (unspec:V2DF [(match_operand:DF        1 "register_operand"  "")
+                     (match_operand:SI        2 "const_int_operand" "")
+                     (match_operand:V2DF      3 "register_operand"  "")]
+                    UNSPEC_VEC_SET))]
+  "TARGET_VX
+   && (UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))
+   && reload_completed
+   && FP_REGNO_P (REGNO (operands[1]))"
+   [(set (match_dup 0)
+      (vec_select:V2DF
+        (vec_concat:V4DF
+         (match_dup 1)
+         (match_dup 3))
+       (parallel [(const_int 0) (match_dup 4)])))]
+  "
+  {
+    operands[1] = gen_rtx_REG (V2DFmode, REGNO (operands[1]));
+    if (UINTVAL (operands[2]) == 0)
+      operands[4] = GEN_INT (3);
+    else
+    {
+      std::swap (operands[1], operands[3]);
+      operands[4] = GEN_INT (2);
+    }
+   }")
+
 ; vlvgb, vlvgh, vlvgf, vlvgg
 (define_insn "*vec_set<mode>_plus"
   [(set (match_operand:V                      0 "register_operand" "=v")
@@ -554,19 +620,66 @@
 (define_insn "*vec_extract<mode>"
   [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R")
        (vec_select:<non_vec>
-         (match_operand:V        1 "nonmemory_operand"  "v,v")
+         (match_operand:VI       1 "nonmemory_operand"  "v,v")
          (parallel
           [(match_operand:SI     2 "nonmemory_operand" "an,I")])))]
   "TARGET_VX"
   {
     if (CONST_INT_P (operands[2]))
-         operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<V:MODE>mode) - 1));
+         operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VI:MODE>mode) - 1));
     if (which_alternative == 0)
       return "vlgv<bhfgq>\t%0,%v1,%Y2";
        return "vste<bhfgq>\t%v1,%0,%2";
   }
   [(set_attr "op_type" "VRS,VRX")])
 
+(define_insn "*vec_extract<mode>"
+  [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R,f")
+       (vec_select:<non_vec>
+         (match_operand:VF       1 "nonmemory_operand"  "v,v,f")
+         (parallel
+          [(match_operand:SI     2 "nonmemory_operand" "an,I,n")])))]
+  "TARGET_VX"
+  {
+    if (CONST_INT_P (operands[2]))
+      operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1));
+    if (which_alternative == 0)
+      return "vlgv<bhfgq>\t%0,%v1,%Y2";
+    else if (which_alternative == 1)
+      return "vste<bhfgq>\t%v1,%0,%2";
+    else
+      return "#";
+  }
+  [(set_attr "op_type" "VRS,VRX,*")])
+
+(define_split
+  [(set (match_operand:<non_vec> 0 "register_operand" "")
+       (vec_select:<non_vec>
+         (match_operand:VF       1 "register_operand"  "")
+         (parallel
+          [(match_operand:SI     2 "const_int_operand" "")])))]
+  "TARGET_VX && reload_completed && FP_REGNO_P (REGNO (operands[0]))"
+  [(set (match_dup 0)
+        (vec_duplicate:VF
+           (vec_select:<non_vec>
+              (match_dup 1)
+              (parallel [(match_dup 2)]))))]
+  "
+  {
+    unsigned HOST_WIDE_INT idx = UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1);
+    if (idx == 0)
+      {
+        rtx dest = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+        emit_insn (gen_mov<VF:mode> (dest, operands[1]));
+        DONE;
+      }
+    else
+      {
+        operands[0] = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+        operands[2] = GEN_INT (idx);
+      }
+  }")
+
 ; vlgvb, vlgvh, vlgvf, vlgvg
 (define_insn "*vec_extract<mode>_plus"
   [(set (match_operand:<non_vec>       0 "nonimmediate_operand" "=d")
@@ -603,10 +716,10 @@
 ; Replicate from vector element
 ; vrepb, vreph, vrepf, vrepg
 (define_insn "*vec_splat<mode>"
-  [(set (match_operand:V_128_NOSINGLE   0 "register_operand" "=v")
-       (vec_duplicate:V_128_NOSINGLE
+  [(set (match_operand:V   0 "register_operand" "=v")
+       (vec_duplicate:V
         (vec_select:<non_vec>
-         (match_operand:V_128_NOSINGLE 1 "register_operand"  "v")
+         (match_operand:V 1 "register_operand"  "v")
          (parallel
           [(match_operand:QI 2 "const_mask_operand" "C")]))))]
   "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
new file mode 100644
index 000000000000..9df7909a3ea8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-extract-1.c
@@ -0,0 +1,190 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=z14 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+typedef double V2DF __attribute__((vector_size(16)));
+typedef float V4SF __attribute__((vector_size(16)));
+typedef float V2SF __attribute__((vector_size(8)));
+typedef double V1DF __attribute__((vector_size(8)));
+typedef float V1SF __attribute__((vector_size(4)));
+typedef long double V1TF __attribute__((vector_size(16)));
+
+/*
+** extractfirstdouble:
+**     vlr     %v0,%v24
+**     br      %r14
+*/
+double
+extractfirstdouble (V2DF x)
+{
+  return x[0];
+}
+
+/*
+** extractseconddouble:
+**     vrepg   %v0,%v24,1
+**     br      %r14
+*/
+double
+extractseconddouble (V2DF x)
+{
+  return x[1];
+}
+
+/*
+** extractnthdouble:
+**     vlgvg   (%r.),%v24,0\(%r2\)
+**     ldgr    %f0,\1
+**     br      %r14
+*/
+double
+extractnthdouble (V2DF x, int n)
+{
+  return x[n];
+}
+
+/*
+** sumfirstdouble:
+**     vfadb   %v0,%v24,%v26
+**     br      %r14
+*/
+double
+sumfirstdouble (V2DF x, V2DF y)
+{
+  return (x + y)[0];
+}
+
+/*
+** extractfirstfloat:
+**     vlr     %v0,%v24
+**     br      %r14
+*/
+float
+extractfirstfloat (V4SF x)
+{
+  return x[0];
+}
+
+/*
+** extractsecondfloat:
+**     vrepf   %v0,%v24,1
+**     br      %r14
+*/
+float
+extractsecondfloat (V4SF x)
+{
+  return x[1];
+}
+
+/*
+** extractthirdfloat:
+**     vrepf   %v0,%v24,2
+**     br      %r14
+*/
+float
+extractthirdfloat (V4SF x)
+{
+  return x[2];
+}
+
+/*
+** extractfourthfloat:
+**     vrepf   %v0,%v24,3
+**     br      %r14
+*/
+float
+extractfourthfloat (V4SF x)
+{
+  return x[3];
+}
+
+/*
+** extractnthfloat:
+**     vlgvf   (%r.),%v24,0\(%r2\)
+**     vlvgf   %v0,\1,0
+**     br      %r14
+*/
+float
+extractnthfloat (V4SF x, int n)
+{
+  return x[n];
+}
+
+/*
+** sumfirstfloat:
+**     vfasb   %v0,%v24,%v26
+**     br      %r14
+*/
+float
+sumfirstfloat (V4SF x, V4SF y)
+{
+  return (x + y)[0];
+}
+
+/*
+** extractfirst2:
+**     vlr     %v0,%v24
+**     br      %r14
+*/
+float
+extractfirst2 (V2SF x)
+{
+  return x[0];
+}
+
+/*
+** extractsecond2:
+**     vrepf   %v0,%v24,1
+**     br      %r14
+*/
+float
+extractsecond2 (V2SF x)
+{
+  return x[1];
+}
+
+/*
+** extractnth2:
+**     vlgvf   (%r.),%v24,0\(%r2\)
+**     vlvgf   %v0,\1,0
+**     br      %r14
+*/
+float
+extractnth2 (V2SF x, int n)
+{
+  return x[n];
+}
+
+/*
+** extractsinglef:
+**     vlr     %v0,%v24
+**     br      %r14
+*/
+float
+extractsinglef (V1SF x)
+{
+  return x[0];
+}
+
+/*
+** extractsingled:
+**     vlr     %v0,%v24
+**     br      %r14
+*/
+double
+extractsingled (V1DF x)
+{
+  return x[0];
+}
+
+/*
+** extractsingleld:
+**     vlr     (%v.),%v24
+**     vst     \1,0\(%r2\),3
+**     br      %r14
+*/
+long double
+extractsingleld (V1TF x)
+{
+  return x[0];
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-set-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-set-1.c
new file mode 100644
index 000000000000..2eddb58290f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-set-1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=z14 -mzarch" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+typedef double V2DF __attribute__((vector_size(16)));
+typedef double V1DF __attribute__((vector_size(8)));
+
+/*
+** setdf0:
+**     vpdi    %v24,%v0,%v24,1
+**     br      %r14
+*/
+V2DF
+setdf0 (V2DF x, double y)
+{
+  x[0] = y;
+  return x;
+}
+
+/*
+** setdf1:
+**     vmrhg   %v24,%v24,%v0
+**     br      %r14
+*/
+V2DF
+setdf1 (V2DF x, double y)
+{
+  x[1] = y;
+  return x;
+}
+
+/*
+** setdfn:
+**     lgdr    (%r.),%f0
+**     vlvgg   %v24,\1,0\(%r2\)
+**     br      %r14
+*/
+V2DF
+setdfn (V2DF x, double y, int n)
+{
+  x[n] = y;
+  return x;
+}
+
+/*
+** set1df:
+**     vlr     %v24,%v0
+**     br      %r14
+*/
+V1DF
+set1df (V1DF x, double y)
+{
+  x[0] = y;
+  return x;
+}
+
+/*
+** set1dfn:
+**     vlr     %v24,%v0
+**     br      %r14
+*/
+V1DF
+set1dfn (V1DF x, double y, int n)
+{
+  x[n] = y;
+  return x;
+}
-- 
2.43.5

[PATCH] s390: Floating point vector lane handling

Reply via email to