Some cast patterns were missing, leading to missed or bad vectorizations
where the conversion was done on scalars, followed by building a vector
from the individual elements.
gcc/ChangeLog:
* config/s390/vector.md (VI_TRUNC): New mode iterator.
(VEC_HALF_NARROWED): New mode attribute.
(vec_half_narrowed): Ditto.
(trunc<VI_TRUNC:mode><vec_half_narrowed>2): New pattern.
(vec_pack_ufix_trunc_v2df): ditto.
(vec_pack_sfix_trunc_v2df): ditto.
(vec_unpack_sfix_trunc_lo_v4sf): ditto.
(vec_unpack_sfix_trunc_hi_v4sf): ditto.
(vec_unpack_ufix_trunc_lo_v4sf): ditto.
(vec_unpack_ufix_trunc_hi_v4sf): ditto.
(floatv2siv2sf2): ditto.
(floatunsv2siv2sf2): ditto.
(vec_unpacks_float_hi_v4si): ditto.
(vec_unpacks_float_lo_v4si): ditto.
(vec_unpacku_float_hi_v4si): ditto.
(vec_unpacku_float_lo_v4si): ditto.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/vec-cast-single.c: New test.
* gcc.target/s390/vector/vec_pack_ufix_trunc_v2df.c: New test.
Bootstrapped and regtested on s390x. Ok for trunk?
Signed-off-by: Juergen Christ <[email protected]>
---
gcc/config/s390/vector.md | 170 ++++++++++-
.../gcc.target/s390/vector/vec-cast-single.c | 271 ++++++++++++++++++
.../s390/vector/vec_pack_ufix_trunc_v2df.c | 30 ++
3 files changed, 463 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-cast-single.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec_pack_ufix_trunc_v2df.c
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 40de0c75a7cf..356f25d26deb 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -89,6 +89,8 @@
(define_mode_iterator VI_EXTEND [V2QI V2HI V2SI V4QI V4HI])
+(define_mode_iterator VI_TRUNC [V2HI V2SI V2DI V4HI V4SI])
+
; Empty string for all but TImode. This is used to hide the TImode
; expander name in case it is defined already. See addti3 for an
; example.
@@ -211,6 +213,14 @@
(V1SF "v1df") (V2SF "v2df") (V4SF "v4df")
(V1DF "v1tf") (V2DF "v2tf")])
+; Vector mode with half the element size and the same number of elements.
+(define_mode_attr VEC_HALF_NARROWED [(V1HI "V1QI") (V2HI "V2QI") (V4HI "V4QI") (V8HI "V8QI")
+ (V1SI "V1HI") (V2SI "V2HI") (V4SI "V4HI")
+ (V1DI "V1SI") (V2DI "V2SI")])
+(define_mode_attr vec_half_narrowed [(V1HI "v1qi") (V2HI "v2qi") (V4HI "v4qi") (V8HI "v8qi")
+ (V1SI "v1hi") (V2SI "v2hi") (V4SI "v4hi")
+ (V1DI "v1si") (V2DI "v2si")])
+
+
; Vector with half the element size AND half the number of elements.
(define_mode_attr vec_halfhalf
[(V2HI "V2QI") (V4HI "V4QI") (V8HI "V8QI")
@@ -2422,6 +2432,17 @@
operands[2] = gen_reg_rtx (V4SFmode);
})
+;; vector truncate
+
+; downcasts: narrow every element to half its width, same element count
+
+(define_insn "trunc<VI_TRUNC:mode><vec_half_narrowed>2"
+ [(set (match_operand:<VEC_HALF_NARROWED> 0 "register_operand" "=v")
+ (truncate:<VEC_HALF_NARROWED> (match_operand:VI_TRUNC 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vpk<bhfgq>\t%0,%1,%1"
+ [(set_attr "op_type" "VRR")])
+
;; vector unpack v16qi
; signed
@@ -3177,17 +3198,150 @@
emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
emit_insn (gen_vstlv16qi (operands[1], len, mem));
DONE;
-});;
+})
+
+(define_expand "vec_pack_ufix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V2DF 1 "register_operand")
+ (match_operand:V2DF 2 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r1 = gen_reg_rtx (V2DImode);
+ rtx r2 = gen_reg_rtx (V2DImode);
+
+ emit_insn (gen_fixuns_truncv2dfv2di2 (r1, operands[1]));
+ emit_insn (gen_fixuns_truncv2dfv2di2 (r2, operands[2]));
+ emit_insn (gen_vec_pack_trunc_v2di (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V2DF 1 "register_operand")
+ (match_operand:V2DF 2 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r1 = gen_reg_rtx (V2DImode);
+ rtx r2 = gen_reg_rtx (V2DImode);
+
+ emit_insn (gen_fix_truncv2dfv2di2 (r1, operands[1]));
+ emit_insn (gen_fix_truncv2dfv2di2 (r2, operands[2]));
+ emit_insn (gen_vec_pack_trunc_v2di (operands[0], r1, r2));
+ DONE;
+})
+
+; v4sf -> v2di: convert to v4si first, then unpack the requested half
+(define_expand "vec_unpack_sfix_trunc_lo_v4sf"
+ [(match_operand:V2DI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V4SImode); /* Holds the V4SI conversion result.  */
+
+ emit_insn (gen_fix_truncv4sfv4si2 (r, operands[1]));
+ emit_insn (gen_vec_unpacks_lo_v4si (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpack_sfix_trunc_hi_v4sf"
+ [(match_operand:V2DI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V4SImode); /* Holds the V4SI conversion result.  */
+
+ emit_insn (gen_fix_truncv4sfv4si2 (r, operands[1]));
+ emit_insn (gen_vec_unpacks_hi_v4si (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpack_ufix_trunc_lo_v4sf"
+ [(match_operand:V2DI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V4SImode); /* Holds the V4SI conversion result.  */
+
+ emit_insn (gen_fixuns_truncv4sfv4si2 (r, operands[1]));
+ emit_insn (gen_vec_unpacku_lo_v4si (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpack_ufix_trunc_hi_v4sf"
+ [(match_operand:V2DI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V4SImode); /* Holds the V4SI conversion result.  */
+
+ emit_insn (gen_fixuns_truncv4sfv4si2 (r, operands[1]));
+ emit_insn (gen_vec_unpacku_hi_v4si (operands[0], r));
+ DONE;
+})
+(define_insn "floatv2siv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=v")
+ (float:V2SF (match_operand:V2SI 1 "register_operand" "v")))]
+ "TARGET_VXE2"
+ "vcefb\t%v0,%v1,0,5"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "floatunsv2siv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=v")
+ (unsigned_float:V2SF (match_operand:V2SI 1 "register_operand" "v")))]
+ "TARGET_VXE2"
+ "vcelfb\t%v0,%v1,0,5"
+ [(set_attr "op_type" "VRR")])
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V2DImode); /* Holds the unpacked V2DI half.  */
+
+ emit_insn (gen_vec_unpacks_hi_v4si (r, operands[1]));
+ emit_insn (gen_floatv2div2df2 (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V2DImode); /* Holds the unpacked V2DI half.  */
+
+ emit_insn (gen_vec_unpacks_lo_v4si (r, operands[1]));
+ emit_insn (gen_floatv2div2df2 (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v4si"
+ [(match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V2DImode); /* Holds the unpacked V2DI half.  */
+
+ emit_insn (gen_vec_unpacku_hi_v4si (r, operands[1]));
+ emit_insn (gen_floatunsv2div2df2 (operands[0], r));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v4si"
+ [(match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")]
+ "TARGET_VX"
+{
+ rtx r = gen_reg_rtx (V2DImode); /* Holds the unpacked V2DI half.  */
+
+ emit_insn (gen_vec_unpacku_lo_v4si (r, operands[1]));
+ emit_insn (gen_floatunsv2div2df2 (operands[0], r));
+ DONE;
+})
; reduc_smin
; reduc_smax
; reduc_umin
; reduc_umax
-
-; vec_pack_sfix_trunc: convert + pack ?
-; vec_pack_ufix_trunc
-; vec_unpacks_float_hi
-; vec_unpacks_float_lo
-; vec_unpacku_float_hi
-; vec_unpacku_float_lo
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-cast-single.c b/gcc/testsuite/gcc.target/s390/vector/vec-cast-single.c
new file mode 100644
index 000000000000..59a154594e9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-cast-single.c
@@ -0,0 +1,271 @@
+/* Check that the single-step vector conversions work. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-slp-all" } */
+/* { dg-final { scan-tree-dump-not "conversion not supported by target" "slp" } } */
+
+void
+extendv4hiv4si2 (short *in, int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+void
+zero_extendv4hiv4si2 (unsigned short *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+void
+vec_unpacks_v4si (int *in, long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+void
+vec_unpacku_v4si (unsigned int *in, unsigned long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+extedv2siv2di2 (int *in, long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+zero_extedv2siv2di2 (unsigned int *in, unsigned long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+truncv4siv4hi2_signed (int *in, short *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+truncv4siv4hi2_unsigned (unsigned int *in, unsigned short *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+truncv2div2si2_signed (long *in, int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+truncv2div2si2_unsigned (unsigned long *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+fix_truncv4sfv4si2 (float *in, int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+fixuns_truncv4sfv4si2 (float *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_pack_trunc_v2di__signed (long *in, int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_pack_trunc_v2di__unsigned (unsigned long *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_pack_sfix_trunc_v2df (double *in, int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_pack_ufix_trunc_v2df (double *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_unpack_sfix_trunc (float *in, long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_unpack_ufix_trunc (float *in, unsigned long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+fix_truncv2dfv2di2 (double *in, long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+fixuns_truncv2dfv2di2 (double *in, unsigned long *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+floatv4hiv4sf2 (short *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+floatunsv4hiv4sf2 (unsigned short *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+floatv4siv4sf2 (int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+floatunsv4siv4sf2 (unsigned int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_packs_float_v2di (int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_packu_float_v2di (unsigned int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+floatv2div2df2 (long *in, double *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+floatunsv2div2df2 (unsigned long *in, double *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+
+void
+floatv2siv2sf2 (int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+void
+floatunsv2siv2sf2 (unsigned int *in, float *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+}
+void
+vec_unpacks_float_hi_v4si (int *in, double *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+void
+vec_unpacku_float_hi_v4si (unsigned int *in, double *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec_pack_ufix_trunc_v2df.c b/gcc/testsuite/gcc.target/s390/vector/vec_pack_ufix_trunc_v2df.c
new file mode 100644
index 000000000000..4fcfbd88abe4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec_pack_ufix_trunc_v2df.c
@@ -0,0 +1,30 @@
+/* Check that vec_pack_ufix_trunc_v2df pattern is correctly used. Even without
+ this pattern, we will vectorize this code, but produce wrong output. */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -mzarch -march=z13 -ftree-vectorize -fvect-cost-model=unlimited" } */
+
+__attribute__((noinline,noclone,noipa))
+void
+vec_pack_ufix_trunc_v2df (double *in, unsigned int *out);
+
+void
+vec_pack_ufix_trunc_v2df (double *in, unsigned int *out)
+{
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+int main()
+{
+ double in[] = {-1,-2,-3,-4};
+ unsigned int out[4];
+
+ vec_pack_ufix_trunc_v2df (in, out);
+ for (int i = 0; i < 4; ++i)
+ if (out[i] != 0)
+ __builtin_abort();
+ return 0;
+}
--
2.43.0