Hi: This patch adds cond_add/sub/mul/div expanders for vector float/double. The cond_fma/fms/fnms/fma/max/min/xor/ior/and expanders are still left to do, because I could not come up with a testcase to validate them. The cond_add/sub/mul expanders for vector integer are also still left. Bootstrapped and regression tested on x86_64-linux-gnu{-m32,}. I will push to trunk if there are no objections.
gcc/ChangeLog: * config/i386/sse.md (cond_<insn><mode>):New expander. (cond_mul<mode>): Ditto. (cond_div<mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/cond_op_addsubmuldiv_double-1.c: New test. * gcc.target/i386/cond_op_addsubmuldiv_double-2.c: New test. * gcc.target/i386/cond_op_addsubmuldiv_float-1.c: New test. * gcc.target/i386/cond_op_addsubmuldiv_float-2.c: New test. --- gcc/config/i386/sse.md | 54 ++++++++++++ .../i386/cond_op_addsubmuldiv_double-1.c | 31 +++++++ .../i386/cond_op_addsubmuldiv_double-2.c | 85 +++++++++++++++++++ .../i386/cond_op_addsubmuldiv_float-1.c | 9 ++ .../i386/cond_op_addsubmuldiv_float-2.c | 4 + 5 files changed, 183 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-2.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b5a08988590..8bf1764d3d5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1891,6 +1891,24 @@ (define_insn_and_split "*nabs<mode>2" } [(set_attr "isa" "noavx,noavx,avx,avx")]) +(define_expand "cond_<insn><mode>" + [(set (match_operand:VF 0 "register_operand") + (vec_merge:VF + (plusminus:VF + (match_operand:VF 2 "vector_operand") + (match_operand:VF 3 "vector_operand")) + (match_operand:VF 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL" +{ + emit_insn (gen_<insn><mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_expand "<insn><mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand") (plusminus:VF @@ -1953,6 +1971,24 @@ (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>" (set_attr "prefix" "<round_scalar_prefix>") 
(set_attr "mode" "<ssescalarmode>")]) +(define_expand "cond_mul<mode>" + [(set (match_operand:VF 0 "register_operand") + (vec_merge:VF + (mult:VF + (match_operand:VF 2 "vector_operand") + (match_operand:VF 3 "vector_operand")) + (match_operand:VF 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL" +{ + emit_insn (gen_mul<mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_expand "mul<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand") (mult:VF @@ -2041,6 +2077,24 @@ (define_expand "div<mode>3" } }) +(define_expand "cond_div<mode>" + [(set (match_operand:VF 0 "register_operand") + (vec_merge:VF + (div:VF + (match_operand:VF 2 "register_operand") + (match_operand:VF 3 "vector_operand")) + (match_operand:VF 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL" +{ + emit_insn (gen_<sse>_div<mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_insn "<sse>_div<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-1.c new file mode 100644 index 00000000000..1092cba9876 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-vect" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_RDIV" "vect" } } */ + +#ifndef NUM +#define NUM 800 +#endif +#ifndef TYPE +#define TYPE double +#endif + +TYPE a[NUM], b[NUM], c[NUM], d[NUM], e[NUM], j[NUM]; 
+ +#define BIN(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP e[i]; \ + } + + +BIN (add, +); +BIN (sub, -); +BIN (mul, *); +BIN (div, /); diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-2.c new file mode 100644 index 00000000000..360891f3d21 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_double-2.c @@ -0,0 +1,85 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256" } */ + +#define AVX512VL +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#include "cond_op_addsubmuldiv_double-1.c" +#define BINO2(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP e[i]; \ + } + + +BINO2 (add, +); +BINO2 (sub, -); +BINO2 (mul, *); +BINO2 (div, /); + +static void +test_256 (void) +{ + int sign = -1; + for (int i = 0; i != NUM; i++) + { + a[i] = 0.0; + d[i] = i * 0.5; + e[i] = i * i * 0.3 - i * 0.9 + 15.3; + b[i] = i * 0.83; + c[i] = b[i] + sign; + sign *= -1; + j[i] = b[i] < c[i] ? 1.0 : 0.0; + } + foo_add (); + foo_o2_add (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0.0; + j[i] = b[i] < c[i] ? 1.0 : 0.0; + } + + foo_sub (); + foo_o2_sub (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0.0; + j[i] = b[i] < c[i] ? 1.0 : 0.0; + } + + foo_mul (); + foo_o2_mul (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0.0; + j[i] = b[i] < c[i] ? 
1.0 : 0.0; + } + + foo_div (); + foo_o2_div (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + } +} + +static void +test_128 () +{ + +} diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-1.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-1.c new file mode 100644 index 00000000000..d4975271e26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -DTYPE=float -fdump-tree-vect" } */ +/* { dg-final { scan-tree-dump ".COND_ADD" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_SUB" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_MUL" "vect" } } */ +/* { dg-final { scan-tree-dump ".COND_RDIV" "vect" } } */ + +#include "cond_op_addsubmuldiv_double-1.c" + diff --git a/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-2.c b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-2.c new file mode 100644 index 00000000000..20ed737cbf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_addsubmuldiv_float-2.c @@ -0,0 +1,4 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=float" } */ + +#include "cond_op_addsubmuldiv_double-2.c" -- 2.18.1