Hi: Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. gcc/ChangeLog:
* config/i386/sse.md (cond_<insn><mode>): New expander. (VI248_AVX512VLBW): New mode iterator. * config/i386/predicates.md (nonimmediate_or_const_vec_dup_operand): New predicate. gcc/testsuite/ChangeLog: * gcc.target/i386/cond_op_shift_d-1.c: New test. * gcc.target/i386/cond_op_shift_d-2.c: New test. * gcc.target/i386/cond_op_shift_q-1.c: New test. * gcc.target/i386/cond_op_shift_q-2.c: New test. * gcc.target/i386/cond_op_shift_ud-1.c: New test. * gcc.target/i386/cond_op_shift_ud-2.c: New test. * gcc.target/i386/cond_op_shift_uq-1.c: New test. * gcc.target/i386/cond_op_shift_uq-2.c: New test. * gcc.target/i386/cond_op_shift_uw-1.c: New test. * gcc.target/i386/cond_op_shift_uw-2.c: New test. * gcc.target/i386/cond_op_shift_w-1.c: New test. * gcc.target/i386/cond_op_shift_w-2.c: New test. --- gcc/config/i386/predicates.md | 4 + gcc/config/i386/sse.md | 36 +++++++ .../gcc.target/i386/cond_op_shift_d-1.c | 56 ++++++++++ .../gcc.target/i386/cond_op_shift_d-2.c | 102 ++++++++++++++++++ .../gcc.target/i386/cond_op_shift_q-1.c | 11 ++ .../gcc.target/i386/cond_op_shift_q-2.c | 5 + .../gcc.target/i386/cond_op_shift_ud-1.c | 10 ++ .../gcc.target/i386/cond_op_shift_ud-2.c | 5 + .../gcc.target/i386/cond_op_shift_uq-1.c | 10 ++ .../gcc.target/i386/cond_op_shift_uq-2.c | 5 + .../gcc.target/i386/cond_op_shift_uw-1.c | 8 ++ .../gcc.target/i386/cond_op_shift_uw-2.c | 6 ++ .../gcc.target/i386/cond_op_shift_w-1.c | 8 ++ .../gcc.target/i386/cond_op_shift_w-2.c | 6 ++ 14 files changed, 272 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c create 
mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 6aa1ea32627..129205ac3a7 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1161,6 +1161,10 @@ (define_predicate "nonimmediate_or_const_vector_operand" (ior (match_operand 0 "nonimmediate_operand") (match_code "const_vector"))) +(define_predicate "nonimmediate_or_const_vec_dup_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_test "const_vec_duplicate_p (op)"))) + ;; Return true when OP is either register operand, or any ;; CONST_VECTOR. (define_predicate "reg_or_const_vector_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a46a2373547..45b1ec2add4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -506,6 +506,13 @@ (define_mode_iterator VI248_AVX512VL (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) +(define_mode_iterator VI248_AVX512VLBW + [(V32HI "TARGET_AVX512BW") + (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VI48_AVX2 [(V8SI "TARGET_AVX2") V4SI (V4DI "TARGET_AVX2") V2DI]) @@ -22786,6 +22793,35 @@ (define_expand "vec_init<mode><ssehalfvecmodelower>" DONE; }) +(define_expand "cond_<insn><mode>" + [(set (match_operand:VI248_AVX512VLBW 0 "register_operand") + (vec_merge:VI248_AVX512VLBW + (any_shift:VI248_AVX512VLBW + (match_operand:VI248_AVX512VLBW 2 "register_operand") + (match_operand:VI248_AVX512VLBW 3 
"nonimmediate_or_const_vec_dup_operand")) + (match_operand:VI248_AVX512VLBW 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512F" +{ + if (const_vec_duplicate_p (operands[3])) + { + operands[3] = unwrap_const_vec_duplicate (operands[3]); + operands[3] = lowpart_subreg (DImode, operands[3], <ssescalarmode>mode); + emit_insn (gen_<insn><mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + } + else + emit_insn (gen_<avx2_avx512>_<insn>v<mode>_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") (ashiftrt:VI48_AVX512F_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c new file mode 100644 index 00000000000..af047b6ed95 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrad" 1 } } */ +/* { dg-final { scan-assembler-times "vpslld" 1 } } */ +/* { dg-final { scan-assembler-times "vpsravd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvd" 1 } } */ + + +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; + +#ifndef NUM +#define NUM 800 +#endif +#ifndef TYPE +#define TYPE int +#endif + +TYPE a[NUM], b[NUM], c[NUM], d[NUM], e[NUM], j[NUM]; +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X,Y) ((X) < (Y) ? 
(Y) : (X)) + +#define BINC(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME##_const () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP 3; \ + else \ + a[i] = MAX(d[i], e[i]); \ + } + +#define BINV(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME##_variable () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP e[i]; \ + else \ + a[i] = MAX(d[i], e[i]); \ + } + +BINC (shl, <<); +BINC (shr, >>); +BINV (shl, <<); +BINV (shr, >>); diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c new file mode 100644 index 00000000000..449e5b44be2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#include "cond_op_shift_d-1.c" + +#define BINO2C(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME##_const () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP 3; \ + else \ + j[i] = MAX(d[i], e[i]); \ + } + +#define BINO2V(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME##_variable () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP e[i]; \ + else \ + j[i] = MAX(d[i], e[i]); \ + } + +BINO2C (shl, <<); +BINO2C (shr, >>); +BINO2V (shl, <<); +BINO2V (shr, >>); + +static void +test_256 (void) +{ + int sign = -1; + for (int i = 0; i != NUM; i++) + { + a[i] = 0; + d[i] = i * 2; + e[i] = (i * i * 3 - i * 9 + 6)%8; + b[i] = i * 83; + c[i] = b[i] + sign; + sign *= -1; + j[i] = 1; + } + foo_shl_const (); + foo_o2_shl_const (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + b[i] = 
1; + } + + foo_shr_const (); + foo_o2_shr_const (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } + + foo_shl_variable (); + foo_o2_shl_variable (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + b[i] = 1; + } + + foo_shr_variable (); + foo_o2_shr_variable (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } +} + +static void +test_128 () +{ + +} diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c new file mode 100644 index 00000000000..1b981b57497 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=int64" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsraq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsravq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvq" 1 } } */ + + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c new file mode 100644 index 00000000000..94f1d71cbc9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=int64" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c new file mode 100644 index 00000000000..eea0f6720c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint32" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrld" 1 } } */ +/* { dg-final { scan-assembler-times "vpslld" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrlvd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvd" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c new file mode 100644 index 00000000000..b18c568b26b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=uint32" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c new file mode 100644 index 00000000000..77a03886d06 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint64" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrlq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrlvq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvq" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c new file mode 100644 index 00000000000..a9e0acf9ab2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl 
-mprefer-vector-width=256 -DTYPE=uint64" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c new file mode 100644 index 00000000000..b84cdd89e3e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint16" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrlw" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllw" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c new file mode 100644 index 00000000000..cfdece9f95b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mprefer-vector-width=256 -DTYPE=uint16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c new file mode 100644 index 00000000000..54c854f2f37 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=int16" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsraw" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllw" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c 
b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c new file mode 100644 index 00000000000..577682657dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mprefer-vector-width=256 -DTYPE=int16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ + +#include "cond_op_shift_d-2.c" -- 2.18.1