On Fri, Jun 4, 2021 at 1:01 PM Hongtao Liu <crazy...@gmail.com> wrote: > > On Tue, Jun 1, 2021 at 6:17 PM Marc Glisse <marc.gli...@inria.fr> wrote: > > > > On Tue, 1 Jun 2021, Hongtao Liu via Gcc-patches wrote: > > > > > Hi: > > > This patch is about to simplify (view_convert:type ~a) < 0 to > > > (view_convert:type a) >= 0 when type is signed integer. Similar for > > > (view_convert:type ~a) >= 0. > > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. > > > Ok for the trunk? > > > > > > gcc/ChangeLog: > > > > > > PR middle-end/100738 > > > * match.pd ((view_convert ~a) < 0 --> (view_convert a) >= 0, > > > (view_convert ~a) >= 0 --> (view_convert a) < 0): New GIMPLE > > > simplification. > > > > We already have > > > > /* Fold ~X op C as X op' ~C, where op' is the swapped comparison. */ > > (for cmp (simple_comparison) > > scmp (swapped_simple_comparison) > > (simplify > > (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1) > > (if (single_use (@2) > > && (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST)) > > (scmp @0 (bit_not @1))))) > > > > Would it make sense to try and generalize it a bit, say with > > > > (cmp (nop_convert1? (bit_not @0)) CONSTANT_CLASS_P) > > > > (scmp (view_convert:XXX @0) (bit_not @1)) > > > Thanks for your advice, it looks great. > And can I use *view_convert1?* instead of *nop_convert1?* here, > because the original case is view_convert, and nop_convert would fail > to simplify the case. Here is the updated patch:
gcc/ChangeLog: PR middle-end/100738 * match.pd (Fold ~X op C as X op' ~C): Extend GIMPLE simplification to handle view_convert ~X. gcc/testsuite/ChangeLog: PR middle-end/100738 * g++.target/i386/avx2-pr100738-1.C: New test. * g++.target/i386/sse4_1-pr100738-1.C: New test. > > (I still believe that it is a bad idea that SSA_NAMEs are strongly typed, > > encoding the type in operations would be more convenient, but I think the > > time for that choice has long gone) > > > > -- > > Marc Glisse > > > > -- > BR, > Hongtao -- BR, Hongtao
From 60308636a36fa7a5b96d115452a42be914ef19e7 Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Thu, 27 May 2021 15:21:06 +0800 Subject: [PATCH] Extend gimple simplification of ((~X) op C --> (X) op' ~C) to handle view_convert of ~X gcc/ChangeLog: PR middle-end/100738 * match.pd (Fold ~X op C as X op' ~C): Extend GIMPLE simplification to handle view_convert ~X. gcc/testsuite/ChangeLog: PR middle-end/100738 * g++.target/i386/avx2-pr100738-1.C: New test. * g++.target/i386/sse4_1-pr100738-1.C: New test. --- gcc/match.pd | 5 +- .../g++.target/i386/avx2-pr100738-1.C | 120 ++++++++++++++++++ .../g++.target/i386/sse4_1-pr100738-1.C | 120 ++++++++++++++++++ 3 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/avx2-pr100738-1.C create mode 100644 gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C diff --git a/gcc/match.pd b/gcc/match.pd index cdb87636951..cbb76d67dc5 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4144,10 +4144,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (for cmp (simple_comparison) scmp (swapped_simple_comparison) (simplify - (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1) + (cmp (view_convert1? (bit_not@2 @0)) CONSTANT_CLASS_P@1) + (with {tree ttype = TREE_TYPE (@1);} (if (single_use (@2) && (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST)) - (scmp @0 (bit_not @1))))) + (scmp (view_convert:ttype @0) (bit_not @1)))))) (for cmp (simple_comparison) /* Fold (double)float1 CMP (double)float2 into float1 CMP float2. 
*/ diff --git a/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C new file mode 100644 index 00000000000..80fdad3e5f0 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/avx2-pr100738-1.C @@ -0,0 +1,120 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -std=c++14 -O2 -mno-avx512f -mno-xop" } */ +/* { dg-final { scan-assembler-not "pxor" } } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "pblendvb" 6 } } */ +/* { dg-final { scan-assembler-times "blendvps" 6 } } */ +/* { dg-final { scan-assembler-times "blendvpd" 6 } } */ + +typedef char v32qi __attribute__ ((vector_size (32))); +typedef short v16hi __attribute__ ((vector_size (32))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef long long v4di __attribute__ ((vector_size (32))); + +v8si +f1 (v32qi a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v4di +f2 (v32qi a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f3 (v16hi a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v8si +f4 (v16hi a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v4di +f5 (v16hi a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f6 (v8si a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v4di +f7 (v8si a, v4di b, v4di c) +{ + return ((v4di)~a) < 0 ? b : c; +} + +v32qi +f8 (v4di a, v32qi b, v32qi c) +{ + return ((v32qi)~a) < 0 ? b : c; +} + +v8si +f9 (v4di a, v8si b, v8si c) +{ + return ((v8si)~a) < 0 ? b : c; +} + +v8si +f10 (v32qi a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? b : c; +} + +v4di +f11 (v32qi a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? b : c; +} + +v32qi +f12 (v16hi a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v8si +f13 (v16hi a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? b : c; +} + +v4di +f14 (v16hi a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? 
b : c; +} + +v32qi +f15 (v8si a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v4di +f16 (v8si a, v4di b, v4di c) +{ + return ((v4di)~a) >= 0 ? b : c; +} + +v32qi +f17 (v4di a, v32qi b, v32qi c) +{ + return ((v32qi)~a) >= 0 ? b : c; +} + +v8si +f18 (v4di a, v8si b, v8si c) +{ + return ((v8si)~a) >= 0 ? b : c; +} diff --git a/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C new file mode 100644 index 00000000000..d3454c264cd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/sse4_1-pr100738-1.C @@ -0,0 +1,120 @@ +/* { dg-do compile } */ +/* { dg-options "-msse4 -std=c++14 -mno-avx2 -O2 -mno-xop" } */ +/* { dg-final { scan-assembler-not "pxor" } } */ +/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */ +/* { dg-final { scan-assembler-times "pblendvb" 6 } } */ +/* { dg-final { scan-assembler-times "blendvps" 6 } } */ +/* { dg-final { scan-assembler-times "blendvpd" 6 } } */ + +typedef char v16qi __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef long long v2di __attribute__ ((vector_size (16))); + +v4si +f1 (v16qi a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? b : c; +} + +v2di +f2 (v16qi a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f3 (v8hi a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v4si +f4 (v8hi a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? b : c; +} + +v2di +f5 (v8hi a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f6 (v4si a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v2di +f7 (v4si a, v2di b, v2di c) +{ + return ((v2di)~a) < 0 ? b : c; +} + +v16qi +f8 (v2di a, v16qi b, v16qi c) +{ + return ((v16qi)~a) < 0 ? b : c; +} + +v4si +f9 (v2di a, v4si b, v4si c) +{ + return ((v4si)~a) < 0 ? b : c; +} + +v4si +f10 (v16qi a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? 
b : c; +} + +v2di +f11 (v16qi a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f12 (v8hi a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v4si +f13 (v8hi a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? b : c; +} + +v2di +f14 (v8hi a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f15 (v4si a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v2di +f16 (v4si a, v2di b, v2di c) +{ + return ((v2di)~a) >= 0 ? b : c; +} + +v16qi +f17 (v2di a, v16qi b, v16qi c) +{ + return ((v16qi)~a) >= 0 ? b : c; +} + +v4si +f18 (v2di a, v4si b, v4si c) +{ + return ((v4si)~a) >= 0 ? b : c; +} -- 2.18.1