max pattern.

Uros Bizjak via Gcc-patches Wed, 05 Jul 2023 23:19:49 -0700

On Thu, Jul 6, 2023 at 3:20 AM liuhongt <[email protected]> wrote:
>
> We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/110170
>         * config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
>         splitter to detect fp min/max pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr110170.C: New test.
>         * gcc.target/i386/pr110170.c: New test.
> ---
>  gcc/config/i386/i386.md                  | 30 +++++++++
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
>  3 files changed, 126 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e6ebc461e52..353bb21993d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
>     (set_attr "type" "sseadd")
>     (set_attr "mode" "<MODE>")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_minmax<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && ((rtx_equal_p (operands[1], operands[3])
> +       && rtx_equal_p (operands[2], operands[4]))
> +      || (rtx_equal_p (operands[1], operands[4])
> +         && rtx_equal_p (operands[2], operands[3])))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  int u = (rtx_equal_p (operands[1], operands[3])
> +          && rtx_equal_p (operands[2], operands[4]))
> +          ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
> +  emit_move_insn (operands[0],
> +                 gen_rtx_UNSPEC (<MODE>mode,
> +                                 gen_rtvec (2, operands[2], operands[1]),
> +                                 u));
> +  DONE;
> +})


Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
and the other emitting UNSPEC_IEEE_MIN.

> +
>  ;; Make two stack loads independent:
>  ;;   fld aa              fld aa
>  ;;   fld %st(0)     ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C 
> b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 00000000000..1e9a781ca74
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */

The test involves blendv instruction, which is SSE4.1, so it is
pointless to test it without -msse4.1. Please add -msse4.1 instead of
-march=x86_64 and use sse4_runtime target selector, as is the case
with gcc.target/i386/pr90358.c.

> +#include <math.h>
> +
> +void
> +__attribute__((noinline))
> +__cond_swap(double* __x, double* __y) {
> +  bool __r = (*__x < *__y);
> +  auto __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +auto test1() {
> +    double nan = -0.0;
> +    double x = 0.0;
> +    __cond_swap(&nan, &x);
> +    return x == -0.0 && nan == 0.0;
> +}
> +
> +auto test1r() {
> +    double nan = NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> +}
> +
> +auto test2() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test2r() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test3() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test3r() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test4() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +auto test4r() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +
> +int main() {
> +    if (
> +        !test1() || !test1r()
> +        || !test2() || !test2r()
> +        || !test3() || !test4r()
> +        || !test4() || !test4r()
> +    ) __builtin_abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c 
> b/gcc/testsuite/gcc.target/i386/pr110170.c
> new file mode 100644
> index 00000000000..0f98545cce3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */

Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
the test is valid also for 32bit targets, you should use -msseregparm
additional options for ia32 (please see gcc.target/i386/pr43546.c
testcase) in the same way as -mregparm to pass SSE arguments in
registers.

Uros.

> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
> +
> +void __cond_swap_df(double* __x, double* __y) {
> +  _Bool __r = (*__x < *__y);
> +  double __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +void __cond_swap_sf(float* __x, float* __y) {
> +  _Bool __r = (*__x < *__y);
> +  float __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

Re: [PATCH 1/2] [x86] Add pre_reload splitter to detect fp min/max pattern.

Reply via email to