On Wed, May 15, 2024 at 4:21 PM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> The ccmp insn itself doesn't support fp compare, but x86 has the fp comi
> insn that changes EFLAGS, which can be the scc input to ccmp. Allow
> scalar fp compare in ix86_gen_ccmp_first except ORDERED/UNORDERED
> compare, which cannot be identified in ccmp.
Ok if the second patch (middle-end part) is approved.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.cc (ix86_gen_ccmp_first): Add fp
>         compare and check the allowed fp compare type.
>         (ix86_gen_ccmp_next): Adjust compare_code input to ccmp for
>         fp compare.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ccmp-1.c: Add test for fp compare.
>         * gcc.target/i386/apx-ccmp-2.c: Likewise.
> ---
>  gcc/config/i386/i386-expand.cc             | 53 ++++++++++++++++++++--
>  gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 45 +++++++++++++++++-
>  gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 47 +++++++++++++++++++
>  3 files changed, 138 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index f00525e449f..7507034dc91 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -25571,18 +25571,58 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn 
> **gen_seq,
>    if (op_mode == VOIDmode)
>      op_mode = GET_MODE (op1);
>
> +  /* We only support the following scalar comparisons that use just 1
> +     instruction: DI/SI/QI/HI/DF/SF/HF.
> +     Unordered/Ordered compares cannot be correctly identified by
> +     ccmp, so they are not supported.  */
>    if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
> -       || op_mode == QImode))
> +       || op_mode == QImode || op_mode == DFmode || op_mode == SFmode
> +       || op_mode == HFmode)
> +      || code == ORDERED
> +      || code == UNORDERED)
>      {
>        end_sequence ();
>        return NULL_RTX;
>      }
>
>    /* Canonicalize the operands according to mode.  */
> -  if (!nonimmediate_operand (op0, op_mode))
> -    op0 = force_reg (op_mode, op0);
> -  if (!x86_64_general_operand (op1, op_mode))
> -    op1 = force_reg (op_mode, op1);
> +  if (SCALAR_INT_MODE_P (op_mode))
> +    {
> +      if (!nonimmediate_operand (op0, op_mode))
> +       op0 = force_reg (op_mode, op0);
> +      if (!x86_64_general_operand (op1, op_mode))
> +       op1 = force_reg (op_mode, op1);
> +    }
> +  else
> +    {
> +      /* op0/op1 can be canonicalized by expand_fp_compare, so
> +        just adjust the code to make it generate a supported fp
> +        condition.  */
> +      if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
> +       {
> +         /* First try to split the condition if we don't need to honor
> +            NaNs, as the ORDERED/UNORDERED check always falls
> +            through.  */
> +         if (!HONOR_NANS (op_mode))
> +           {
> +             rtx_code first_code;
> +             split_comparison (code, op_mode, &first_code, &code);
> +           }
> +         /* Otherwise try to swap the operand order and check if
> +            the comparison is supported.  */
> +         else
> +           {
> +             code = swap_condition (code);
> +             std::swap (op0, op1);
> +           }
> +
> +         if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
> +           {
> +             end_sequence ();
> +             return NULL_RTX;
> +           }
> +       }
> +    }
>
>    *prep_seq = get_insns ();
>    end_sequence ();
> @@ -25647,6 +25687,9 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn 
> **gen_seq, rtx prev,
>    dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
>
>    prev_code = GET_CODE (prev);
> +  /* Fixup FP compare code here.  */
> +  if (GET_MODE (XEXP (prev, 0)) == CCFPmode)
> +    prev_code = ix86_fp_compare_code_to_integer (prev_code);
>
>    if (bit_code != AND)
>      prev_code = reverse_condition (prev_code);
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c 
> b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
> index 5a2dad89f1f..e4e112f07e0 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-O2 -mapx-features=ccmp" } */
> +/* { dg-options "-O2 -ffast-math -mapx-features=ccmp" } */
>
>  int
>  f1 (int a)
> @@ -56,8 +56,49 @@ f9 (int a, int b)
>    return a == 3 || a == 0;
>  }
>
> +int
> +f10 (float a, int b, float c)
> +{
> +  return a > c || b < 19;
> +}
> +
> +int
> +f11 (float a, int b)
> +{
> +  return a == 0.0 && b > 21;
> +}
> +
> +int
> +f12 (double a, int b)
> +{
> +  return a < 3.0 && b != 23;
> +}
> +
> +int
> +f13 (double a, double b, int c, int d)
> +{
> +  a += b;
> +  c += d;
> +  return a != b || c == d;
> +}
> +
> +int
> +f14 (double a, int b)
> +{
> +  return b != 0 && a < 1.5;
> +}
> +
> +int
> +f15 (double a, double b, int c, int d)
> +{
> +  return c != d || a <= b;
> +}
> +
>  /* { dg-final { scan-assembler-times "ccmpg" 2 } } */
>  /* { dg-final { scan-assembler-times "ccmple" 2 } } */
>  /* { dg-final { scan-assembler-times "ccmpne" 4 } } */
> -/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
> +/* { dg-final { scan-assembler-times "ccmpe" 3 } } */
> +/* { dg-final { scan-assembler-times "ccmpbe" 1 } } */
> +/* { dg-final { scan-assembler-times "ccmpa" 1 } } */
> +/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c 
> b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
> index 30a1c216c1b..0123a686d2c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
> @@ -42,6 +42,47 @@ int foo_noapx(int a, int b, int c, int d)
>    return sum;
>  }
>
> +__attribute__((noinline, noclone,
> +              optimize(("finite-math-only")), target("apxf")))
> +double foo_fp_apx(int a, double b, int c, double d)
> +{
> +  int sum = a;
> +  double sumd = b;
> +
> +  if (a != c)
> +    {
> +      sum += a;
> +      if (a < c || sumd != d || sum > c)
> +       {
> +         c += a;
> +         sum += a + c;
> +       }
> +    }
> +
> +  return sum + sumd;
> +}
> +
> +__attribute__((noinline, noclone,
> +              optimize(("finite-math-only")), target("no-apxf")))
> +double foo_fp_noapx(int a, double b, int c, double d)
> +{
> +  int sum = a;
> +  double sumd = b;
> +
> +  if (a != c)
> +    {
> +      sum += a;
> +      if (a < c || sumd != d || sum > c)
> +       {
> +         c += a;
> +         sum += a + c;
> +       }
> +    }
> +
> +  return sum + sumd;
> +}
> +
> +
>  int main (void)
>  {
>    if (!__builtin_cpu_supports ("apxf"))
> @@ -53,5 +94,11 @@ int main (void)
>    if (val1 != val2)
>      __builtin_abort ();
>
> +  double val3 = foo_fp_noapx (24, 7.5, 32, 2.0);
> +  double val4 = foo_fp_apx (24, 7.5, 32, 2.0);
> +
> +  if (val3 != val4)
> +    __builtin_abort ();
> +
>    return 0;
>  }
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to