On Wed, Sep 8, 2021 at 6:02 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Wed, Sep 08, 2021 at 06:00:50PM +0800, Hongtao Liu wrote:
> > Yes, I think so.
> > And I find that paradoxical subregs like (subreg:V4SF (reg:SF)) were not
> > allowed by validate_subreg until r11-621.
> > That's why the post-reload splitter is needed here.
>
> The following seems to work for all the testcases I've found (and in some
> cases generates better code than the post-reload splitter):
>
> 2021-09-08  Jakub Jelinek  <ja...@redhat.com>
>             liuhongt  <hongtao....@intel.com>
>
>         PR target/89984
>         * config/i386/i386.md (@xorsign<mode>3_1): Remove.
>         * config/i386/i386-expand.c (ix86_expand_xorsign): Expand right away
>         into AND with mask and XOR, using paradoxical subregs.
>         (ix86_split_xorsign): Remove.
Also remove the ix86_split_xorsign declaration from i386-protos.h.
>
>         * gcc.target/i386/avx-pr102224.c: Fix up PR number.
>         * gcc.dg/pr89984.c: New test.
>         * gcc.target/i386/avx-pr89984.c: New test.
>
Otherwise LGTM.
> --- gcc/config/i386/i386.md.jj  2021-09-08 11:40:55.826534981 +0200
> +++ gcc/config/i386/i386.md     2021-09-08 11:44:08.394828674 +0200
> @@ -10918,20 +10918,6 @@ (define_expand "xorsign<mode>3"
>    DONE;
>  })
>
> -(define_insn_and_split "@xorsign<mode>3_1"
> -  [(set (match_operand:MODEF 0 "register_operand" "=&Yv,&Yv,&Yv")
> -       (unspec:MODEF
> -         [(match_operand:MODEF 1 "register_operand" "Yv,0,Yv")
> -          (match_operand:MODEF 2 "register_operand" "0,Yv,Yv")
> -          (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm,Yvm,Yvm")]
> -         UNSPEC_XORSIGN))]
> -  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
> -  "#"
> -  "&& reload_completed"
> -  [(const_int 0)]
> -  "ix86_split_xorsign (operands); DONE;"
> -  [(set_attr "isa" "*,avx,avx")])
> -
>  ;; One complement instructions
>
>  (define_expand "one_cmpl<mode>2"
> --- gcc/config/i386/i386-expand.c.jj    2021-09-08 11:40:55.824535010 +0200
> +++ gcc/config/i386/i386-expand.c       2021-09-08 11:51:15.969819626 +0200
> @@ -2270,7 +2270,7 @@ void
>  ix86_expand_xorsign (rtx operands[])
>  {
>    machine_mode mode, vmode;
> -  rtx dest, op0, op1, mask;
> +  rtx dest, op0, op1, mask, x, temp;
>
>    dest = operands[0];
>    op0 = operands[1];
> @@ -2285,60 +2285,15 @@ ix86_expand_xorsign (rtx operands[])
>    else
>      gcc_unreachable ();
>
> +  temp = gen_reg_rtx (vmode);
>    mask = ix86_build_signbit_mask (vmode, 0, 0);
>
> -  emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask));
> -}
> +  op1 = lowpart_subreg (vmode, op1, mode);
> +  x = gen_rtx_AND (vmode, op1, mask);
> +  emit_insn (gen_rtx_SET (temp, x));
>
> -/* Deconstruct an xorsign operation into bit masks.  */
> -
> -void
> -ix86_split_xorsign (rtx operands[])
> -{
> -  machine_mode mode, vmode;
> -  rtx dest, op0, op1, mask, x;
> -
> -  dest = operands[0];
> -  op0 = operands[1];
> -  op1 = operands[2];
> -  mask = operands[3];
> -
> -  mode = GET_MODE (dest);
> -  vmode = GET_MODE (mask);
> -
> -  /* The constraints ensure that for non-AVX dest == op1 is
> -     different from op0, and for AVX that at most two of
> -     dest, op0 and op1 are the same register but the third one
> -     is different.  */
> -  if (rtx_equal_p (op0, op1))
> -    {
> -      gcc_assert (TARGET_AVX && !rtx_equal_p (op0, dest));
> -      if (vmode == V4SFmode)
> -       vmode = V4SImode;
> -      else
> -       {
> -         gcc_assert (vmode == V2DFmode);
> -         vmode = V2DImode;
> -       }
> -      mask = lowpart_subreg (vmode, mask, GET_MODE (mask));
> -      if (MEM_P (mask))
> -       {
> -         rtx msk = lowpart_subreg (vmode, dest, mode);
> -         emit_insn (gen_rtx_SET (msk, mask));
> -         mask = msk;
> -       }
> -      op0 = lowpart_subreg (vmode, op0, mode);
> -      x = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, mask), op0);
> -    }
> -  else
> -    {
> -      op1 = lowpart_subreg (vmode, op1, mode);
> -      x = gen_rtx_AND (vmode, op1, mask);
> -      emit_insn (gen_rtx_SET (op1, x));
> -
> -      op0 = lowpart_subreg (vmode, op0, mode);
> -      x = gen_rtx_XOR (vmode, op1, op0);
> -    }
> +  op0 = lowpart_subreg (vmode, op0, mode);
> +  x = gen_rtx_XOR (vmode, temp, op0);
>
>    dest = lowpart_subreg (vmode, dest, mode);
>    emit_insn (gen_rtx_SET (dest, x));
> --- gcc/testsuite/gcc.target/i386/avx-pr102224.c.jj     2021-09-08 11:40:55.826534981 +0200
> +++ gcc/testsuite/gcc.target/i386/avx-pr102224.c        2021-09-08 11:57:41.741386062 +0200
> @@ -1,4 +1,4 @@
> -/* PR tree-optimization/51581 */
> +/* PR target/102224 */
>  /* { dg-do run } */
>  /* { dg-options "-O2 -mavx" } */
>  /* { dg-require-effective-target avx } */
> --- gcc/testsuite/gcc.dg/pr89984.c.jj   2021-09-08 11:56:33.799343240 +0200
> +++ gcc/testsuite/gcc.dg/pr89984.c      2021-09-08 11:54:36.070001821 +0200
> @@ -0,0 +1,20 @@
> +/* PR target/89984 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +__attribute__((noipa)) float
> +foo (float x, float y)
> +{
> +  return x * __builtin_copysignf (1.0f, y) + y;
> +}
> +
> +int
> +main ()
> +{
> +  if (foo (1.25f, 7.25f) != 1.25f + 7.25f
> +      || foo (1.75f, -3.25f) != -1.75f + -3.25f
> +      || foo (-2.25f, 7.5f) != -2.25f + 7.5f
> +      || foo (-3.0f, -4.0f) != 3.0f + -4.0f)
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.target/i386/avx-pr89984.c.jj      2021-09-08 11:57:12.297800869 +0200
> +++ gcc/testsuite/gcc.target/i386/avx-pr89984.c 2021-09-08 11:57:56.936172001 +0200
> @@ -0,0 +1,23 @@
> +/* PR target/89984 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx" } */
> +/* { dg-require-effective-target avx } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "avx-check.h"
> +#endif
> +#ifndef TEST
> +#define TEST avx_test
> +#endif
> +
> +#define main main1
> +#include "../../gcc.dg/pr89984.c"
> +#undef main
> +
> +#include CHECK_H
> +
> +static void
> +TEST (void)
> +{
> +  main1 ();
> +}
>
>
>         Jakub
>


-- 
BR,
Hongtao

Reply via email to