On Wed, Sep 08, 2021 at 06:00:50PM +0800, Hongtao Liu wrote: > Yes, I think so. > And I found that paradoxical subregs like (subreg:V4SF (reg:SF)) were not > allowed by validate_subreg until r11-621. > That's why the post_reload splitter is needed here.
The following seems to work for all the testcases I've found (and in some cases generates better code than the post-reload splitter): 2021-09-08 Jakub Jelinek <ja...@redhat.com> liuhongt <hongtao....@intel.com> PR target/89984 * config/i386/i386.md (@xorsign<mode>3_1): Remove. * config/i386/i386-expand.c (ix86_expand_xorsign): Expand right away into AND with mask and XOR, using paradoxical subregs. (ix86_split_xorsign): Remove. * gcc.target/i386/avx-pr102224.c: Fix up PR number. * gcc.dg/pr89984.c: New test. * gcc.target/i386/avx-pr89984.c: New test. --- gcc/config/i386/i386.md.jj 2021-09-08 11:40:55.826534981 +0200 +++ gcc/config/i386/i386.md 2021-09-08 11:44:08.394828674 +0200 @@ -10918,20 +10918,6 @@ (define_expand "xorsign<mode>3" DONE; }) -(define_insn_and_split "@xorsign<mode>3_1" - [(set (match_operand:MODEF 0 "register_operand" "=&Yv,&Yv,&Yv") - (unspec:MODEF - [(match_operand:MODEF 1 "register_operand" "Yv,0,Yv") - (match_operand:MODEF 2 "register_operand" "0,Yv,Yv") - (match_operand:<ssevecmode> 3 "nonimmediate_operand" "Yvm,Yvm,Yvm")] - UNSPEC_XORSIGN))] - "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] - "ix86_split_xorsign (operands); DONE;" - [(set_attr "isa" "*,avx,avx")]) - ;; One complement instructions (define_expand "one_cmpl<mode>2" --- gcc/config/i386/i386-expand.c.jj 2021-09-08 11:40:55.824535010 +0200 +++ gcc/config/i386/i386-expand.c 2021-09-08 11:51:15.969819626 +0200 @@ -2270,7 +2270,7 @@ void ix86_expand_xorsign (rtx operands[]) { machine_mode mode, vmode; - rtx dest, op0, op1, mask; + rtx dest, op0, op1, mask, x, temp; dest = operands[0]; op0 = operands[1]; @@ -2285,60 +2285,15 @@ ix86_expand_xorsign (rtx operands[]) else gcc_unreachable (); + temp = gen_reg_rtx (vmode); mask = ix86_build_signbit_mask (vmode, 0, 0); - emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask)); -} + op1 = lowpart_subreg (vmode, op1, mode); + x = gen_rtx_AND (vmode, op1, mask); + emit_insn (gen_rtx_SET (temp, x)); -/* 
Deconstruct an xorsign operation into bit masks. */ - -void -ix86_split_xorsign (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, op0, op1, mask, x; - - dest = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - mask = operands[3]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - /* The constraints ensure that for non-AVX dest == op1 is - different from op0, and for AVX that at most two of - dest, op0 and op1 are the same register but the third one - is different. */ - if (rtx_equal_p (op0, op1)) - { - gcc_assert (TARGET_AVX && !rtx_equal_p (op0, dest)); - if (vmode == V4SFmode) - vmode = V4SImode; - else - { - gcc_assert (vmode == V2DFmode); - vmode = V2DImode; - } - mask = lowpart_subreg (vmode, mask, GET_MODE (mask)); - if (MEM_P (mask)) - { - rtx msk = lowpart_subreg (vmode, dest, mode); - emit_insn (gen_rtx_SET (msk, mask)); - mask = msk; - } - op0 = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, mask), op0); - } - else - { - op1 = lowpart_subreg (vmode, op1, mode); - x = gen_rtx_AND (vmode, op1, mask); - emit_insn (gen_rtx_SET (op1, x)); - - op0 = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_XOR (vmode, op1, op0); - } + op0 = lowpart_subreg (vmode, op0, mode); + x = gen_rtx_XOR (vmode, temp, op0); dest = lowpart_subreg (vmode, dest, mode); emit_insn (gen_rtx_SET (dest, x)); --- gcc/testsuite/gcc.target/i386/avx-pr102224.c.jj 2021-09-08 11:40:55.826534981 +0200 +++ gcc/testsuite/gcc.target/i386/avx-pr102224.c 2021-09-08 11:57:41.741386062 +0200 @@ -1,4 +1,4 @@ -/* PR tree-optimization/51581 */ +/* PR target/102224 */ /* { dg-do run } */ /* { dg-options "-O2 -mavx" } */ /* { dg-require-effective-target avx } */ --- gcc/testsuite/gcc.dg/pr89984.c.jj 2021-09-08 11:56:33.799343240 +0200 +++ gcc/testsuite/gcc.dg/pr89984.c 2021-09-08 11:54:36.070001821 +0200 @@ -0,0 +1,20 @@ +/* PR target/89984 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +__attribute__((noipa)) float +foo (float x, float y) +{ 
+ return x * __builtin_copysignf (1.0f, y) + y; +} + +int +main () +{ + if (foo (1.25f, 7.25f) != 1.25f + 7.25f + || foo (1.75f, -3.25f) != -1.75f + -3.25f + || foo (-2.25f, 7.5f) != -2.25f + 7.5f + || foo (-3.0f, -4.0f) != 3.0f + -4.0f) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.target/i386/avx-pr89984.c.jj 2021-09-08 11:57:12.297800869 +0200 +++ gcc/testsuite/gcc.target/i386/avx-pr89984.c 2021-09-08 11:57:56.936172001 +0200 @@ -0,0 +1,23 @@ +/* PR target/89984 */ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-require-effective-target avx } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif +#ifndef TEST +#define TEST avx_test +#endif + +#define main main1 +#include "../../gcc.dg/pr89984.c" +#undef main + +#include CHECK_H + +static void +TEST (void) +{ + main1 (); +} Jakub