On Thu, Jul 09, 2015 at 01:58:22PM -0700, H.J. Lu wrote: > On Thu, Jul 09, 2015 at 12:13:38PM -0700, H.J. Lu wrote: > > ix86_split_long_move can optimize floating point constant move, which > > can be used to optimize SFmode move for IA MCU. > > > > OK for trunk if there is no regression? > > > > > > H.J. > > --- > > gcc/ > > > > PR target/66824 > > * config/i386/i386.c (ix86_split_to_parts): Allow SFmode move > > for IA MCU. > > (ix86_split_long_move): Support single move. > > * config/i386/i386.md (FP splitter): Allow SFmode for IA MCU. > > > > gcc/testsuite/ > > > > PR target/66824 > > * gcc.target/i386/pr66824.c: New test. > > --- > > > I missed the testcase. Here is the updated patch. >
ix86_split_long_move can optimize floating-point constant moves, which can be used to optimize SFmode moves with software floating point. OK for trunk if there are no regressions? H.J. -- gcc/ PR target/66824 * config/i386/i386.c (ix86_split_to_parts): Allow SFmode move without 387, MMX, or SSE. (ix86_split_long_move): Support single move. * config/i386/i386.md (FP splitter): Allow SFmode without 387, MMX, or SSE. gcc/testsuite/ PR target/66824 * gcc.target/i386/pr66824.c: New test. --- gcc/config/i386/i386.c | 18 +++++++++++++++++- gcc/config/i386/i386.md | 6 +++++- gcc/testsuite/gcc.target/i386/pr66824.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr66824.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6b5af11..108f211 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22756,7 +22756,13 @@ ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) size = (GET_MODE_SIZE (mode) + 4) / 8; gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); - gcc_assert (size >= 2 && size <= 4); + /* For software floating point, we also optimize SFmode move. */ + gcc_assert ((size >= 2 + || (mode == SFmode + && !TARGET_80387 + && !TARGET_MMX + && !TARGET_SSE)) + && size <= 4); /* Optimize constant pool reference to immediates. This is used by fp moves, that force all constants to memory to allow combining. 
*/ @@ -22834,10 +22840,14 @@ ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) case DFmode: REAL_VALUE_TO_TARGET_DOUBLE (r, l); break; + case SFmode: + REAL_VALUE_TO_TARGET_SINGLE (r, l[0]); + goto part0; default: gcc_unreachable (); } parts[1] = gen_int_mode (l[1], SImode); +part0: parts[0] = gen_int_mode (l[0], SImode); } else @@ -22944,6 +22954,12 @@ ix86_split_long_move (rtx operands[]) nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); + if (nparts == 1) + { + emit_move_insn (part[0][0], part[1][0]); + return; + } + /* When emitting push, take care for source operands on the stack. */ if (push && MEM_P (operands[1]) && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index bc98389..0351c36 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3507,7 +3507,11 @@ "reload_completed && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode) + || GET_MODE (operands[0]) == DFmode + || (GET_MODE (operands[0]) == SFmode + && !TARGET_80387 + && !TARGET_MMX + && !TARGET_SSE)) && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") diff --git a/gcc/testsuite/gcc.target/i386/pr66824.c b/gcc/testsuite/gcc.target/i386/pr66824.c new file mode 100644 index 0000000..3511e4c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr66824.c @@ -0,0 +1,29 @@ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -mno-sse -mno-mmx -mno-80387" } */ +/* { dg-final { scan-assembler-not "\.LC\[0-9\]" } } */ + +double foo (float); + +double +f1 (void) +{ + return foo (1.0); +} + +double +f2 (void) +{ + return foo (0.0); +} + +void +f3 (float *x, float t) +{ + *x = 0.0 + t; +} + +float +f4 (void) +{ + return 1.0; +} -- 2.4.3