Hi, ping for the RTL optimization stuff.
The problem here is that the code in reg-stack.c pretty much everywhere assumes that the stack registers do not have gaps. IMHO it is not worth to fix the register allocation in a way that would be necessary for that configuration to work correctly. So this patch tries just to detect a situation that can't possibly work and exit cleanly without raising any ICEs. In this case we have a regstack of st(1) only. That is temp_stack.top=0 and temp_stack.reg[0]=FIRST_STACK_REG+1. So it is just by luck that the assertion in line 2522 triggers, because immediately before that we already do std::swap (temp_stack[j], temp_stack[k]) with j=-1 and k=0 in line 2118. That is because of: j = (temp_stack.top - (REGNO (recog_data.operand[i]) - FIRST_STACK_REG)) This formula only works, if you can assume that st(1) can only be used in a stack that has at least two elements. Likewise the return statement in get_hard_regnum: return i >= 0 ? (FIRST_STACK_REG + regstack->top - i) : -1; Which is also the same formula, and in this case it returns FIRST_STACK_REG although the stack only has one element FIRST_STACK_REG+1 aka st(1). On 05/22/16 22:02, Uros Bizjak wrote: > On Sun, May 22, 2016 at 9:00 AM, Bernd Edlinger > <bernd.edlin...@hotmail.de> wrote: >> Hi! >> >> as described in the PR there are several non-intuitive rules that >> one has to follow to avoid ICEs with x87 asm operands. >> >> This patch adds an explicit rule, that avoids ICE in the first test case and >> removes an unnecessary error message in the second test case. >> >> >> Boot-strapped and regression-tested on x86_64-pc-linux-gnu. >> OK for trunk? > > This patch is actually dealing with two separate problems > > This part: > > @@ -607,7 +631,7 @@ check_asm_stack_operands (rtx_insn *insn) > record any earlyclobber. */ > > for (i = n_outputs; i < n_outputs + n_inputs; i++) > - if (op_alt[i].matches == -1) > + if (op_alt[i].matches == -1 && STACK_REG_P (recog_data.operand[i])) > { > int j; > > is OK, although, I'd written it as: > > > + if (STACK_REG_P (recog_data.operand[i]) && op_alt[i].matches == -1) > > with slightly simplified testcase: > > --cut here-- > int > __attribute__((noinline, noclone)) > test (double y) > { > int a, b; > asm ("fistpl (%1)\n\t" > "movl (%1), %0" > : "=r" (a) > : "r" (&b), "t" (y) > : "st"); > return a; > } > > int > main () > { > int t = -10; > > if (test (t) != t) > __builtin_abort (); > return 0; > } > --cut here-- > > BTW: It looks to me you also don't need all-memory clobber here. > right. > This part is OK, with a testcase you provided it borders on obvious. > However, you will need rtl-optimization approval for the other > problem. > > Uros. > I changed the patch according to your comments, thanks. I have the updated patch attached. Thanks Bernd.
gcc: 2016-05-22 Bernd Edlinger <bernd.edlin...@hotmail.de> PR inline-asm/68843 * reg-stack.c (check_asm_stack_operands): Explicit input arguments must be grouped on top of stack. Don't force early clobber on ordinary reg outputs. testsuite: 2016-05-22 Bernd Edlinger <bernd.edlin...@hotmail.de> PR inline-asm/68843 * gcc.target/i386/pr68843-1.c: New test. * gcc.target/i386/pr68843-2.c: New test.
Index: gcc/reg-stack.c =================================================================== --- gcc/reg-stack.c (revision 236597) +++ gcc/reg-stack.c (working copy) @@ -97,6 +97,9 @@ All implicitly popped input regs must be closer to the top of the reg-stack than any input that is not implicitly popped. + All explicitly referenced input operands may not "skip" a reg. + Otherwise we can have holes in the stack. + 3. It is possible that if an input dies in an insn, reload might use the input reg for an output reload. Consider this example: @@ -461,6 +464,7 @@ check_asm_stack_operands (rtx_insn *insn) char reg_used_as_output[FIRST_PSEUDO_REGISTER]; char implicitly_dies[FIRST_PSEUDO_REGISTER]; + char explicitly_used[FIRST_PSEUDO_REGISTER]; rtx *clobber_reg = 0; int n_inputs, n_outputs; @@ -568,6 +572,7 @@ check_asm_stack_operands (rtx_insn *insn) popped. */ memset (implicitly_dies, 0, sizeof (implicitly_dies)); + memset (explicitly_used, 0, sizeof (explicitly_used)); for (i = n_outputs; i < n_outputs + n_inputs; i++) if (STACK_REG_P (recog_data.operand[i])) { @@ -581,6 +586,8 @@ check_asm_stack_operands (rtx_insn *insn) if (j < n_clobbers || op_alt[i].matches >= 0) implicitly_dies[REGNO (recog_data.operand[i])] = 1; + else if (reg_class_size[(int) op_alt[i].cl] == 1) + explicitly_used[REGNO (recog_data.operand[i])] = 1; } /* Search for first non-popped reg. */ @@ -600,6 +607,23 @@ check_asm_stack_operands (rtx_insn *insn) malformed_asm = 1; } + /* Search for first not-explicitly used reg. */ + for (i = FIRST_STACK_REG; i < LAST_STACK_REG + 1; i++) + if (! implicitly_dies[i] && ! explicitly_used[i]) + break; + + /* If there are any other explicitly used regs, that's an error. */ + for (; i < LAST_STACK_REG + 1; i++) + if (explicitly_used[i]) + break; + + if (i != LAST_STACK_REG + 1) + { + error_for_asm (insn, + "explicitly used regs must be grouped at top of stack"); + malformed_asm = 1; + } + /* Enforce rule #3: If any input operand uses the "f" constraint, all output constraints must use the "&" earlyclobber. @@ -607,7 +631,7 @@ check_asm_stack_operands (rtx_insn *insn) record any earlyclobber. */ for (i = n_outputs; i < n_outputs + n_inputs; i++) - if (op_alt[i].matches == -1) + if (STACK_REG_P (recog_data.operand[i]) && op_alt[i].matches == -1) { int j; Index: gcc/testsuite/gcc.target/i386/pr68843-1.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr68843-1.c (revision 0) +++ gcc/testsuite/gcc.target/i386/pr68843-1.c (working copy) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +double +test () +{ + double x = 1.0; + asm ("fld %1" /* { dg-error "explicitly used regs must be grouped at top of stack" } */ + : "=&t" (x) + : "u" (x)); + return x; +} Index: gcc/testsuite/gcc.target/i386/pr68843-2.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr68843-2.c (revision 0) +++ gcc/testsuite/gcc.target/i386/pr68843-2.c (working copy) @@ -0,0 +1,22 @@ +int +__attribute__((noinline, noclone)) +test (double y) +{ + int a, b; + asm ("fistpl (%1)\n\t" + "movl (%1), %0" + : "=r" (a) + : "r" (&b), "t" (y) + : "st"); + return a; +} + +int +main () +{ + int t = -10; + + if (test (t) != t) + __builtin_abort (); + return 0; +}