On Thu, Oct 12, 2017 at 9:49 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > As mentioned in the PR, there are two bugs in these. One is that > the zero_extract destination is effectively another input operand (for the > remaining bits that are unchanged) and thus the constraint can't be =Q, > but has to be +Q. > And the other problem is that then LRA ICEs whenever it has 3 different > operands, because it is unable to reload it properly. Uros mentioned > that it could be reloaded by using *insvqi_2-like insn to move the > 8 bits from the operand that should use "0" constraint into the destination > register, but LRA isn't able to do that right now. > So this patch instead adds insn conditions that either the destination > is the same as the first input operand or as one of the input operands > (the latter for commutative patterns). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3? > > 2017-10-12 Jakub Jelinek <ja...@redhat.com> > > PR target/82524 > * config/i386/i386.md (addqi_ext_1, andqi_ext_1, > *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change > =Q constraints to +Q and into insn condition add check > that operands[0] and operands[1] are equal. > (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change > =Q constraints to +Q and into insn condition add check > that operands[0] is equal to either operands[1] or operands[2]. > > * gcc.c-torture/execute/pr82524.c: New test.
OK for mainline and gcc-7 branch. Thanks, Uros. > --- gcc/config/i386/i386.md.jj 2017-10-12 14:05:15.000000000 +0200 > +++ gcc/config/i386/i386.md 2017-10-12 17:07:11.723151868 +0200 > @@ -6264,7 +6264,7 @@ (define_insn "*add<mode>_5" > (set_attr "mode" "<MODE>")]) > > (define_insn "addqi_ext_1" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -6275,7 +6275,8 @@ (define_insn "addqi_ext_1" > (const_int 8)) 0) > (match_operand:QI 2 "general_operand" "QnBc,m")) 0)) > (clobber (reg:CC FLAGS_REG))] > - "" > + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + rtx_equal_p (operands[0], operands[1])" > { > switch (get_attr_type (insn)) > { > @@ -6300,7 +6301,7 @@ (define_insn "addqi_ext_1" > (set_attr "mode" "QI")]) > > (define_insn "*addqi_ext_2" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -6314,7 +6315,9 @@ (define_insn "*addqi_ext_2" > (const_int 8) > (const_int 8)) 0)) 0)) > (clobber (reg:CC FLAGS_REG))] > - "" > + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + rtx_equal_p (operands[0], operands[1]) > + || rtx_equal_p (operands[0], operands[2])" > "add{b}\t{%h2, %h0|%h0, %h2}" > [(set_attr "type" "alu") > (set_attr "mode" "QI")]) > @@ -8998,7 +9001,7 @@ (define_insn "*andqi_2_slp" > (set_attr "mode" "QI")]) > > (define_insn "andqi_ext_1" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9009,7 +9012,8 @@ (define_insn "andqi_ext_1" > (const_int 8)) 0) > (match_operand:QI 2 "general_operand" "QnBc,m")) 0)) > (clobber (reg:CC FLAGS_REG))] > - "" > + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + rtx_equal_p (operands[0], operands[1])" > "and{b}\t{%2, %h0|%h0, %2}" > [(set_attr "isa" "*,nox64") > (set_attr "type" "alu") > @@ -9027,7 +9031,7 @@ (define_insn "*andqi_ext_1_cc" > (const_int 8)) 0) > (match_operand:QI 2 "general_operand" "QnBc,m")) > (const_int 0))) > - (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") > + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9037,14 +9041,16 @@ (define_insn "*andqi_ext_1_cc" > (const_int 8) > (const_int 8)) 0) > (match_dup 2)) 0))] > - "ix86_match_ccmode (insn, CCNOmode)" > + "ix86_match_ccmode (insn, CCNOmode) > + /* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + && rtx_equal_p (operands[0], operands[1])" > "and{b}\t{%2, %h0|%h0, %2}" > [(set_attr "isa" "*,nox64") > (set_attr "type" "alu") > (set_attr "mode" "QI")]) > > (define_insn "*andqi_ext_2" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9058,7 +9064,9 @@ (define_insn "*andqi_ext_2" > (const_int 8) > (const_int 8)) 0)) 0)) > (clobber (reg:CC FLAGS_REG))] > - "" > + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + rtx_equal_p (operands[0], operands[1]) > + || rtx_equal_p (operands[0], operands[2])" > "and{b}\t{%h2, %h0|%h0, %h2}" > [(set_attr "type" "alu") > (set_attr "mode" "QI")]) > @@ -9431,7 +9439,7 @@ (define_insn "*<code><mode>_3" > (set_attr "mode" "<MODE>")]) > > (define_insn "*<code>qi_ext_1" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9442,14 +9450,16 @@ (define_insn "*<code>qi_ext_1" > (const_int 8)) 0) > (match_operand:QI 2 "general_operand" "QnBc,m")) 0)) > (clobber (reg:CC FLAGS_REG))] > - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" > + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) > + /* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + && rtx_equal_p (operands[0], operands[1])" > "<logic>{b}\t{%2, %h0|%h0, %2}" > [(set_attr "isa" "*,nox64") > (set_attr "type" "alu") > (set_attr "mode" "QI")]) > > (define_insn "*<code>qi_ext_2" > - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") > + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9463,7 +9473,10 @@ (define_insn "*<code>qi_ext_2" > (const_int 8) > (const_int 8)) 0)) 0)) > (clobber (reg:CC FLAGS_REG))] > - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" > + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) > + /* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + && (rtx_equal_p (operands[0], operands[1]) > + || rtx_equal_p (operands[0], operands[2]))" > "<logic>{b}\t{%h2, %h0|%h0, %h2}" > [(set_attr "type" "alu") > (set_attr "mode" "QI")]) > @@ -9552,7 +9565,7 @@ (define_insn "*xorqi_ext_1_cc" > (const_int 8)) 0) > (match_operand:QI 2 "general_operand" "QnBc,m")) > (const_int 0))) > - (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") > + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") > (const_int 8) > (const_int 8)) > (subreg:SI > @@ -9562,7 +9575,9 @@ (define_insn "*xorqi_ext_1_cc" > (const_int 8) > (const_int 8)) 0) > (match_dup 2)) 0))] > - "ix86_match_ccmode (insn, CCNOmode)" > + "ix86_match_ccmode (insn, CCNOmode) > + /* FIXME: without this LRA can't reload this pattern, see PR82524. */ > + && rtx_equal_p (operands[0], operands[1])" > "xor{b}\t{%2, %h0|%h0, %2}" > [(set_attr "isa" "*,nox64") > (set_attr "type" "alu") > --- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj 2017-10-12 > 17:39:28.244825800 +0200 > +++ gcc/testsuite/gcc.c-torture/execute/pr82524.c 2017-10-12 > 17:39:12.000000000 +0200 > @@ -0,0 +1,37 @@ > +/* PR target/82524 */ > + > +struct S { unsigned char b, g, r, a; }; > +union U { struct S c; unsigned v; }; > + > +static inline unsigned char > +foo (unsigned char a, unsigned char b) > +{ > + return ((a + 1) * b) >> 8; > +} > + > +__attribute__((noinline, noclone)) unsigned > +bar (union U *x, union U *y) > +{ > + union U z; > + unsigned char v = x->c.a; > + unsigned char w = foo (y->c.a, 255 - v); > + z.c.r = foo (x->c.r, v) + foo (y->c.r, w); > + z.c.g = foo (x->c.g, v) + foo (y->c.g, w); > + z.c.b = foo (x->c.b, v) + foo (y->c.b, w); > + z.c.a = 0; > + return z.v; > +} > + > +int > +main () > +{ > + union U a, b, c; > + if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4) > + return 0; > + a.c = (struct S) { 255, 255, 255, 0 }; > + b.c = (struct S) { 255, 255, 255, 255 }; > + c.v = bar (&a, &b); > + if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0) > + __builtin_abort (); > + return 0; > +} > > Jakub