Hi!

As mentioned in the PR, there are two bugs in these.  One is that
the zero_extract destination is effectively another input operand (for the
remaining bits that are unchanged) and thus the constraint can't be =Q,
but has to be +Q.
And the other problem is that then LRA ICEs whenever it has 3 different
operands, because it is unable to reload it properly.  Uros mentioned
that it could be reloaded by using *insvqi_2-like insn to move the
8 bits from the operand that should use "0" constraint into the destination
register, but LRA isn't able to do that right now.
So this patch instead adds insn conditions that either the destination
is the same as the first input operand or as one of the input operands
(the latter for commutative patterns).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3?

2017-10-12  Jakub Jelinek  <ja...@redhat.com>

        PR target/82524
        * config/i386/i386.md (addqi_ext_1, andqi_ext_1,
        *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
        =Q constraints to +Q and into insn condition add check
        that operands[0] and operands[1] are equal.
        (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
        =Q constraints to +Q and into insn condition add check
        that operands[0] is equal to either operands[1] or operands[2].

        * gcc.c-torture/execute/pr82524.c: New test.

--- gcc/config/i386/i386.md.jj  2017-10-12 14:05:15.000000000 +0200
+++ gcc/config/i386/i386.md     2017-10-12 17:07:11.723151868 +0200
@@ -6264,7 +6264,7 @@ (define_insn "*add<mode>_5"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "addqi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -6275,7 +6275,8 @@ (define_insn "addqi_ext_1"
                               (const_int 8)) 0)
            (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])"
 {
   switch (get_attr_type (insn))
     {
@@ -6300,7 +6301,7 @@ (define_insn "addqi_ext_1"
    (set_attr "mode" "QI")])
 
 (define_insn "*addqi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -6314,7 +6315,9 @@ (define_insn "*addqi_ext_2"
                               (const_int 8)
                               (const_int 8)) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])
+   || rtx_equal_p (operands[0], operands[2])"
   "add{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -8998,7 +9001,7 @@ (define_insn "*andqi_2_slp"
    (set_attr "mode" "QI")])
 
 (define_insn "andqi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9009,7 +9012,8 @@ (define_insn "andqi_ext_1"
                               (const_int 8)) 0)
            (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])"
   "and{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
@@ -9027,7 +9031,7 @@ (define_insn "*andqi_ext_1_cc"
                               (const_int 8)) 0)
            (match_operand:QI 2 "general_operand" "QnBc,m"))
          (const_int 0)))
-   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9037,14 +9041,16 @@ (define_insn "*andqi_ext_1_cc"
                               (const_int 8)
                               (const_int 8)) 0)
            (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)"
+  "ix86_match_ccmode (insn, CCNOmode)
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "and{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
 (define_insn "*andqi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9058,7 +9064,9 @@ (define_insn "*andqi_ext_2"
                               (const_int 8)
                               (const_int 8)) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   rtx_equal_p (operands[0], operands[1])
+   || rtx_equal_p (operands[0], operands[2])"
   "and{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -9431,7 +9439,7 @@ (define_insn "*<code><mode>_3"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<code>qi_ext_1"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9442,14 +9450,16 @@ (define_insn "*<code>qi_ext_1"
                               (const_int 8)) 0)
            (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
 (define_insn "*<code>qi_ext_2"
-  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9463,7 +9473,10 @@ (define_insn "*<code>qi_ext_2"
                               (const_int 8)
                               (const_int 8)) 0)) 0))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && (rtx_equal_p (operands[0], operands[1])
+       || rtx_equal_p (operands[0], operands[2]))"
   "<logic>{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
@@ -9552,7 +9565,7 @@ (define_insn "*xorqi_ext_1_cc"
                               (const_int 8)) 0)
            (match_operand:QI 2 "general_operand" "QnBc,m"))
          (const_int 0)))
-   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
                         (const_int 8)
                         (const_int 8))
        (subreg:SI
@@ -9562,7 +9575,9 @@ (define_insn "*xorqi_ext_1_cc"
                               (const_int 8)
                               (const_int 8)) 0)
          (match_dup 2)) 0))]
-  "ix86_match_ccmode (insn, CCNOmode)"
+  "ix86_match_ccmode (insn, CCNOmode)
+   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
+   && rtx_equal_p (operands[0], operands[1])"
   "xor{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "isa" "*,nox64")
    (set_attr "type" "alu")
--- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj    2017-10-12 
17:39:28.244825800 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr82524.c       2017-10-12 
17:39:12.000000000 +0200
@@ -0,0 +1,37 @@
+/* PR target/82524 */
+
+struct S { unsigned char b, g, r, a; };
+union U { struct S c; unsigned v; };
+
+static inline unsigned char
+foo (unsigned char a, unsigned char b)
+{
+  return ((a + 1) * b) >> 8;
+}
+
+__attribute__((noinline, noclone)) unsigned
+bar (union U *x, union U *y)
+{
+  union U z;
+  unsigned char v = x->c.a;
+  unsigned char w = foo (y->c.a, 255 - v);
+  z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
+  z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
+  z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
+  z.c.a = 0;
+  return z.v;
+}
+
+int
+main ()
+{
+  union U a, b, c;
+  if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
+    return 0;
+  a.c = (struct S) { 255, 255, 255, 0 };
+  b.c = (struct S) { 255, 255, 255, 255 };
+  c.v = bar (&a, &b);
+  if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
+    __builtin_abort ();
+  return 0;
+}

        Jakub

Reply via email to