Hello! The attached patch introduces various missing insert patterns, considerably improving the code generated for the attached testcases. As a necessary complication to improve robustness, the patch tags x86_64 patterns that require memory operands without a REX prefix with an UNSPEC_NOREX_MEM tag, to prevent unwanted pattern matching through subregs.
The patch also tightens the scan-assembler patterns in three pr78904 testcases. 2017-01-02 Uros Bizjak <ubiz...@gmail.com> PR target/78967 * config/i386/i386.md (UNSPEC_NOREX_MEM): New unspec. (*insvqi_1): New insn pattern. (*insvqi_1_mem_rex64): Ditto. (*insvqi_2): Ditto. (*insvqi_3): Rename from *insvqi. (*extzvqi_mem_rex64): Add UNSPEC_NOREX_MEM tag. testsuite/ChangeLog: 2017-01-02 Uros Bizjak <ubiz...@gmail.com> PR target/78967 * gcc.target/i386/pr78967-1.c: New test. * gcc.target/i386/pr78967-2.c: Ditto. * gcc.target/i386/pr78967-3.c: Ditto. * gcc.target/i386/pr78904-2.c: Tighten scan-asm patterns. * gcc.target/i386/pr78904-4.c: Ditto. * gcc.target/i386/pr78904-6.c: Ditto. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 244002) +++ config/i386/i386.md (working copy) @@ -114,6 +114,7 @@ UNSPEC_STOS UNSPEC_PEEPSIB UNSPEC_INSN_FALSE_DEP + UNSPEC_NOREX_MEM ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -2819,7 +2820,8 @@ (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") (const_int 8) - (const_int 8)) 0))] + (const_int 8)) 0)) + (unspec [(const_int 0)] UNSPEC_NOREX_MEM)] "TARGET_64BIT && reload_completed" "mov{b}\t{%h1, %0|%0, %h1}" [(set_attr "type" "imov") @@ -2862,11 +2864,13 @@ (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))] "TARGET_64BIT && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) - (subreg:QI - (zero_extract:SI (match_dup 1) - (const_int 8) - (const_int 8)) 0))]) + [(parallel + [(set (match_dup 2) + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0)) + (unspec [(const_int 0)] UNSPEC_NOREX_MEM)])]) (define_expand "insv<mode>" [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand") @@ -2916,10 +2920,62 @@ (set_attr "type" "imov") (set_attr "mode" "QI")]) -(define_insn "*insvqi" +(define_insn "*insvqi_1_mem_rex64" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") (const_int 8) (const_int 8)) + (subreg:SI + (match_operand:QI 1 "norex_memory_operand" "Bn") 0)) + (unspec [(const_int 0)] UNSPEC_NOREX_MEM)] + "TARGET_64BIT && reload_completed" + "mov{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*insvqi_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q") + (const_int 8) + (const_int 8)) + (subreg:SI + (match_operand:QI 1 "general_operand" "QnBc,m") 0))] + "" + "mov{b}\t{%1, %h0|%h0, %1}" + [(set_attr "isa" "*,nox64") + (set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_peephole2 + [(set (match_operand:QI 0 "register_operand") + (match_operand:QI 1 
"norex_memory_operand")) + (set (zero_extract:SI (match_operand 2 "ext_register_operand") + (const_int 8) + (const_int 8)) + (subreg:SI (match_dup 0) 0))] + "TARGET_64BIT + && peep2_reg_dead_p (2, operands[0])" + [(parallel + [(set (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) + (subreg:SI (match_dup 1) 0)) + (unspec [(const_int 0)] UNSPEC_NOREX_MEM)])]) + +(define_insn "*insvqi_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*insvqi_3" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") (const_int 8)))] "" Index: testsuite/gcc.target/i386/pr78904-2.c =================================================================== --- testsuite/gcc.target/i386/pr78904-2.c (revision 244002) +++ testsuite/gcc.target/i386/pr78904-2.c (working copy) @@ -18,7 +18,7 @@ return a; } -/* { dg-final { scan-assembler "\[ \t\]andb\[^\n\r]*, %.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]andb\[ \t\]+t\[^\n\r]*, %.h" } } */ struct S1 test_or (struct S1 a) { @@ -27,7 +27,7 @@ return a; } -/* { dg-final { scan-assembler "\[ \t\]orb\[^\n\r]*, %.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]orb\[ \t\]+t\[^\n\r]*, %.h" } } */ struct S1 test_xor (struct S1 a) { @@ -36,7 +36,7 @@ return a; } -/* { dg-final { scan-assembler "\[ \t\]xorb\[^\n\r]*, %.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]xorb\[ \t\]+t\[^\n\r]*, %.h" } } */ struct S1 test_add (struct S1 a) { @@ -45,4 +45,4 @@ return a; } -/* { dg-final { scan-assembler "\[ \t\]addb\[^\n\r]*, %.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ Index: testsuite/gcc.target/i386/pr78904-4.c 
=================================================================== --- testsuite/gcc.target/i386/pr78904-4.c (revision 244002) +++ testsuite/gcc.target/i386/pr78904-4.c (working copy) @@ -18,4 +18,4 @@ t[i] = a.val; } -/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h," } } */ +/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]+%.h, t" } } */ Index: testsuite/gcc.target/i386/pr78904-6.c =================================================================== --- testsuite/gcc.target/i386/pr78904-6.c (revision 244002) +++ testsuite/gcc.target/i386/pr78904-6.c (working copy) @@ -18,4 +18,4 @@ t[i] = a.val; } -/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h," } } */ +/* { dg-final { scan-assembler "\[ \t\]movb\[\t \]*%.h, t" } } */ Index: testsuite/gcc.target/i386/pr78967-1.c =================================================================== --- testsuite/gcc.target/i386/pr78967-1.c (nonexistent) +++ testsuite/gcc.target/i386/pr78967-1.c (working copy) @@ -0,0 +1,21 @@ +/* PR target/78967 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +struct S1 foo (struct S1 a, struct S1 b) +{ + a.val = b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]movb\[ \t\]+%.h, %.h" } } */ Index: testsuite/gcc.target/i386/pr78967-2.c =================================================================== --- testsuite/gcc.target/i386/pr78967-2.c (nonexistent) +++ testsuite/gcc.target/i386/pr78967-2.c (working copy) @@ -0,0 +1,24 @@ +/* PR target/78967 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ + +typedef __SIZE_TYPE__ size_t; + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern unsigned char t[256]; + +struct S1 foo (struct S1 a, size_t 
i) +{ + a.val = t[i]; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]movb\[ \t\]+t\[^\n\r]*, %.h" } } */ Index: testsuite/gcc.target/i386/pr78967-3.c =================================================================== --- testsuite/gcc.target/i386/pr78967-3.c (nonexistent) +++ testsuite/gcc.target/i386/pr78967-3.c (working copy) @@ -0,0 +1,24 @@ +/* PR target/78967 */ +/* { dg-do assemble { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef __SIZE_TYPE__ size_t; + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern unsigned char t[256]; + +struct S1 foo (struct S1 a, size_t i) +{ + register size_t _i __asm ("r10") = i; + + asm volatile ("" : "+r" (_i)); + a.val = t[_i]; + + return a; +}