Hello! As outlined in the PR, the canonical way of writing insn patterns with embedded zero_extract RTXes is e.g.:
(define_insn "andqi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) (const_int 8)) (subreg:SI (and:QI (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") (const_int 8) (const_int 8)) 0) (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] When changed to the above form, combine is able to synthesize instructions that operate on the high part of the registers. The patch changes the relevant patterns to the canonical form. 2016-12-26 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * config/i386/i386.md (addqi_ext_1): Canonicalize insn pattern w.r.t. zero_extract RTXes. (*addqi_ext_2): Ditto. (testqi_ext_ccno_0): Canonicalize expander w.r.t. zero_extract RTXes. (testqi_ext_1_ccno): Rename from testqi_ext_ccno_0. (*testqi_ext_0): Merge with *testqi_ext_1. (*testqi_ext_1): Canonicalize insn pattern w.r.t. zero_extract RTXes. Update corresponding splitter. (*testqi_ext_2): Canonicalize insn pattern w.r.t. zero_extract RTXes. (*andqi_ext_0): Merge with *andqi_ext_1. (andqi_ext_1): Canonicalize insn pattern w.r.t. zero_extract RTXes. Rename from *andqi_ext_1. Update corresponding splitter and peephole2 patterns. (*andqi_ext_1_cc): Rename from *andqi_ext_0_cc. (*andqi_ext_2): Canonicalize insn pattern w.r.t. zero_extract RTXes. (*<any_or:code>qi_ext_0): Merge with *<any_or:code>qi_ext_1. (*<any_or:code>qi_ext_1): Canonicalize insn pattern w.r.t. zero_extract RTXes. Update corresponding splitter. (*<any_or:code>qi_ext_2): Canonicalize insn pattern w.r.t. zero_extract RTXes. (xorqi_cc_ext_1): Canonicalize expander w.r.t. zero_extract RTXes. (xorqi_ext_1_cc): Rename from xorqi_cc_ext_1. (*xorqi_cc_ext_1): Canonicalize insn pattern w.r.t. zero_extract RTXes. Update corresponding splitter. (*xorqi_ext_1_cc): Rename from *xorqi_cc_ext_1. (isinfxf2): Update calls to renamed expanders. (isinf<mode>2): Ditto. * config/i386/i386.c (ix86_expand_fp_compare): Ditto. 
(ix86_emit_fp_unordered_jump): Ditto. (ix86_emit_i387_round): Ditto. testsuite/ChangeLog: 2016-12-26 Uros Bizjak <ubiz...@gmail.com> PR target/78904 * gcc.target/i386/pr78904.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 243823) +++ config/i386/i386.c (working copy) @@ -22585,12 +22585,12 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case UNGT: if (code == GT || !TARGET_IEEE_FP) { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); code = EQ; } else { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); intcmp_mode = CCmode; @@ -22601,7 +22601,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case UNLT: if (code == LT && TARGET_IEEE_FP) { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); intcmp_mode = CCmode; code = EQ; @@ -22608,7 +22608,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op } else { - emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx)); + emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); code = NE; } break; @@ -22616,13 +22616,13 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case UNGE: if (code == GE || !TARGET_IEEE_FP) { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); code = EQ; } else { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx)); + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); code = NE; } break; @@ -22630,7 +22630,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case UNLE: if (code == LE && TARGET_IEEE_FP) { - emit_insn (gen_andqi_ext_0 (scratch, scratch, 
GEN_INT (0x45))); + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); intcmp_mode = CCmode; @@ -22638,7 +22638,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op } else { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); code = NE; } break; @@ -22646,7 +22646,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case UNEQ: if (code == EQ && TARGET_IEEE_FP) { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); intcmp_mode = CCmode; code = EQ; @@ -22653,7 +22653,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op } else { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); code = NE; } break; @@ -22661,24 +22661,24 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op case LTGT: if (code == NE && TARGET_IEEE_FP) { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, + emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); + emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, GEN_INT (0x40))); code = NE; } else { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); code = EQ; } break; case UNORDERED: - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); code = NE; break; case ORDERED: - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); code = EQ; break; @@ -44370,7 +44370,7 @@ ix86_emit_fp_unordered_jump (rtx label) } else { - emit_insn (gen_testqi_ext_ccno_0 (reg, 
GEN_INT (0x04))); + emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04))); temp = gen_rtx_REG (CCNOmode, FLAGS_REG); temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); @@ -44528,7 +44528,7 @@ void ix86_emit_i387_round (rtx op0, rtx op1) } /* flags = signbit(a) */ - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02))); + emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); /* if (flags) then res = -res */ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 243823) +++ config/i386/i386.md (working copy) @@ -1345,7 +1345,6 @@ "cmp{b}\t{%1, %h0|%h0, %1}" [(set_attr "isa" "*,nox64") (set_attr "type" "icmp") - (set_attr "modrm" "1") (set_attr "mode" "QI")]) (define_insn "*cmpqi_ext_4" @@ -6082,12 +6081,13 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) (const_int 8)) - (plus:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0,0") - (const_int 8) - (const_int 8)) - (match_operand:QI 2 "general_x64nomem_operand" "Qn,m"))) + (subreg:SI + (plus:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "" { @@ -6111,7 +6111,6 @@ (if_then_else (match_operand:QI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) - (set_attr "modrm" "1") (set_attr "mode" "QI")]) (define_insn "*addqi_ext_2" @@ -6118,16 +6117,17 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (plus:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "%0") - (const_int 8) - (const_int 8)) - (zero_extract:SI - (match_operand 2 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))) - (clobber (reg:CC FLAGS_REG))] + (subreg:SI + (plus:QI + (subreg:QI + (zero_extract:SI (match_operand 1 
"ext_register_operand" "%0") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))] "" "add{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") @@ -7823,45 +7823,26 @@ (set_attr "mode" "<MODE>") (set_attr "pent_pair" "uv,np,uv")]) -(define_expand "testqi_ext_ccno_0" +(define_expand "testqi_ext_1_ccno" [(set (reg:CCNO FLAGS_REG) (compare:CCNO - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand") - (const_int 8) - (const_int 8)) - (match_operand 1 "const_int_operand")) + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 0 "ext_register_operand") + (const_int 8) + (const_int 8)) 0) + (match_operand 1 "const_int_operand")) (const_int 0)))]) -(define_insn "*testqi_ext_0" - [(set (reg FLAGS_REG) - (compare - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)) - (match_operand 1 "const_int_operand" "n")) - (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" - "test{b}\t{%1, %h0|%h0, %1}" - [(set_attr "type" "test") - (set_attr "mode" "QI") - (set_attr "length_immediate" "1") - (set_attr "modrm" "1") - (set_attr "pent_pair" "np")]) - (define_insn "*testqi_ext_1" [(set (reg FLAGS_REG) (compare - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) - (zero_extend:SI - (match_operand:QI 1 "nonimmediate_x64nomem_operand" "Q,m"))) + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 0 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_x64nomem_operand" "Qn,m")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%1, %h0|%h0, %1}" @@ -7872,15 +7853,15 @@ (define_insn "*testqi_ext_2" [(set (reg FLAGS_REG) (compare - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)) - (zero_extract:SI - (match_operand 
1 "ext_register_operand" "Q") - (const_int 8) - (const_int 8))) + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%h1, %h0|%h0, %h1}" @@ -7982,12 +7963,16 @@ && !(INTVAL (operands[3]) & ~(127 << 8))))" [(set (match_dup 0) (match_op_dup 1 - [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) - (match_dup 3)) + [(and:QI + (subreg:QI + (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) 0) + (match_dup 3)) (const_int 0)]))] { operands[2] = gen_lowpart (SImode, operands[2]); - operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode); }) (define_split @@ -8261,17 +8246,21 @@ (clobber (reg:CC FLAGS_REG))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && reload_completed" - [(parallel [(set (zero_extract:SI (match_dup 0) - (const_int 8) - (const_int 8)) - (xor:SI - (zero_extract:SI (match_dup 0) - (const_int 8) - (const_int 8)) - (zero_extract:SI (match_dup 0) - (const_int 8) - (const_int 8)))) - (clobber (reg:CC FLAGS_REG))])] + [(parallel + [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (subreg:SI + (xor:QI + (subreg:QI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] "operands[0] = gen_lowpart (SImode, operands[0]);") (define_insn "*anddi_2" @@ -8371,92 +8360,69 @@ [(set_attr "type" "alu1") (set_attr "mode" "QI")]) -;; ??? A bug in recog prevents it from recognizing a const_int as an -;; operand to zero_extend in andqi_ext_1. It was checking explicitly -;; for a QImode operand, which of course failed. 
-(define_insn "andqi_ext_0" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") +(define_insn "andqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) (const_int 8)) - (and:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0") - (const_int 8) - (const_int 8)) - (match_operand 2 "const_int_operand" "n"))) + (subreg:SI + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "" "and{b}\t{%2, %h0|%h0, %2}" - [(set_attr "type" "alu") - (set_attr "length_immediate" "1") - (set_attr "modrm" "1") + [(set_attr "isa" "*,nox64") + (set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Generated by peephole translating test to and. This shows up ;; often in fp comparisons. -(define_insn "*andqi_ext_0_cc" +(define_insn "*andqi_ext_1_cc" [(set (reg FLAGS_REG) (compare - (and:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0") - (const_int 8) - (const_int 8)) + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) 0) (match_operand 2 "const_int_operand" "n")) (const_int 0))) (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI - (zero_extract:SI - (match_dup 1) - (const_int 8) - (const_int 8)) - (match_dup 2)))] + (subreg:SI + (and:QI + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0))] "ix86_match_ccmode (insn, CCNOmode)" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") - (set_attr "length_immediate" "1") - (set_attr "modrm" "1") (set_attr "mode" "QI")]) -(define_insn "*andqi_ext_1" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") - (const_int 8) - (const_int 8)) - (and:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" 
"0,0") - (const_int 8) - (const_int 8)) - (zero_extend:SI - (match_operand:QI 2 "nonimmediate_x64nomem_operand" "Q,m")))) - (clobber (reg:CC FLAGS_REG))] - "" - "and{b}\t{%2, %h0|%h0, %2}" - [(set_attr "isa" "*,nox64") - (set_attr "type" "alu") - (set_attr "length_immediate" "0") - (set_attr "mode" "QI")]) - (define_insn "*andqi_ext_2" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (and:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "%0") - (const_int 8) - (const_int 8)) - (zero_extract:SI - (match_operand 2 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))) + (subreg:SI + (and:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "" "and{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") - (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) ;; Convert wide AND instructions with immediate operand to shorter QImode @@ -8472,15 +8438,22 @@ "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(~INTVAL (operands[2]) & ~(255 << 8))" - [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) - (and:SI (zero_extract:SI (match_dup 1) - (const_int 8) (const_int 8)) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] + [(parallel + [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (subreg:SI + (and:QI + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); - operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode); + operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode); }) ;; Since AND can be encoded 
with sign extended immediate, this is only @@ -8773,41 +8746,22 @@ [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) -(define_insn "*<code>qi_ext_0" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") - (const_int 8) - (const_int 8)) - (any_or:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0") - (const_int 8) - (const_int 8)) - (match_operand 2 "const_int_operand" "n"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "<logic>{b}\t{%2, %h0|%h0, %2}" - [(set_attr "type" "alu") - (set_attr "length_immediate" "1") - (set_attr "modrm" "1") - (set_attr "mode" "QI")]) - (define_insn "*<code>qi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) (const_int 8)) - (any_or:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0,0") - (const_int 8) - (const_int 8)) - (zero_extend:SI - (match_operand:QI 2 "nonimmediate_x64nomem_operand" "Q,m")))) + (subreg:SI + (any_or:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) 0)) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "<logic>{b}\t{%2, %h0|%h0, %2}" [(set_attr "isa" "*,nox64") (set_attr "type" "alu") - (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) (define_insn "*<code>qi_ext_2" @@ -8814,18 +8768,20 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) - (any_or:SI - (zero_extract:SI (match_operand 1 "ext_register_operand" "0") - (const_int 8) - (const_int 8)) - (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") - (const_int 8) - (const_int 8)))) + (subreg:SI + (any_or:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI (match_operand 2 
"ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "<logic>{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") - (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) ;; Convert wide OR instructions with immediate operand to shorter QImode @@ -8841,15 +8797,22 @@ "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(INTVAL (operands[2]) & ~(255 << 8))" - [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) - (any_or:SI (zero_extract:SI (match_dup 1) - (const_int 8) (const_int 8)) - (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] + [(parallel + [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (subreg:SI + (any_or:QI + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); - operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode); + operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode); }) ;; Since OR can be encoded with sign extended immediate, this is only @@ -8873,51 +8836,52 @@ operands[2] = gen_lowpart (QImode, operands[2]); }) -(define_expand "xorqi_cc_ext_1" +(define_expand "xorqi_ext_1_cc" [(parallel [ (set (reg:CCNO FLAGS_REG) (compare:CCNO - (xor:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand") - (const_int 8) - (const_int 8)) - (match_operand:QI 2 "const_int_operand")) + (xor:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand") + (const_int 8) + (const_int 8)) 0) + (match_operand 2 "const_int_operand")) (const_int 0))) (set (zero_extract:SI (match_operand 0 "ext_register_operand") (const_int 8) (const_int 8)) - (xor:SI - (zero_extract:SI - (match_dup 1) - (const_int 8) - (const_int 8)) - (match_dup 2)))])]) + 
(subreg:SI + (xor:QI + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0))])]) -(define_insn "*xorqi_cc_ext_1" +(define_insn "*xorqi_ext_1_cc" [(set (reg FLAGS_REG) (compare - (xor:SI - (zero_extract:SI - (match_operand 1 "ext_register_operand" "0,0") - (const_int 8) - (const_int 8)) + (xor:QI + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) 0) (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) (const_int 0))) (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") (const_int 8) (const_int 8)) - (xor:SI - (zero_extract:SI - (match_dup 1) - (const_int 8) - (const_int 8)) - (match_dup 2)))] + (subreg:SI + (xor:QI + (subreg:QI + (zero_extract:SI (match_dup 1) + (const_int 8) + (const_int 8)) 0) + (match_dup 2)) 0))] "ix86_match_ccmode (insn, CCNOmode)" "xor{b}\t{%2, %h0|%h0, %2}" [(set_attr "isa" "*,nox64") (set_attr "type" "alu") - (set_attr "modrm" "1") (set_attr "mode" "QI")]) ;; Negation instructions @@ -16051,7 +16015,7 @@ emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); - emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_andqi_ext_1 (scratch, scratch, mask)); emit_insn (gen_cmpqi_ext_3 (scratch, val)); ix86_expand_setcc (res, EQ, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); @@ -16083,7 +16047,7 @@ emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp)); } - emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_andqi_ext_1 (scratch, scratch, mask)); emit_insn (gen_cmpqi_ext_3 (scratch, val)); ix86_expand_setcc (res, EQ, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); @@ -17804,11 +17768,11 @@ (define_peephole2 [(set (match_operand 0 "flags_reg_operand") (match_operator 1 "compare_operator" - [(and:SI - (zero_extract:SI - (match_operand 2 "QIreg_operand") - (const_int 8) - (const_int 8)) + [(and:QI + (subreg:QI + (zero_extract:SI (match_operand 2 "QIreg_operand") + (const_int 8) + (const_int 
8)) 0) (match_operand 3 "const_int_operand")) (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL @@ -17815,24 +17779,26 @@ && ix86_match_ccmode (insn, CCNOmode) && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" - [(parallel [(set (match_dup 0) - (match_op_dup 1 - [(and:SI - (zero_extract:SI - (match_dup 2) - (const_int 8) - (const_int 8)) - (match_dup 3)) - (const_int 0)])) - (set (zero_extract:SI (match_dup 2) - (const_int 8) - (const_int 8)) - (and:SI - (zero_extract:SI - (match_dup 2) - (const_int 8) - (const_int 8)) - (match_dup 3)))])]) + [(parallel + [(set (match_dup 0) + (match_op_dup 1 + [(and:QI + (subreg:QI + (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) 0) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) + (subreg:SI + (and:QI + (subreg:QI + (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) 0) + (match_dup 3)) 0))])]) ;; Don't do logical operations with memory inputs. (define_peephole2 Index: testsuite/gcc.target/i386/pr78904.c =================================================================== --- testsuite/gcc.target/i386/pr78904.c (nonexistent) +++ testsuite/gcc.target/i386/pr78904.c (working copy) @@ -0,0 +1,48 @@ +/* PR target/78904 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ +/* { dg-final { scan-assembler-not "movb" } } */ + +struct S1 +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +struct S1 test_and (struct S1 a, struct S1 b) +{ + a.val &= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S1 test_or (struct S1 a, struct S1 b) +{ + a.val |= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S1 test_xor (struct S1 a, struct S1 b) +{ + a.val ^= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ + +struct S1 test_add (struct S1 a, struct S1 b) 
+{ + a.val += b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */