On Wed, Jul 20, 2022 at 8:54 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Wed, Jul 20, 2022 at 2:18 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> >
> > On Wed, Jul 20, 2022 at 8:14 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > On Wed, Jul 20, 2022 at 4:37 AM Hongtao Liu <crazy...@gmail.com> wrote:
> > > >
> > > > On Tue, Jul 19, 2022 at 5:37 PM Uros Bizjak <ubiz...@gmail.com> wrote:
> > > > >
> > > > > On Tue, Jul 19, 2022 at 8:56 AM Hongtao Liu <crazy...@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, Jul 19, 2022 at 2:35 PM Uros Bizjak via Gcc-patches
> > > > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 19, 2022 at 8:07 AM liuhongt <hongtao....@intel.com> wrote:
> > > > > > > >
> > > > > > > > And split it after reload.
> > > > > > > >
> > > > > > > > > You will need ix86_binary_operator_ok insn constraint here with
> > > > > > > > > corresponding expander using ix86_fixup_binary_operands_no_copy to
> > > > > > > > > prepare insn operands.
> > > > > > > > Split define_expand with just register_operand, and allow
> > > > > > > > memory/immediate in define_insn, assume combine/forwprop will
> > > > > > > > do optimization.
> > > > > > >
> > > > > > > But you will *ease* the job of the above passes if you use
> > > > > > > ix86_fixup_binary_operands_no_copy in the expander.
> > > > > > for -m32, it will hit ICE in
> > > > > > Breakpoint 1, ix86_fixup_binary_operands_no_copy (code=XOR,
> > > > > > mode=E_V4QImode, operands=0x7fffffffa970) at
> > > > > > /gcc/config/i386/i386-expand.cc:1184
> > > > > > 1184      rtx dst = ix86_fixup_binary_operands (code, mode, operands);
> > > > > > (gdb) n
> > > > > > 1185      gcc_assert (dst == operands[0]); -- here
> > > > > > (gdb)
> > > > > >
> > > > > > the original operands[0], operands[1], operands[2] are below
> > > > > > (gdb) p debug_rtx (operands[0])
> > > > > > (mem/c:V4QI (plus:SI (reg/f:SI 77 virtual-stack-vars)
> > > > > >         (const_int -8220 [0xffffffffffffdfe4])) [0 MEM <vector(4)
> > > > > > unsigned char> [(unsigned char *)&tmp2 + 4B]+0 S4 A32])
> > > > > > $1 = void
> > > > > > (gdb) p debug_rtx (operands[1])
> > > > > > (subreg:V4QI (reg:SI 129) 0)
> > > > > > $2 = void
> > > > > > (gdb) p debug_rtx (operands[2])
> > > > > > (subreg:V4QI (reg:SI 98 [ _46 ]) 0)
> > > > > > $3 = void
> > > > > > (gdb)
> > > > > >
> > > > > > since operands[0] is mem and not equal to operands[1],
> > > > > > ix86_fixup_binary_operands will create a pseudo register for dst. and
> > > > > > then hit ICE.
> > > > > > Is this a bug or assumed?
> > > > >
> > > > > You will need ix86_expand_binary_operator here.
> > > > It will swap memory operand from op1 to op2 and hit ICE for
> > > > unrecognized insn.
> > > >
> > > > What about this?
> > >
> > > Still no good... You are using commutative operands, so the predicate
> > > of operand 2 should also allow memory. So, the predicate should be
> > > nonimmediate_or_x86_64_const_vector_operand. The intermediate insn
> > > pattern should look something like *<any_or:code><mode>_1, but with
> > > added XMM and MMX reg alternatives instead of mask regs.
> >
> > Alternatively, you can use UNKNOWN operator to prevent
> > canonicalization, but then you should not use commutative constraint
> > in the intermediate insn. I think this is the best solution.
> Like this?
Please check the attached (lightly tested) patch that keeps commutative
operands.

Uros.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3294c1e6274..0a39d396430 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -86,6 +86,14 @@
   [(V8QI "b") (V4QI "b") (V2QI "b")
    (V4HI "w") (V2HI "w")
    (V2SI "d") (V1DI "q")])
 
+;; Mapping to same size integral mode.
+(define_mode_attr mmxinsnmode
+  [(V8QI "DI") (V4QI "SI") (V2QI "HI")
+   (V4HI "DI") (V2HI "SI")
+   (V2SI "DI")
+   (V4HF "DI") (V2HF "SI")
+   (V2SF "DI")])
+
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
@@ -350,22 +358,7 @@
   HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
							     <MODE>mode);
   operands[1] = GEN_INT (val);
-  machine_mode mode;
-  switch (GET_MODE_SIZE (<MODE>mode))
-    {
-    case 2:
-      mode = HImode;
-      break;
-    case 4:
-      mode = SImode;
-      break;
-    case 8:
-      mode = DImode;
-      break;
-    default:
-      gcc_unreachable ();
-    }
-  operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
+  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
 })
 
 ;; For TARGET_64BIT we always round up to 8 bytes.
@@ -2974,33 +2967,50 @@
    (set_attr "type" "mmxadd,sselog,sselog,sselog")
    (set_attr "mode" "DI,TI,TI,TI")])
 
-(define_insn "<code><mode>3"
-  [(set (match_operand:VI_16_32 0 "register_operand" "=?r,x,x,v")
+(define_expand "<code><mode>3"
+  [(parallel
+    [(set (match_operand:VI_16_32 0 "nonimmediate_operand")
	(any_logic:VI_16_32
-	  (match_operand:VI_16_32 1 "register_operand" "%0,0,x,v")
-	  (match_operand:VI_16_32 2 "register_operand" "r,x,x,v")))
-   (clobber (reg:CC FLAGS_REG))]
+	  (match_operand:VI_16_32 1 "nonimmediate_operand")
+	  (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand")))
+     (clobber (reg:CC FLAGS_REG))])]
   ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*<code><mode>3"
+  [(set (match_operand:VI_16_32 0 "nonimmediate_operand" "=?r,m,x,x,v")
+	(any_logic:VI_16_32
+	  (match_operand:VI_16_32 1 "nonimmediate_operand" "%0,0,0,x,v")
+	  (match_operand:VI_16_32 2 "nonimmediate_or_x86_64_const_vector_operand" "r,i,x,x,v")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "#"
-  [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
-   (set_attr "type" "alu,sselog,sselog,sselog")
-   (set_attr "mode" "SI,TI,TI,TI")])
+  [(set_attr "isa" "*,*,sse2_noavx,avx,avx512vl")
+   (set_attr "type" "alu,alu,sselog,sselog,sselog")
+   (set_attr "mode" "SI,SI,TI,TI,TI")])
 
 (define_split
-  [(set (match_operand:VI_16_32 0 "general_reg_operand")
+  [(set (match_operand:VI_16_32 0 "nonimmediate_gr_operand")
	(any_logic:VI_16_32
-	  (match_operand:VI_16_32 1 "general_reg_operand")
-	  (match_operand:VI_16_32 2 "general_reg_operand")))
+	  (match_operand:VI_16_32 1 "nonimmediate_gr_operand")
+	  (match_operand:VI_16_32 2 "reg_or_const_vector_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  [(parallel [(set (match_dup 0)
-		   (any_logic:SI (match_dup 1) (match_dup 2)))
+		   (any_logic:<mmxinsnmode> (match_dup 1) (match_dup 2)))
	      (clobber (reg:CC FLAGS_REG))])]
{
-  operands[2] = lowpart_subreg (SImode, operands[2], <MODE>mode);
-  operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
-  operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+  if (!register_operand (operands[2], <MODE>mode))
+    {
+      HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[2],
+								<MODE>mode);
+      operands[2] = GEN_INT (val);
+    }
+  else
+    operands[2] = lowpart_subreg (<mmxinsnmode>mode, operands[2], <MODE>mode);
+  operands[1] = lowpart_subreg (<mmxinsnmode>mode, operands[1], <MODE>mode);
+  operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
 })
 
 (define_split
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 42053ea7209..064596d9594 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1209,6 +1209,10 @@
     return trunc_int_for_mode (val, SImode) == val;
 })
 
+(define_predicate "nonimmediate_or_x86_64_const_vector_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+       (match_operand 0 "x86_64_const_vector_operand")))
+
 ;; Return true when OP is nonimmediate or standard SSE constant.
 (define_predicate "nonimmediate_or_sse_const_operand"
   (ior (match_operand 0 "nonimmediate_operand")
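For reference, below is a minimal sketch of the kind of source the reworked
"<code><mode>3" expander is meant to cover: a 4-byte vector logic operation
whose destination lives in memory, similar in shape to the -m32 ICE quoted
above.  This is only an illustration, not a testcase from the thread; the
typedef, struct and function names are invented.

/* Illustrative sketch only; names are made up for this example.  */
typedef unsigned char v4qi __attribute__ ((vector_size (4)));

struct two_vecs
{
  v4qi lo;
  v4qi hi;
};

void
xor_hi (struct two_vecs *p, v4qi x)
{
  /* V4QImode XOR with a memory destination; with the patch this should be
     expanded via ix86_expand_binary_operator and matched by one of the new
     "*<code><mode>3" alternatives.  */
  p->hi ^= x;
}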