On Fri, May 6, 2022 at 4:29 PM Qing Zhao <qing.z...@oracle.com> wrote: > > Hi, > > As Kees requested in this PR: > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101891 > > ===== > > Currently -fzero-call-used-regs will use a pattern of: > > XOR regA,regA > MOV regA,regB > MOV regA,regC > ... > RET > > However, this introduces both a register ordering dependency (e.g. the CPU > cannot clear regB without clearing regA first), and while it greatly reduces > available ROP gadgets, it does technically leave a set of "MOV" ROP gadgets > at the end of functions (e.g. "MOV regA,regC; RET"). > > Please switch to always using XOR: > > XOR regA,regA > XOR regB,regB > XOR regC,regC > ... > RET > > ======= > > This patch switches all MOV to XOR on i386. > > Bootstrapped and regression tested on x86_64-linux-gnu. > > Okay for gcc13? > > Thanks. > > Qing > > ==========================================
> gcc/ChangeLog: > > * config/i386/i386.cc (zero_all_mm_registers): Use SET to zero instead > of MOV for zeroing scratch registers. > (ix86_zero_call_used_regs): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/zero-scratch-regs-1.c: Add -fno-stack-protector > -fno-PIC. > * gcc.target/i386/zero-scratch-regs-10.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-13.c: Add -msse. > * gcc.target/i386/zero-scratch-regs-14.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-15.c: Add -fno-stack-protector > -fno-PIC. > * gcc.target/i386/zero-scratch-regs-16.c: Likewise. > * gcc.target/i386/zero-scratch-regs-17.c: Likewise. > * gcc.target/i386/zero-scratch-regs-18.c: Add -fno-stack-protector > -fno-PIC, adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-19.c: Add -fno-stack-protector > -fno-PIC. > * gcc.target/i386/zero-scratch-regs-2.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-20.c: Add -msse. > * gcc.target/i386/zero-scratch-regs-21.c: Add -fno-stack-protector > -fno-PIC, adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-22.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-23.c: Likewise. > * gcc.target/i386/zero-scratch-regs-26.c: Likewise. > * gcc.target/i386/zero-scratch-regs-27.c: Likewise. > * gcc.target/i386/zero-scratch-regs-28.c: Likewise. > * gcc.target/i386/zero-scratch-regs-3.c: Add -fno-stack-protector. > * gcc.target/i386/zero-scratch-regs-31.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-4.c: Add -fno-stack-protector > -fno-PIC. > * gcc.target/i386/zero-scratch-regs-5.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-6.c: Add -fno-stack-protector. > * gcc.target/i386/zero-scratch-regs-7.c: Likewise. > * gcc.target/i386/zero-scratch-regs-8.c: Adjust mov to xor. > * gcc.target/i386/zero-scratch-regs-9.c: Add -fno-stack-protector. Please use something like the attached (functionally equivalent) patch for the last hunk of your patch. 
Also, if possible, please use V2SImode as a generic MMX mode instead of V4HImode. OK with the above changes. Thanks, Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b16df5b183e..87220278d33 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -3923,59 +3923,32 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 	  SET_HARD_REG_BIT (zeroed_hardregs, regno);
 
-	  rtx reg, tmp, zero_rtx;
 	  machine_mode mode = zero_call_used_regno_mode (regno);
 
-	  reg = gen_rtx_REG (mode, regno);
-	  zero_rtx = CONST0_RTX (mode);
+	  rtx reg = gen_rtx_REG (mode, regno);
+	  rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
 
-	  if (mode == SImode)
-	    if (zero_gpr == NULL_RTX)
-	      {
-		zero_gpr = reg;
-		tmp = gen_rtx_SET (reg, zero_rtx);
-		if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
-		  {
-		    rtx clob = gen_rtx_CLOBBER (VOIDmode,
-						gen_rtx_REG (CCmode,
							     FLAGS_REG));
-		    tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
-								 tmp,
-								 clob));
-		  }
-		emit_insn (tmp);
-	      }
-	    else
-	      emit_move_insn (reg, zero_gpr);
-	  else if (mode == V4SFmode)
-	    if (zero_vector == NULL_RTX)
-	      {
-		zero_vector = reg;
-		tmp = gen_rtx_SET (reg, zero_rtx);
-		emit_insn (tmp);
-	      }
-	    else
-	      emit_move_insn (reg, zero_vector);
-	  else if (mode == HImode)
-	    if (zero_mask == NULL_RTX)
-	      {
-		zero_mask = reg;
-		tmp = gen_rtx_SET (reg, zero_rtx);
-		emit_insn (tmp);
-	      }
-	    else
-	      emit_move_insn (reg, zero_mask);
-	  else if (mode == V4HImode)
-	    if (zero_mmx == NULL_RTX)
-	      {
-		zero_mmx = reg;
-		tmp = gen_rtx_SET (reg, zero_rtx);
-		emit_insn (tmp);
-	      }
-	    else
-	      emit_move_insn (reg, zero_mmx);
-	  else
-	    gcc_unreachable ();
+	  switch (mode)
+	    {
+	    case E_SImode:
+	      if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+		{
+		  rtx clob = gen_rtx_CLOBBER (VOIDmode,
+					      gen_rtx_REG (CCmode, FLAGS_REG));
+		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+		}
+	      /* FALLTHRU */
+
+	    case E_V4SFmode:
+	    case E_HImode:
+	    case E_V4HImode:
+	      emit_insn (tmp);
+	      break;
+
+	    default:
+	      gcc_unreachable ();
+	    }
 	}
 
   return zeroed_hardregs;
 }