On Fri, May 6, 2022 at 4:29 PM Qing Zhao <qing.z...@oracle.com> wrote:
>
> Hi,
>
> As Kees requested in this PR: 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101891
>
> =====
>
> Currently -fzero-call-used-regs will use a pattern of:
>
> XOR regA,regA
> MOV regA,regB
> MOV regA,regC
> ...
> RET
>
> However, this introduces both a register ordering dependency (e.g. the CPU 
> cannot clear regB without clearing regA first), and while it greatly reduces 
> the available ROP gadgets, it does technically leave a set of "MOV" ROP 
> gadgets at the end of functions (e.g. "MOV regA,regC; RET").
>
> Please switch to always using XOR:
>
> XOR regA,regA
> XOR regB,regB
> XOR regC,regC
> ...
> RET
>
> =======
>
> This patch switches all MOV instructions to XOR on i386.
>
> Bootstrapped and regression tested on x86_64-linux-gnu.
>
> Okay for gcc13?
>
> Thanks.
>
> Qing
>
> ==========================================

> gcc/ChangeLog:
>
> * config/i386/i386.cc (zero_all_mm_registers): Use SET to zero instead
> of MOV for zeroing scratch registers.
> (ix86_zero_call_used_regs): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/zero-scratch-regs-1.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-10.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-13.c: Add -msse.
> * gcc.target/i386/zero-scratch-regs-14.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-15.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-16.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-17.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-18.c: Add -fno-stack-protector
> -fno-PIC, adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-19.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-2.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-20.c: Add -msse.
> * gcc.target/i386/zero-scratch-regs-21.c: Add -fno-stack-protector
> -fno-PIC, adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-22.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-23.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-26.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-27.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-28.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-3.c: Add -fno-stack-protector.
> * gcc.target/i386/zero-scratch-regs-31.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-4.c: Add -fno-stack-protector
> -fno-PIC.
> * gcc.target/i386/zero-scratch-regs-5.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-6.c: Add -fno-stack-protector.
> * gcc.target/i386/zero-scratch-regs-7.c: Likewise.
> * gcc.target/i386/zero-scratch-regs-8.c: Adjust mov to xor.
> * gcc.target/i386/zero-scratch-regs-9.c: Add -fno-stack-protector.

Please use something like the attached (functionally equivalent) patch
for the last hunk of your patch.

Also, if possible, please use V2SImode as a generic MMX mode instead
of V4HImode.

OK with the above changes.

Thanks,
Uros.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b16df5b183e..87220278d33 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -3923,59 +3923,32 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
 
       SET_HARD_REG_BIT (zeroed_hardregs, regno);
 
-      rtx reg, tmp, zero_rtx;
       machine_mode mode = zero_call_used_regno_mode (regno);
 
-      reg = gen_rtx_REG (mode, regno);
-      zero_rtx = CONST0_RTX (mode);
+      rtx reg = gen_rtx_REG (mode, regno);
+      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
 
-      if (mode == SImode)
-       if (zero_gpr == NULL_RTX)
-         {
-           zero_gpr = reg;
-           tmp = gen_rtx_SET (reg, zero_rtx);
-           if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
-             {
-               rtx clob = gen_rtx_CLOBBER (VOIDmode,
-                                           gen_rtx_REG (CCmode,
-                                                        FLAGS_REG));
-               tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
-                                                            tmp,
-                                                            clob));
-             }
-           emit_insn (tmp);
-         }
-       else
-         emit_move_insn (reg, zero_gpr);
-      else if (mode == V4SFmode)
-       if (zero_vector == NULL_RTX)
-         {
-           zero_vector = reg;
-           tmp = gen_rtx_SET (reg, zero_rtx);
-           emit_insn (tmp);
-         }
-       else
-         emit_move_insn (reg, zero_vector);
-      else if (mode == HImode)
-       if (zero_mask == NULL_RTX)
-         {
-           zero_mask = reg;
-           tmp = gen_rtx_SET (reg, zero_rtx);
-           emit_insn (tmp);
-         }
-       else
-         emit_move_insn (reg, zero_mask);
-      else if (mode == V4HImode)
-       if (zero_mmx == NULL_RTX)
-         {
-           zero_mmx = reg;
-           tmp = gen_rtx_SET (reg, zero_rtx);
-           emit_insn (tmp);
-         }
-       else
-         emit_move_insn (reg, zero_mmx);
-      else
-       gcc_unreachable ();
+      switch (mode)
+       {
+       case E_SImode:
+         if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+           {
+             rtx clob = gen_rtx_CLOBBER (VOIDmode,
+                                         gen_rtx_REG (CCmode, FLAGS_REG));
+             tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+           }
+         /* FALLTHRU */
+
+       case E_V4SFmode:
+       case E_HImode:
+       case E_V4HImode:
+         emit_insn (tmp);
+         break;
+
+       default:
+         gcc_unreachable ();
+       }
     }
   return zeroed_hardregs;
 }

Reply via email to