http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #2 from Oleg Endo <olegendo at gcc dot gnu.org> ---
I have quickly tried adding a peephole pass shortly after initial RTL
expansion:

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c    (revision 205971)
+++ gcc/config/sh/sh.c    (working copy)
@@ -735,6 +735,10 @@
   if (!TARGET_SH1)
     return;

+  opt_pass* pre_combine = make_pass_peephole2 (g);
+  pre_combine->name = "pre_peephole";
+  register_pass (pre_combine, PASS_POS_INSERT_AFTER, "dfinit", 1);
+
 /* Running the sh_treg_combine pass after ce1 generates better code when
    comparisons are combined and reg-reg moves are introduced, because
    reg-reg moves will be eliminated afterwards.  However, there are quite

and adding the following peepholes (the existing define_peephole2 patterns then
need an additional "&& reload_completed" condition, so that they do not match
in this early pass):

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md    (revision 205971)
+++ gcc/config/sh/sh.md    (working copy)
@@ -821,6 +821,29 @@
     cmp/ge    %1,%0"
   [(set_attr "type" "mt_group")])

+;; This peephole will be done after RTL expansion before combine.
+(define_peephole2
+  [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+           (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+                (const_int 31)))
+          (clobber (reg:SI T_REG))])
+   (set (match_operand:SI 2 "arith_reg_dest")
+    (xor:SI (match_dup 0) (const_int 1)))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0)))
+   (set (match_dup 2) (reg:SI T_REG))])
+
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+    (not:SI (match_operand:SI 1 "arith_reg_operand")))
+   (parallel [(set (match_operand:SI 2 "arith_reg_dest")
+           (lshiftrt:SI (match_dup 0) (const_int 31)))
+          (clobber (reg:SI T_REG))])]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0)))
+   (set (match_dup 2) (reg:SI T_REG))
+   (set (match_dup 0) (not:SI (match_dup 1)))])
+

This allows the cmp/pz insn to be generated before combine, so that the combine
pass is then able to merge it with other insns that use T_REG as an operand:

/* Test case for the peepholes above.  Shifts A left by one and ORs in the
   inverted value of (a >> 31), which is the inverted most significant bit
   of A when unsigned int is 32 bits wide.  Parameter B is unused.  */
unsigned int
test_011 (unsigned int a, unsigned int b)
{
  return (a << 1) | ((a >> 31) ^ 1);
}

will compile to:
        cmp/pz  r4
        mov     r4,r0
        rts
        rotcl   r0

Reply via email to