This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  In approach, this is similar
to the recently added DImode left shift by a single bit patch, but
also builds upon i386.md's UNSPEC carry flag representation:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

Tested with a cross-compiler to arc-linux hosted on x86_64,
with no new (compile-only) regressions from make -k check.
Ok for mainline if this passes Claudiu's nightly testing?


2023-11-06  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
        represents the carry flag being set if the operand is non-zero.
        (adc_f): New define_insn representing adc with updated flags.
        (ashrdi3): New define_expand that only handles shifts by 1.
        (ashrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrdi3): New define_expand that only handles shifts by 1.
        (lshrdi3_cnt1): New pre-reload define_insn_and_split.
        (rrcsi2): New define_insn for rrc (SImode rotate right through
        carry).
        (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
        (rotldi3): New define_expand that only handles rotates by 1.
        (rotldi3_cnt1): New pre-reload define_insn_and_split.
        (rotrdi3): New define_expand that only handles rotates by 1.
        (rotrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrsi3_cnt1_carry): New define_insn for lsr.f.
        (ashrsi3_cnt1_carry): New define_insn for asr.f.
        (btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
        * gcc.target/arc/ashrdi3-1.c: New test case.
        * gcc.target/arc/lshrdi3-1.c: Likewise.
        * gcc.target/arc/rotldi3-1.c: Likewise.
        * gcc.target/arc/rotrdi3-1.c: Likewise.


Thanks in advance,
Roger
--

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 7702978..97231b9 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -137,6 +137,7 @@
   UNSPEC_ARC_VMAC2HU
   UNSPEC_ARC_VMPY2H
   UNSPEC_ARC_VMPY2HU
+  UNSPEC_ARC_CC_NEZ
 
   VUNSPEC_ARC_RTIE
   VUNSPEC_ARC_SYNC
@@ -2790,6 +2791,31 @@ archs4x, archs4xd"
    (set_attr "type" "cc_arith")
    (set_attr "length" "4,4,4,4,8,8")])
 
+(define_insn "adc_f"
+  [(set (reg:CC_C CC_REG)
+       (compare:CC_C
+         (zero_extend:DI
+           (plus:SI
+             (plus:SI
+               (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+               (match_operand:SI 1 "register_operand" "%r"))
+             (match_operand:SI 2 "register_operand" "r")))
+         (plus:DI
+           (ltu:DI (reg:CC_C CC_REG) (const_int 0))
+           (zero_extend:DI (match_dup 1)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+       (plus:SI
+         (plus:SI
+           (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+           (match_dup 1))
+         (match_dup 2)))]
+  ""
+  "adc.f\\t%0,%1,%2"
+  [(set_attr "cond" "set")
+   (set_attr "predicable" "no")
+   (set_attr "type" "cc_arith")
+   (set_attr "length" "4")])
+
 ; combiner-splitter cmp / scc -> cmp / adc
 (define_split
   [(set (match_operand:SI 0 "dest_reg_operand" "")
@@ -3530,6 +3556,68 @@ archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_expand "ashrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (ashiftrt:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into asr.f hi; rrc lo
+(define_insn_and_split "*ashrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+                                    gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+                        gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+(define_expand "lshrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (lshiftrt:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into lsr.f hi; rrc lo
+(define_insn_and_split "*lshrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
+                                    gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
+                        gen_lowpart (SImode, operands[1])));
+  DONE;
+}
+  [(set_attr "length" "8")])
+
 ;; Rotate instructions.
 
 (define_insn "rotrsi3_insn"
@@ -3571,6 +3659,103 @@ archs4x, archs4xd"
     }
 })
 
+;; Rotate through carry flag
+
+(define_insn "rrcsi2"
+  [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (plus:SI
+         (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+                      (const_int 1))
+         (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+                    (const_int 31))))]
+  ""
+  "rrc\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+(define_insn "rrcsi2_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (plus:SI
+         (lshiftrt:SI (match_dup 1) (const_int 1))
+         (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+                    (const_int 31))))]
+  ""
+  "rrc.f\\t%0,%1"
+  [(set_attr "type" "shift")
+   (set_attr "predicable" "no")
+   (set_attr "length" "4")])
+
+;; DImode Rotate instructions
+
+(define_expand "rotldi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (rotate:DI (match_operand:DI 1 "register_operand")
+                      (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into add.f lo; adc.f hi; adc lo (emitted as add.cs lo,lo,1)
+(define_insn_and_split "*rotldi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (rotate:DI (match_operand:DI 1 "register_operand")
+                  (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo0 = gen_lowpart (SImode, operands[0]);
+  rtx lo1 = gen_lowpart (SImode, operands[1]);
+  rtx hi1 = gen_highpart (SImode, operands[1]);
+  emit_insn (gen_add_f (lo0, lo1, lo1));
+  emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1));
+  emit_insn (gen_adc (lo0, lo0, const0_rtx));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
+(define_expand "rotrdi3"
+  [(parallel
+      [(set (match_operand:DI 0 "register_operand")
+           (rotatert:DI (match_operand:DI 1 "register_operand")
+                        (match_operand:QI 2 "const_int_operand")))
+       (clobber (reg:CC CC_REG))])]
+  ""
+{
+  if (operands[2] != const1_rtx)
+    FAIL;
+})
+
+;; Split into asr.f 0,lo (carry := bit 0 of lo); rrc.f hi; rrc lo
+(define_insn_and_split "*rotrdi3_cnt1"
+  [(set (match_operand:DI 0 "register_operand")
+       (rotatert:DI (match_operand:DI 1 "register_operand")
+                    (const_int 1)))
+   (clobber (reg:CC CC_REG))]
+  "arc_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx lo = gen_lowpart (SImode, operands[1]);
+  emit_insn (gen_btst_0_carry (lo));
+  emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]),
+                              gen_highpart (SImode, operands[1])));
+  emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo));
+  DONE;
+}
+  [(set_attr "length" "12")])
+
 ;; Compare / branch instructions.
 
 (define_expand "cbranchsi4"
@@ -6022,6 +6207,18 @@ archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "lshrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (lshiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "lsr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_insn "ashrsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"             "=q,w")
        (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
@@ -6032,6 +6229,28 @@ archs4x, archs4xd"
    (set_attr "iscompact" "maybe,false")
    (set_attr "predicable" "no,no")])
 
+(define_insn "ashrsi3_cnt1_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))
+   (set (match_operand:SI 0 "dest_reg_operand" "=r")
+       (ashiftrt:SI (match_dup 1) (const_int 1)))]
+  ""
+  "asr.f\\t%0,%1"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
+(define_insn "btst_0_carry"
+  [(set (reg:CC_C CC_REG)
+       (unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
+                             (const_int 1))] UNSPEC_ARC_CC_NEZ))]
+  ""
+  "asr.f\\t0,%0"
+  [(set_attr "type" "unary")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand" "")
        (zero_extract:SI (match_dup 0)
diff --git a/gcc/testsuite/gcc.target/arc/ashrdi3-1.c b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
new file mode 100644
index 0000000..d990bfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo(long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/lshrdi3-1.c b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
new file mode 100644
index 0000000..6542ffd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return x >> 1;
+}
+
+/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotldi3-1.c b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
new file mode 100644
index 0000000..325996e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotldi3-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x << 1) | (x >> 63);
+}
+
+/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
+/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
+/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */
diff --git a/gcc/testsuite/gcc.target/arc/rotrdi3-1.c b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
new file mode 100644
index 0000000..cd8e0de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long foo(unsigned long long x)
+{
+  return (x >> 1) | (x << 63);
+}
+
+/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
+/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
+/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */

Reply via email to