This patch improves the code generated for DImode right shifts (both arithmetic and logical) by a single bit, and also for DImode rotates (both left and right) by a single bit. The approach is similar to that of the recently added patch for DImode left shifts by a single bit, but it also builds upon i386.md's UNSPEC representation of the carry flag: https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html
The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:
        asl r2,r1,31
        lsr_s r0,r0
        or_s r0,r0,r2
        j_s.d [blink]
        asr_s r1,r1,1

After:
ashr:
        asr.f r1,r1
        j_s.d [blink]
        rrc r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:
        asl r2,r1,31
        lsr_s r0,r0
        or_s r0,r0,r2
        j_s.d [blink]
        lsr_s r1,r1

After:
lshr:
        lsr.f r1,r1
        j_s.d [blink]
        rrc r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:
        lsr r12,r1,31
        lsr r2,r0,31
        asl_s r3,r0,1
        asl_s r1,r1,1
        or r0,r12,r3
        j_s.d [blink]
        or_s r1,r1,r2

After:
rotl:
        add.f r0,r0,r0
        adc.f r1,r1,r1
        j_s.d [blink]
        add.cs r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:
        asl r12,r1,31
        asl r2,r0,31
        lsr_s r3,r0
        lsr_s r1,r1
        or r0,r12,r3
        j_s.d [blink]
        or_s r1,r1,r2

After:
rotr:
        asr.f 0,r0
        rrc.f r1,r1
        j_s.d [blink]
        rrc r0,r0

On CPUs without a barrel shifter the improvements are even better.

Tested with a cross-compiler to arc-linux hosted on x86_64, with no new (compile-only) regressions from make -k check. Ok for mainline if this passes Claudiu's nightly testing?

2023-11-06 Roger Sayle <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that represents the carry flag being set if the operand is non-zero.
        (adc_f): New define_insn representing adc with updated flags.
        (ashrdi3): New define_expand that only handles shifts by 1.
        (ashrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrdi3): New define_expand that only handles shifts by 1.
        (lshrdi3_cnt1): New pre-reload define_insn_and_split.
        (rrcsi2): New define_insn for rrc (SImode rotate right through carry).
        (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
        (rotldi3): New define_expand that only handles rotates by 1.
        (rotldi3_cnt1): New pre-reload define_insn_and_split.
        (rotrdi3): New define_expand that only handles rotates by 1.
        (rotrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrsi3_cnt1_carry): New define_insn for lsr.f.
        (ashrsi3_cnt1_carry): New define_insn for asr.f.
        (btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
        * gcc.target/arc/ashrdi3-1.c: New test case.
        * gcc.target/arc/lshrdi3-1.c: Likewise.
        * gcc.target/arc/rotldi3-1.c: Likewise.
        * gcc.target/arc/rotrdi3-1.c: Likewise.

Thanks in advance,
Roger
--
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 7702978..97231b9 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -137,6 +137,7 @@ UNSPEC_ARC_VMAC2HU UNSPEC_ARC_VMPY2H UNSPEC_ARC_VMPY2HU + UNSPEC_ARC_CC_NEZ VUNSPEC_ARC_RTIE VUNSPEC_ARC_SYNC @@ -2790,6 +2791,31 @@ archs4x, archs4xd" (set_attr "type" "cc_arith") (set_attr "length" "4,4,4,4,8,8")]) +(define_insn "adc_f" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (zero_extend:DI + (plus:SI + (plus:SI + (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_operand:SI 1 "register_operand" "%r")) + (match_operand:SI 2 "register_operand" "r"))) + (plus:DI + (ltu:DI (reg:CC_C CC_REG) (const_int 0)) + (zero_extend:DI (match_dup 1))))) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (plus:SI + (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_dup 1)) + (match_dup 2)))] + "" + "adc.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "predicable" "no") + (set_attr "type" "cc_arith") + (set_attr "length" "4")]) + ; combiner-splitter cmp / scc -> cmp / adc (define_split [(set (match_operand:SI 0 "dest_reg_operand" "") @@ -3530,6 +3556,68 @@ archs4x, archs4xd" "" [(set_attr "length" "8")]) +(define_expand "ashrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (ashiftrt:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; Split into asr.f hi; rrc lo +(define_insn_and_split "*ashrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (ashiftrt:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + DONE; +} + [(set_attr "length" 
"8")]) + +(define_expand "lshrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; Split into lsr.f hi; rrc lo +(define_insn_and_split "*lshrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + DONE; +} + [(set_attr "length" "8")]) + ;; Rotate instructions. (define_insn "rotrsi3_insn" @@ -3571,6 +3659,103 @@ archs4x, archs4xd" } }) +;; Rotate through carry flag + +(define_insn "rrcsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") + (plus:SI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1)) + (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (const_int 31))))] + "" + "rrc\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "predicable" "no") + (set_attr "length" "4")]) + +(define_insn "rrcsi2_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (plus:SI + (lshiftrt:SI (match_dup 1) (const_int 1)) + (ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (const_int 31))))] + "" + "rrc.f\\t%0,%1" + [(set_attr "type" "shift") + (set_attr "predicable" "no") + (set_attr "length" "4")]) + +;; DImode Rotate instructions + +(define_expand "rotldi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (rotate:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC 
CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; split into add.f lo; adc.f hi; adc lo +(define_insn_and_split "*rotldi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (rotate:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx lo0 = gen_lowpart (SImode, operands[0]); + rtx lo1 = gen_lowpart (SImode, operands[1]); + rtx hi1 = gen_highpart (SImode, operands[1]); + emit_insn (gen_add_f (lo0, lo1, lo1)); + emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1)); + emit_insn (gen_adc (lo0, lo0, const0_rtx)); + DONE; +} + [(set_attr "length" "12")]) + +(define_expand "rotrdi3" + [(parallel + [(set (match_operand:DI 0 "register_operand") + (rotatert:DI (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (operands[2] != const1_rtx) + FAIL; +}) + +;; split into asr.f lo; rrc.f hi; rrc lo +(define_insn_and_split "*rotrdi3_cnt1" + [(set (match_operand:DI 0 "register_operand") + (rotatert:DI (match_operand:DI 1 "register_operand") + (const_int 1))) + (clobber (reg:CC CC_REG))] + "arc_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx lo = gen_lowpart (SImode, operands[1]); + emit_insn (gen_btst_0_carry (lo)); + emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo)); + DONE; +} + [(set_attr "length" "12")]) + ;; Compare / branch instructions. 
(define_expand "cbranchsi4" @@ -6022,6 +6207,18 @@ archs4x, archs4xd" (set_attr "iscompact" "maybe,false") (set_attr "predicable" "no,no")]) +(define_insn "lshrsi3_cnt1_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (lshiftrt:SI (match_dup 1) (const_int 1)))] + "" + "lsr.f\\t%0,%1" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + (define_insn "ashrsi3_cnt1" [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c") @@ -6032,6 +6229,28 @@ archs4x, archs4xd" (set_attr "iscompact" "maybe,false") (set_attr "predicable" "no,no")]) +(define_insn "ashrsi3_cnt1_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ)) + (set (match_operand:SI 0 "dest_reg_operand" "=r") + (ashiftrt:SI (match_dup 1) (const_int 1)))] + "" + "asr.f\\t%0,%1" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + +(define_insn "btst_0_carry" + [(set (reg:CC_C CC_REG) + (unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r") + (const_int 1))] UNSPEC_ARC_CC_NEZ))] + "" + "asr.f\\t0,%0" + [(set_attr "type" "unary") + (set_attr "length" "4") + (set_attr "predicable" "no")]) + (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (zero_extract:SI (match_dup 0) diff --git a/gcc/testsuite/gcc.target/arc/ashrdi3-1.c b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c new file mode 100644 index 0000000..d990bfd --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/ashrdi3-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +long long foo(long long x) +{ + return x >> 1; +} + +/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */ +/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */ diff --git 
a/gcc/testsuite/gcc.target/arc/lshrdi3-1.c b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c new file mode 100644 index 0000000..6542ffd --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/lshrdi3-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long long foo(unsigned long long x) +{ + return x >> 1; +} + +/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */ +/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */ diff --git a/gcc/testsuite/gcc.target/arc/rotldi3-1.c b/gcc/testsuite/gcc.target/arc/rotldi3-1.c new file mode 100644 index 0000000..325996e --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/rotldi3-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long long foo(unsigned long long x) +{ + return (x << 1) | (x >> 63); +} + +/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */ +/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */ +/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */ diff --git a/gcc/testsuite/gcc.target/arc/rotrdi3-1.c b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c new file mode 100644 index 0000000..cd8e0de --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/rotrdi3-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long long foo(unsigned long long x) +{ + return (x >> 1) | (x << 63); +} + +/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */ +/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */ +/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */