The current method of treating shifts of extended values on RISC-V frequently causes sequences of 3 shifts, despite the presence of the 'zero_extendsidi2_shifted' pattern.
Consider: unsigned long f(unsigned int a, unsigned long b) { a = a << 1; unsigned long c = (unsigned long) a; c = b + (c<<4); return c; } which will present at combine-time as: Trying 7, 8 -> 9: 7: r78:SI=r81:DI#0<<0x1 REG_DEAD r81:DI 8: r79:DI=zero_extend(r78:SI) REG_DEAD r78:SI 9: r72:DI=r79:DI<<0x4 REG_DEAD r79:DI Failed to match this instruction: (set (reg:DI 72 [ _1 ]) (and:DI (ashift:DI (reg:DI 81) (const_int 5 [0x5])) (const_int 68719476704 [0xfffffffe0]))) and produce the following (optimized) assembly: f: slliw a5,a0,1 slli a5,a5,32 srli a5,a5,28 add a0,a5,a1 ret The current way of handling this (in 'zero_extendsidi2_shifted') doesn't apply for two reasons: - this is seen before reload, and - (more importantly) the constant mask is not 0xfffffffful. To address this, we introduce a generalized version of shifting zero-extended values that supports any mask of consecutive ones as long as the number of training zeros is the inner shift-amount. With this new split, we generate the following assembly for the aforementioned function: f: slli a0,a0,33 srli a0,a0,28 add a0,a0,a1 ret Unfortunately, all of this causes some fallout (especially in how it interacts with Zb* extensions and zero_extract expressions formed during combine): this is addressed through additional instruction splitting and handling of zero_extract. gcc/ChangeLog: * config/riscv/bitmanip.md (*zext.w): Match a zext.w expressed as an and:DI. (*andi_add.uw): New pattern. (*slli_slli_uw): New pattern. (*shift_then_shNadd.uw): New pattern. (*slliuw): Rename to riscv_slli_uw. (riscv_slli_uw): Renamed from *slliuw. (*zeroextract<GPR:mode><ANYI:mode>2_highbits): New pattern. (*zero_extract<GPR:mode>): New pattern, which will be split to shift-left + shift-right. * config/riscv/predicates.md (dimode_shift_operand): * config/riscv/riscv.md (*zero_extract<GPR:mode>_lowbits): (zero_extendsidi2_shifted): Rename. (*zero_extendsidi2_shifted): Generalize. (*shift<GPR:MODE>_truthvalue): New pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/shift-shift-6.c: New test. * gcc.target/riscv/shift-shift-7.c: New test. * gcc.target/riscv/shift-shift-8.c: New test. * gcc.target/riscv/shift-shift-9.c: New test. * gcc.target/riscv/snez.c: New test. Commit notes: - Depends on a predicate posted in "RISC-V: Optimize branches testing a bit-range or a shifted immediate". Depending on the order of applying these, I'll take care to pull that part out of the other patch if needed. Version-changes: 2 - refactor - optimise for additional corner cases and deal with fallout Signed-off-by: Philipp Tomsich <philipp.toms...@vrull.eu> --- (no changes since v1) gcc/config/riscv/bitmanip.md | 142 ++++++++++++++---- gcc/config/riscv/predicates.md | 5 + gcc/config/riscv/riscv.md | 75 +++++++-- .../gcc.target/riscv/shift-shift-6.c | 14 ++ .../gcc.target/riscv/shift-shift-7.c | 16 ++ .../gcc.target/riscv/shift-shift-8.c | 20 +++ .../gcc.target/riscv/shift-shift-9.c | 15 ++ gcc/testsuite/gcc.target/riscv/snez.c | 14 ++ 8 files changed, 261 insertions(+), 40 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/shift-shift-9.c create mode 100644 gcc/testsuite/gcc.target/riscv/snez.c diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 78fdf02c2ec..06126ac4819 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -29,7 +29,20 @@ [(set_attr "type" "bitmanip,load") (set_attr "mode" "DI")]) -(define_insn "riscv_shNadd<X:mode>" +;; We may end up forming a slli.uw with an immediate of 0 (while +;; splitting through "*slli_slli_uw", below). +;; Match this back to a zext.w +(define_insn "*zext.w" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0)) + (const_int 4294967295)))] + "TARGET_64BIT && TARGET_ZBA" + "zext.w\t%0,%1" + [(set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) + +(define_insn "*shNadd" [(set (match_operand:X 0 "register_operand" "=r") (plus:X (ashift:X (match_operand:X 1 "register_operand" "r") (match_operand:QI 2 "imm123_operand" "Ds3")) @@ -110,14 +123,52 @@ (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") (match_operand:QI 2 "imm123_operand" "Ds3")) - (match_operand 3 "immediate_operand" "n")) + (match_operand:DI 3 "consecutive_bits32_operand" "")) (match_operand:DI 4 "register_operand" "r")))] "TARGET_64BIT && TARGET_ZBA - && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff" + && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))" "sh%2add.uw\t%0,%1,%4" [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) +;; A shift-left, zext.w, shift-left sequence should turn into a +;; shift-left followed by slli.uw. +;; The "TARGET_ZBA && clz_hwi (operands[3]) <= 32" check in the +;; "*zero_extendsidi2_shifted" pattern over in riscv.md ensures +;; that we fall through to here, if appropriate. +(define_insn_and_split "*slli_slli_uw" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand:DI 3 "consecutive_bits_operand" ""))) + (clobber (match_scratch:DI 4 "=&r"))] + "TARGET_64BIT && TARGET_ZBA + && popcount_hwi (INTVAL (operands[3])) < 32 + && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3])) + && IN_RANGE (clz_hwi (INTVAL (operands[3])), 29, 32)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[3]); + /* scale: shamt for the slli.uw */ + int scale = 32 - clz_hwi (mask); + /* bias: shamt for the prior shift (can be zero) */ + int bias = ctz_hwi (mask) - scale; + + /* Always emit both the slli and slli.uw, but break the chain + if bias is 0 (and have RTL cleanup remove the dead/dangling + instruction). */ + emit_insn (gen_rtx_SET (operands[4], + gen_rtx_ASHIFT (DImode, operands[1], GEN_INT (bias)))); + emit_insn (gen_riscv_slli_uw (operands[0], + bias ? operands[4] : operands[1], + GEN_INT (scale), + GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale))); + + DONE; +}) + ;; During combine, we may encounter an attempt to combine ;; slli rtmp, rs, #imm ;; zext.w rtmp, rtmp @@ -125,33 +176,35 @@ ;; which will lead to the immediate not satisfying the above constraints. ;; By splitting the compound expression, we can simplify to a slli and a ;; sh[123]add.uw. -(define_split - [(set (match_operand:DI 0 "register_operand") - (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand") - (match_operand:QI 2 "immediate_operand")) - (match_operand:DI 3 "consecutive_bits_operand")) - (match_operand:DI 4 "register_operand"))) - (clobber (match_operand:DI 5 "register_operand"))] - "TARGET_64BIT && TARGET_ZBA" + +;; To match this target sequence, the final result must be shifted +;; using the sh[123]add.uw instruction by 1, 2 or 3 bits into the high +;; word. To test for this property, we count the leading zero-bits of +;; the mask (which must be in the range [29, 31]). + +(define_insn_and_split "*shift_then_shNadd.uw" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand:DI 3 "consecutive_bits_operand" "")) + (match_operand:DI 4 "register_operand" "r"))) + (clobber (match_scratch:DI 5 "=&r"))] + "TARGET_64BIT && TARGET_ZBA + && riscv_shamt_matches_mask_p (UINTVAL (operands[2]), UINTVAL (operands[3])) + && IN_RANGE (clz_hwi (UINTVAL (operands[3])), 29, 31)" + "#" + "&& reload_completed" [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 6))) (set (match_dup 0) (plus:DI (and:DI (ashift:DI (match_dup 5) (match_dup 7)) (match_dup 8)) (match_dup 4)))] { - unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]); - /* scale: shift within the sh[123]add.uw */ + unsigned HOST_WIDE_INT mask = INTVAL (operands[3]); + /* scale: shamt for the sh[123]add.uw */ unsigned HOST_WIDE_INT scale = 32 - clz_hwi (mask); - /* bias: pre-scale amount (i.e. the prior shift amount) */ - int bias = ctz_hwi (mask) - scale; - - /* If the bias + scale don't add up to operand[2], reject. */ - if ((scale + bias) != UINTVAL (operands[2])) - FAIL; - - /* If the shift-amount is out-of-range for sh[123]add.uw, reject. */ - if ((scale < 1) || (scale > 3)) - FAIL; + /* bias: shamt for the prior shift */ + unsigned HOST_WIDE_INT bias = ctz_hwi (mask) - scale; /* If there's no bias, the '*shNadduw' pattern should have matched. */ if (bias == 0) @@ -159,7 +212,7 @@ operands[6] = GEN_INT (bias); operands[7] = GEN_INT (scale); - operands[8] = GEN_INT (0xffffffffULL << scale); + operands[8] = GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale); }) (define_insn "*add.uw" @@ -172,13 +225,13 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) -(define_insn "*slliuw" +(define_insn "riscv_slli_uw" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:QI 2 "immediate_operand" "I")) - (match_operand 3 "immediate_operand" "n")))] + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand 3 "consecutive_bits32_operand" "")))] "TARGET_64BIT && TARGET_ZBA - && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff" + && riscv_shamt_matches_mask_p(INTVAL (operands[2]), INTVAL (operands[3]))" "slli.uw\t%0,%1,%2" [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) @@ -328,6 +381,39 @@ "rev8\t%0,%1" [(set_attr "type" "bitmanip")]) +(define_insn "*zeroextract<GPR:mode><ANYI:mode>2_highbits" + [(set (match_operand:ANYI 0 "register_operand" "=r") + (zero_extract:ANYI (match_operand:GPR 1 "register_operand" "r") + (match_operand 2 "immediate_operand") + (match_operand 3 "immediate_operand")))] + "INTVAL (operands[2]) <= GET_MODE_BITSIZE (<ANYI:MODE>mode) && + (INTVAL (operands[2]) + INTVAL (operands[3])) == GET_MODE_BITSIZE (<GPR:MODE>mode)" +{ + return (TARGET_64BIT && <GPR:MODE>mode == SImode) ? "srliw\t%0,%1,%3" : "srli\t%0,%1,%3"; +} + [(set_attr "type" "shift") + (set_attr "mode" "<GPR:MODE>")]) + +(define_insn_and_split "*zero_extract<GPR:mode>" + [(set (match_operand:GPR 0 "register_operand" "=r") + (zero_extract:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand 2 "immediate_operand") + (match_operand 3 "immediate_operand")))] + "(INTVAL (operands[2]) + INTVAL (operands[3])) != 32 + && (INTVAL (operands[2]) + INTVAL (operands[3])) != 64 + && !(TARGET_ZBS && INTVAL (operands[2]) == 1)" + "#" + "&& reload_completed" + [(set (match_dup 0) (ashift:GPR (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:GPR (match_dup 0) (match_dup 3)))] +{ + int width = INTVAL (operands[2]); + int pos = INTVAL (operands[3]); + + operands[2] = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - (width + pos)); + operands[3] = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - width); +}) + ;; HI bswap can be emulated using SI/DI bswap followed ;; by a logical shift right ;; SI bswap for TARGET_64BIT is already similarly in diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index c56bfa99339..6de9b39e39b 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -59,6 +59,10 @@ (ior (match_operand 0 "const_0_operand") (match_operand 0 "register_operand"))) +(define_predicate "dimode_shift_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (DImode) - 1)"))) + ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate. (define_predicate "branch_on_bit_operand" (and (match_code "const_int") @@ -336,3 +340,4 @@ (match_operand 0 "const_arith_2simm12_operand") (and (match_operand 0 "const_arith_shifted123_operand") (match_test "TARGET_ZBA")))) + diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 1b357a9c57f..1514e10dbd1 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -2194,6 +2194,23 @@ [(set_attr "type" "shift") (set_attr "mode" "SI")]) +;; A zero-extract that can be expressed as an ANDI +(define_insn "*zero_extract<GPR:mode>_lowbits" + [(set (match_operand:GPR 0 "register_operand" "=r") + (zero_extract:GPR (match_operand:GPR 1 "register_operand" "r") + (match_operand 2 "dimode_shift_operand" "i") + (const_int 0)))] + "SMALL_OPERAND ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1)" +; "#" +; "&& reload_completed" +; [(set (match_dup 0) (and:GPR (match_dup 1) (match_dup 4)))] +{ + operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1); + return "andi\t%0,%1,%4"; +} + [(set_attr "type" "logical") + (set_attr "mode" "<GPR:MODE>")]) + ;; Canonical form for a zero-extend of a logical right shift when the ;; shift count is 31. (define_insn "*lshrsi3_zero_extend_3" @@ -2250,23 +2267,57 @@ ;; occur when unsigned int is used for array indexing. Split this into two ;; shifts. Otherwise we can get 3 shifts. -(define_insn_and_split "zero_extendsidi2_shifted" +(define_insn_and_split "*zero_extendsidi2_shifted" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") - (match_operand:QI 2 "immediate_operand" "I")) - (match_operand 3 "immediate_operand" ""))) + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand:DI 3 "consecutive_bits_operand" ""))) (clobber (match_scratch:DI 4 "=&r"))] - "TARGET_64BIT && !TARGET_ZBA - && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)" + "TARGET_64BIT + && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3])) + && !(TARGET_ZBA && clz_hwi (INTVAL (operands[3])) <= 32)" "#" "&& reload_completed" - [(set (match_dup 4) - (ashift:DI (match_dup 1) (const_int 32))) - (set (match_dup 0) - (lshiftrt:DI (match_dup 4) (match_dup 5)))] - "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));" - [(set_attr "type" "shift") - (set_attr "mode" "DI")]) + [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 5))) + (set (match_dup 0) (lshiftrt:DI (match_dup 4) (match_dup 6)))] +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[3]); + int leading = clz_hwi (mask); + int trailing = ctz_hwi (mask); + + operands[5] = GEN_INT (leading + trailing); + operands[6] = GEN_INT (leading); +}) + +;; Handle SImode shift of a truth-value w/ zero-extract +;; +;; TODO: match comparison/equality operators +;; +(define_insn_and_split "*shift<GPR:MODE>_truthvalue" + [(set (match_operand:GPR 0 "register_operand" "=r") + (ashift:GPR (ne:GPR (match_operand:GPR 1 "register_operand" "r") + (const_int 0)) + (match_operand 2 "dimode_shift_operand" "i")))] + "!partial_subreg_p (operands[1]) + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) < GET_MODE_BITSIZE (<GPR:MODE>mode)" + "#" + "&& reload_completed" + [(set (match_dup 0) (ne:GPR (match_dup 1) (const_int 0))) + (set (match_dup 0) (ashift:GPR (match_dup 0) (match_dup 2)))] +{ + rtx op1 = operands[1]; + machine_mode opmode = TARGET_64BIT ? DImode : SImode; + + /* If operating on a subreg, unwrap op1. */ + if (SUBREG_P (op1)) + op1 = XEXP (op1, 0); + + if (GET_MODE (op1) != opmode) + op1 = gen_rtx_SUBREG (opmode, op1, 0); + + operands[1] = op1; +}) ;; ;; .................... diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-6.c b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c new file mode 100644 index 00000000000..083f5c4688c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ + +/* Test for zero_extendsidi2_shifted handling arbitrary mask widths + (not just 32 bits). */ +unsigned sub1(unsigned a, unsigned b) +{ + b = (b << 2) >> 2; + return a + (b << 1); +} + +/* { dg-final { scan-assembler-times "slli" 1 } } */ +/* { dg-final { scan-assembler-times "srli" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-7.c b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c new file mode 100644 index 00000000000..3ecd9ebdc39 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +/* Test for zero_extendsidi2_shifted handling arbitrary mask widths + (not just 32 bits). */ +unsigned long f(unsigned int a, unsigned long b) +{ + a = a << 1; + unsigned long c = (unsigned long) a; + c = b + (c<<4); + return c; +} + +/* { dg-final { scan-assembler-times "slli" 1 } } */ +/* { dg-final { scan-assembler-times "srli" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-8.c b/gcc/testsuite/gcc.target/riscv/shift-shift-8.c new file mode 100644 index 00000000000..e6dcfcce0e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-8.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ + +/* Tests for andi + sh[123]add */ +unsigned long sub1(unsigned long a, unsigned long b) +{ + return b + ((a << 55) >> 53); +} + +unsigned long sub2(unsigned long a, unsigned long b) +{ + return b + ((a & 1023) << 3); +} + +/* { dg-final { scan-assembler-times "andi" 2 } } */ +/* { dg-final { scan-assembler-times "sh2add" 1 } } */ +/* { dg-final { scan-assembler-times "sh3add" 1 } } */ +/* { dg-final { scan-assembler-not "slli" } } */ +/* { dg-final { scan-assembler-not "srli" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-9.c b/gcc/testsuite/gcc.target/riscv/shift-shift-9.c new file mode 100644 index 00000000000..37e5755b685 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shift-shift-9.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */ + +signed short foo (signed short data) +{ + signed short dtype = ((data >> 3) & 0xf); + dtype |= dtype << 4; + return dtype; +} + +/* { dg-final { scan-assembler-times "slli\t" 2 } } */ +/* { dg-final { scan-assembler-times "srli\t" 1 } } */ +/* { dg-final { scan-assembler-times "add\t" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/snez.c b/gcc/testsuite/gcc.target/riscv/snez.c new file mode 100644 index 00000000000..e5a7fad770a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/snez.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */ + +int f (long long a) +{ + return (a != 2) << 1; +} + +/* { dg-final { scan-assembler-times "slli\t" 1 } } */ +/* { dg-final { scan-assembler-not "srli\t" } } */ +/* { dg-final { scan-assembler-times "snez\t" 1 } } */ +/* { dg-final { scan-assembler-not "sext.w\t" } } */ + -- 2.34.1