For things like

    (x | 0x101) << 11
it's obvious to write:

	ori	$r4,$r4,257
	slli.d	$r4,$r4,11

But we are actually generating something insane:

	lu12i.w	$r12,524288>>12		# 0x80000
	ori	$r12,$r12,2048
	slli.d	$r4,$r4,11
	or	$r4,$r4,$r12
	jr	$r1

That happens because the target-independent canonicalization was
written before we had all the RISC targets where loading an immediate
may need multiple instructions.  So for these targets we need to
handle this in the target code.

We do the reassociation on our own (i.e. reverting the
target-independent reassociation) if "(reg [&|^] mask) << shamt" does
not need to load mask into a register, and either:

- (mask << shamt) needs to be loaded into a register, or
- shamt is a const_immalsl_operand, so the outer shift may be further
  combined with an add.

gcc/ChangeLog:

	PR target/115921
	* config/loongarch/loongarch-protos.h
	(loongarch_reassoc_shift_bitwise): New function prototype.
	* config/loongarch/loongarch.cc
	(loongarch_reassoc_shift_bitwise): Implement.
	* config/loongarch/loongarch.md (*alslsi3_extend_subreg): New
	define_insn_and_split.
	(<any_bitwise:optab>_shift_reverse<X:mode>): New
	define_insn_and_split.
	(<any_bitwise:optab>_alsl_reversesi_extended): New
	define_insn_and_split.
	(zero_extend_ashift): Remove as it's just a special case of
	and_shift_reversedi, and it does not make much sense to write
	"alsl.d rd,rs,r0,shamt" instead of "slli.d rd,rs,shamt".
	(bstrpick_alsl_paired): Remove as the same result is now
	achieved by splitting and_shift_reversedi into and + ashift
	first, then late-combining the ashift and a further add.

gcc/testsuite/ChangeLog:

	PR target/115921
	* gcc.target/loongarch/bstrpick_alsl_paired.c (scan-rtl-dump):
	Scan for and_shift_reversedi instead of the removed
	bstrpick_alsl_paired.
	* gcc.target/loongarch/bitwise-shift-reassoc.c: New test.

---

v1 -> v2:

- Use simplify_const_binary_operation for shifting a CONST_INT rtx,
  instead of hand-rolled code invoking unspecified behavior.
- Add a test case outputting bstrins.d + alsl.d.
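To make the profitability condition above concrete for reviewers, here
is a small editorial illustration (not part of the patch).  It checks
the identity the split relies on and the immediate-range reasoning for
the motivating example; LA_IMM12_MAX is a name invented here for the
12-bit unsigned immediate range of ori/xori/andi, not an existing GCC
macro:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical constant for illustration only: ori/xori/andi
       accept a 12-bit unsigned immediate.  */
    #define LA_IMM12_MAX 0xfffu

    int
    main (void)
    {
      uint64_t x = 0x1234;
      uint64_t mask = 0x101;
      int shamt = 11;

      /* Shifts distribute over bitwise operations, so both
         associations compute the same value.  Going from the canonical
         form back to this one requires the insn's constant (here
         mask << shamt) to have at least shamt low zero bits, which is
         what the ctz_hwi check in the patch guarantees.  */
      assert (((x | mask) << shamt) == ((x << shamt) | (mask << shamt)));

      /* Only the un-shifted mask fits the 12-bit immediate, so the
         reassociated form is ori + slli.d, while the canonical form
         must first build 0x80800 with lu12i.w + ori.  */
      assert (mask <= LA_IMM12_MAX);
      assert ((mask << shamt) > LA_IMM12_MAX);

      return 0;
    }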
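Similarly, a sketch (again editorial, assuming the usual arithmetic
behavior of ">>" on negative signed values) of why
loongarch_reassoc_shift_bitwise retries the mask check with an
arithmetic shift: the two shifts differ only in the top shamt bits,
which the outer left shift discards anyway, but only the
arithmetic-shift result can remain a single "bstrins with $r0" mask.
The constants are the ones from the comment in the patch:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint64_t mask = 0xffffffff00000000u;
      int shamt = 2;

      /* ashiftrt keeps the mask contiguous from the top: the
         complement of 0xffffffffc0000000 is bits 29..0, so
         "x & 0xffffffffc0000000" is a single bstrins.d xd,$r0,29,0.  */
      assert ((uint64_t) ((int64_t) mask >> shamt) == 0xffffffffc0000000u);

      /* lshiftrt punches a hole at the top instead: 0x3fffffffc0000000
         is not a single bit-field mask.  */
      assert ((mask >> shamt) == 0x3fffffffc0000000u);

      /* The two results differ only in the top shamt bits, which the
         subsequent "<< shamt" throws away, so either new mask yields
         the same final value.  */
      assert (((mask >> shamt) << shamt)
              == ((uint64_t) ((int64_t) mask >> shamt) << shamt));

      return 0;
    }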
Bootstrapped and regtested on loongarch64-linux-gnu, ok for trunk?

 gcc/config/loongarch/loongarch-protos.h       |   2 +
 gcc/config/loongarch/loongarch.cc             |  35 +++++
 gcc/config/loongarch/loongarch.md             | 136 +++++++++++++-----
 .../loongarch/bitwise-shift-reassoc.c         |  98 +++++++++++++
 .../loongarch/bstrpick_alsl_paired.c          |   2 +-
 5 files changed, 239 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c

diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 6601f767dab..33fcb5ee87f 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -85,6 +85,8 @@ extern bool loongarch_split_move_p (rtx, rtx);
 extern void loongarch_split_move (rtx, rtx);
 extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
 extern void loongarch_split_plus_constant (rtx *, machine_mode);
+extern rtx loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt,
+					    rtx mask, machine_mode mode);
 extern void loongarch_split_vector_move (rtx, rtx);
 extern const char *loongarch_output_move (rtx *);
 #ifdef RTX_CODE
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 1004b65a1ee..51f72390256 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4530,6 +4530,41 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
   op[2] = gen_int_mode (v, mode);
 }
 
+/* Test if reassociating (a << shamt) [&|^] mask to
+   (a [&|^] (mask >> shamt)) << shamt is possible and beneficial.
+   If true, return (mask >> shamt).  Return NULL_RTX otherwise.  */
+
+rtx
+loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, rtx mask,
+				 machine_mode mode)
+{
+  gcc_checking_assert (CONST_INT_P (shamt));
+  gcc_checking_assert (CONST_INT_P (mask));
+  gcc_checking_assert (mode == SImode || mode == DImode);
+
+  if (ctz_hwi (INTVAL (mask)) < INTVAL (shamt))
+    return NULL_RTX;
+
+  rtx new_mask = simplify_const_binary_operation (LSHIFTRT, mode, mask,
+						  shamt);
+  if (const_uns_arith_operand (new_mask, mode))
+    return new_mask;
+
+  if (!is_and)
+    return NULL_RTX;
+
+  if (low_bitmask_operand (new_mask, mode))
+    return new_mask;
+
+  /* Do an arithmetic shift for checking ins_zero_bitmask_operand:
+     ashiftrt (0xffffffff00000000, 2) is 0xffffffffc0000000, which is
+     an ins_zero_bitmask_operand, but lshiftrt would produce
+     0x3fffffffc0000000.  */
+  new_mask = simplify_const_binary_operation (ASHIFTRT, mode, mask,
+					      shamt);
+  return ins_zero_bitmask_operand (new_mask, mode) ? new_mask : NULL_RTX;
+}
+
 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
 
 static HOST_WIDE_INT
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 995df1b8875..223e2b9f37f 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -490,6 +490,7 @@ (define_code_iterator neg_bitwise [and ior])
 (define_code_attr bitwise_operand [(and "and_operand")
				    (ior "uns_arith_operand")
				    (xor "uns_arith_operand")])
+(define_code_attr is_and [(and "true") (ior "false") (xor "false")])
 
 ;; This code iterator allows unsigned and signed division to be generated
 ;; from the same template.
@@ -3083,39 +3084,6 @@ (define_expand "rotl<mode>3"
   }
 });
 
-;; The following templates were added to generate "bstrpick.d + alsl.d"
-;; instruction pairs.
-;; It is required that the values of const_immalsl_operand and
-;; immediate_operand must have the following correspondence:
-;;
-;; (immediate_operand >> const_immalsl_operand) == 0xffffffff
-
-(define_insn "zero_extend_ashift"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-			   (match_operand 2 "const_immalsl_operand" ""))
-		(match_operand 3 "immediate_operand" "")))]
-  "TARGET_64BIT
-   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
-  "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,$r0,%2"
-  [(set_attr "type" "arith")
-   (set_attr "mode" "DI")
-   (set_attr "insn_count" "2")])
-
-(define_insn "bstrpick_alsl_paired"
-  [(set (match_operand:DI 0 "register_operand" "=&r")
-	(plus:DI
-	  (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-			     (match_operand 2 "const_immalsl_operand" ""))
-		  (match_operand 3 "immediate_operand" ""))
-	  (match_operand:DI 4 "register_operand" "r")))]
-  "TARGET_64BIT
-   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
-  "bstrpick.d\t%0,%1,31,0\n\talsl.d\t%0,%0,%4,%2"
-  [(set_attr "type" "arith")
-   (set_attr "mode" "DI")
-   (set_attr "insn_count" "2")])
-
 (define_insn "alsl<mode>3"
   [(set (match_operand:GPR 0 "register_operand" "=r")
	(plus:GPR (ashift:GPR (match_operand:GPR 1 "register_operand" "r")
@@ -3138,6 +3106,108 @@ (define_insn "*alslsi3_extend"
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_insn "*alslsi3_extend_subreg"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(any_extend:DI
+	  (plus:SI
+	    (subreg:SI
+	      (ashift:DI (match_operand:DI 1 "register_operand" "r")
+			 (match_operand 2 "const_immalsl_operand" ""))
+	      0)
+	    (subreg:SI (match_operand:DI 3 "register_operand" "r") 0))))]
+  "TARGET_64BIT"
"alsl.w<u>\t%0,%1,%3,%2" + [(set_attr "type" "arith") + (set_attr "mode" "SI")]) + +;; The generic code prefers "(reg << shamt) [&|^] (mask << shamt)" +;; instead of "(reg [&|^] mask) << shamt" but we want the latter if +;; we don't need to load mask into an register, and either: +;; - (mask << shamt) needs to be loaded into an register, or +;; - shamt is a const_immalsl_operand, so the outer shift may be further +;; combined with an add. +(define_insn_and_split "<optab>_shift_reverse<X:mode>" + [(set (match_operand:X 0 "register_operand" "=r") + (any_bitwise:X + (ashift:X (match_operand:X 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "i")) + (match_operand:X 3 "const_int_operand" "i")))] + "(const_immalsl_operand (operands[2], SImode) + || !<bitwise_operand> (operands[3], <MODE>mode)) + && loongarch_reassoc_shift_bitwise (<is_and>, operands[2], operands[3], + <MODE>mode)" + "#" + "&& true" + [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3))) + (set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))] + { + operands[3] = loongarch_reassoc_shift_bitwise (<is_and>, + operands[2], + operands[3], + <MODE>mode); + + if (ins_zero_bitmask_operand (operands[3], <MODE>mode)) + { + gcc_checking_assert (<is_and>); + emit_move_insn (operands[0], operands[1]); + operands[1] = operands[0]; + } + }) + +;; The late_combine2 pass can handle slli.d + add.d => alsl.d, so we +;; already have slli.d + any_bitwise + add.d => any_bitwise + slli.d + +;; add.d => any_bitwise + alsl.d. But late_combine2 cannot handle slli.d + +;; add.w => alsl.w, so implement slli.d + and + add.w => and + alsl.w on +;; our own. +(define_insn_and_split "<optab>_alsl_reversesi_extended" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI + (subreg:SI + (any_bitwise:DI + (ashift:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand:SI 2 "const_immalsl_operand" "")) + (match_operand:DI 3 "const_int_operand" "i")) + 0) + (match_operand:SI 4 "register_operand" "r"))))] + "TARGET_64BIT + && loongarch_reassoc_shift_bitwise (<is_and>, operands[2], operands[3], + SImode)" + "#" + "&& true" + [; r0 = r1 [&|^] r3 is emitted in PREPARATION-STATEMENTS because we + ; need to handle a special case, see below. + (set (match_dup 0) + (sign_extend:DI + (plus:SI (ashift:SI (subreg:SI (match_dup 0) 0) (match_dup 2)) + (match_dup 4))))] + { + operands[3] = loongarch_reassoc_shift_bitwise (<is_and>, + operands[2], + operands[3], + SImode); + + if (ins_zero_bitmask_operand (operands[3], SImode)) + { + gcc_checking_assert (<is_and>); + emit_move_insn (operands[0], operands[1]); + operands[1] = operands[0]; + } + + if (operands[3] != CONSTM1_RTX (SImode)) + emit_insn (gen_<optab>di3 (operands[0], operands[1], operands[3])); + else + { + /* Hmm would we really reach here? If we reach here we'd have + a miss-optimization in the generic code (as it should have + optimized this to alslsi3_extend_subreg). But let's be safe + than sorry. */ + gcc_checking_assert (<is_and>); + emit_move_insn (operands[0], operands[1]); + } + }) + ;; Reverse the order of bytes of operand 1 and store the result in operand 0. 
diff --git a/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c
new file mode 100644
index 00000000000..3f197755625
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bitwise-shift-reassoc.c
@@ -0,0 +1,98 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+**t0:
+**	ori	(\$r[0-9]+),\$r4,257
+**	slli.d	\$r4,\1,11
+**	jr	\$r1
+*/
+long
+t0 (long x)
+{
+  return (x | 0x101) << 11;
+}
+
+/*
+**t1:
+**	xori	(\$r[0-9]+),\$r4,257
+**	alsl.d	\$r4,\1,\$r5,3
+**	jr	\$r1
+*/
+long
+t1 (long x, long y)
+{
+  return ((x ^ 0x101) << 3) + y;
+}
+
+/*
+**t2:
+**	bstrins.d	(\$r[0-9]+),\$r0,15,4
+**	alsl.d	\$r4,\1,\$r5,2
+**	jr	\$r1
+*/
+long
+t2 (long x, long y)
+{
+  return ((x & ~0xfff0) << 2) + y;
+}
+
+/*
+**t3:
+**	ori	(\$r[0-9]+),\$r4,3855
+**	alsl.w	\$r4,\1,\$r5,1
+**	jr	\$r1
+*/
+long
+t3 (long x, long y)
+{
+  return (int)(((x | 0xf0f) << 1) + y);
+}
+
+/*
+**t4:
+**	bstrpick.d	(\$r[0-9]+),\$r4,31,0
+**	slli.d	\$r4,\1,1
+**	jr	\$r1
+*/
+unsigned long
+t4 (unsigned long x)
+{
+  return x << 32 >> 31;
+}
+
+/*
+**t5:
+**	bstrpick.d	(\$r[0-9]+),\$r4,31,0
+**	alsl.d	\$r4,\1,\$r5,2
+**	jr	\$r1
+*/
+unsigned long
+t5 (unsigned long x, unsigned long y)
+{
+  return (x << 32 >> 30) + y;
+}
+
+/*
+**t6:
+**	alsl.w	\$r4,\$r4,\$r5,2
+**	jr	\$r1
+*/
+unsigned int
+t6 (unsigned long x, unsigned long y)
+{
+  return (x << 32 >> 30) + y;
+}
+
+/*
+**t7:
+**	bstrins.d	\$r4,\$r0,47,0
+**	alsl.d	\$r4,\$r4,\$r5,2
+**	jr	\$r1
+*/
+unsigned long
+t7 (unsigned long x, unsigned long y)
+{
+  return ((x & 0xffff000000000000) << 2) + y;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
index 0bca3886c32..900e8c9e19f 100644
--- a/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
+++ b/gcc/testsuite/gcc.target/loongarch/bstrpick_alsl_paired.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2 -fdump-rtl-combine" } */
-/* { dg-final { scan-rtl-dump "{bstrpick_alsl_paired}" "combine" } } */
+/* { dg-final { scan-rtl-dump "{and_shift_reversedi}" "combine" } } */
 /* { dg-final { scan-assembler-not "alsl.d\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,\\\$r0" } } */
 
 struct SA
-- 
2.48.1