https://gcc.gnu.org/g:d55d40afd42a280c80729b538e3cce994f20961d
commit r15-6493-gd55d40afd42a280c80729b538e3cce994f20961d Author: Guo Jie <guo...@loongson.cn> Date: Mon Dec 30 10:39:13 2024 +0800 LoongArch: Optimize for conditional move operations The optimization example is as follows. From: if (condition) dest += 1 << 16; To: dest += (condition ? 1 : 0) << 16; It does not use maskeqz and masknez, thus reducing the number of instructions. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_expand_conditional_move): Add some optimization implementations based on noce_try_cmove_arith. gcc/testsuite/ChangeLog: * gcc.target/loongarch/conditional-move-opt-1.c: New test. * gcc.target/loongarch/conditional-move-opt-2.c: New test. Diff: --- gcc/config/loongarch/loongarch.cc | 103 ++++++++++++++++++++- .../gcc.target/loongarch/conditional-move-opt-1.c | 58 ++++++++++++ .../gcc.target/loongarch/conditional-move-opt-2.c | 42 +++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 029616c1db2a..23cbd2eead97 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5294,6 +5294,81 @@ loongarch_expand_conditional_move (rtx *operands) loongarch_emit_float_compare (&code, &op0, &op1); else { + /* Optimize to reduce the number of instructions for ternary operations. + Mainly implemented based on noce_try_cmove_arith. + For dest = (condition) ? value_if_true : value_if_false; + the optimization requires: + a. value_if_false = var; + b. value_if_true = var OP C (a positive integer power of 2). + + Situations similar to the following: + if (condition) + dest += 1 << imm; + to: + dest += (condition ? 1 : 0) << imm; */ + + rtx_insn *insn; + HOST_WIDE_INT val = 0; /* The value of rtx C. */ + /* INSN with operands[2] as the output. */ + rtx_insn *value_if_true_insn = NULL; + /* INSN with operands[3] as the output. 
*/ + rtx_insn *value_if_false_insn = NULL; + rtx value_if_true_insn_src = NULL_RTX; + /* Common operand var in value_if_true and value_if_false. */ + rtx comm_var = NULL_RTX; + bool can_be_optimized = false; + + /* Search value_if_true_insn and value_if_false_insn. */ + struct sequence_stack *seq = get_current_sequence ()->next; + for (insn = seq->last; insn; insn = PREV_INSN (insn)) + { + if (single_set (insn)) + { + rtx set_dest = SET_DEST (single_set (insn)); + if (rtx_equal_p (set_dest, operands[2])) + value_if_true_insn = insn; + else if (rtx_equal_p (set_dest, operands[3])) + value_if_false_insn = insn; + if (value_if_true_insn && value_if_false_insn) + break; + } + } + + /* Check if the optimization conditions are met. */ + if (value_if_true_insn + && value_if_false_insn + /* Make sure that value_if_true is a binary operation (var OP C). */ + && BINARY_P (value_if_true_insn_src + = SET_SRC (single_set (value_if_true_insn))) + /* Make sure that both value_if_true and value_if_false + have the same var. */ + && rtx_equal_p (XEXP (value_if_true_insn_src, 0), + SET_SRC (single_set (value_if_false_insn)))) + { + comm_var = SET_SRC (single_set (value_if_false_insn)); + rtx src = XEXP (value_if_true_insn_src, 1); + rtx imm = NULL_RTX; + if (CONST_INT_P (src)) + imm = src; + else + for (insn = seq->last; insn; insn = PREV_INSN (insn)) + { + rtx set = single_set (insn); + if (set && rtx_equal_p (SET_DEST (set), src)) + { + imm = SET_SRC (set); + break; + } + } + if (imm && CONST_INT_P (imm)) + { + val = INTVAL (imm); + /* Make sure that imm is a positive integer power of 2. 
*/ + if (val > 0 && !(val & (val - 1))) + can_be_optimized = true; + } + } + if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) { promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && @@ -5314,22 +5389,48 @@ loongarch_expand_conditional_move (rtx *operands) op0_extend = op0; op1_extend = force_reg (word_mode, op1); + rtx target = gen_reg_rtx (GET_MODE (op0)); + if (code == EQ || code == NE) { op0 = loongarch_zero_if_equal (op0, op1); op1 = const0_rtx; + /* For EQ, set target to 1 if op0 and op1 are the same, + otherwise set to 0. + For NE, set target to 0 if op0 and op1 are the same, + otherwise set to 1. */ + if (can_be_optimized) + loongarch_emit_binary (code, target, op0, const0_rtx); } else { /* The comparison needs a separate scc instruction. Store the result of the scc in *OP0 and compare it against zero. */ bool invert = false; - rtx target = gen_reg_rtx (GET_MODE (op0)); loongarch_emit_int_order_test (code, &invert, target, op0, op1); + if (can_be_optimized && invert) + loongarch_emit_binary (EQ, target, target, const0_rtx); code = invert ? EQ : NE; op0 = target; op1 = const0_rtx; } + + if (can_be_optimized) + { + /* Perform (condition ? 1 : 0) << log2 (C). */ + loongarch_emit_binary (ASHIFT, target, target, + GEN_INT (exact_log2 (val))); + /* Shift-related insn patterns only support SImode operands[2]. */ + enum rtx_code opcode = GET_CODE (value_if_true_insn_src); + if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT + || opcode == ROTATE || opcode == ROTATERT) + target = gen_lowpart (SImode, target); + /* Perform target = target OP ((condition ? 1 : 0) << log2 (C)). 
*/ + loongarch_emit_binary (opcode, operands[0], + force_reg (GET_MODE (operands[3]), comm_var), + target); + return; + } } rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c new file mode 100644 index 000000000000..ed13471aa90a --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "maskeqz" } } */ +/* { dg-final { scan-assembler-not "masknez" } } */ + +extern long lm, ln, lr; + +void +test_ne () +{ + if (lm != ln) + lr += (1 << 16); + lr += lm; +} + +void +test_eq () +{ + if (lm == ln) + lr = lm + (1 << 16); + else + lr = lm; + lr += lm; +} + +void +test_lt () +{ + if (lm < ln) + lr *= (1 << 16); + lr += lm; +} + +void +test_le () +{ + if (lm <= ln) + lr = lm * ((long)1 << 32); + else + lr = lm; + lr += lm; +} + +void +test_nez () +{ + if (lm != 0) + lr <<= (1 << 4); + lr += lm; +} + +void +test_eqz () +{ + if (lm == 0) + lr >>= (1 << 2); + lr += lm; +} diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c new file mode 100644 index 000000000000..ac72d4d933ad --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */ +/* { dg-final { scan-assembler-not "maskeqz" } } */ +/* { dg-final { scan-assembler-not "masknez" } } */ + +/* The relevant optimization is currently only based on noce_try_cmove_arith, + so it bypasses noce_convert_multiple_sets by + --param max-rtl-if-conversion-insns=1 to execute noce_try_cmove_arith. 
*/ + +extern long lm, ln, lr; + +void +test_ge () +{ + if (lm >= ln) + lr += ((long)1 << 32); + lr += lm; +} + +void +test_ltz () +{ + if (lm < 0) + lr |= (1 << 16); + lr += lm; +} + +void +test_lez () +{ + if (lm <= 0) + lr &= (1 << 16); + lr += lm; +} + +void +test_gez () +{ + if (lm >= 0) + lr ^= (1 << 16); + lr += lm; +}