Hi, This patch adds const0 move checking for CLEAR_BY_PIECES. The vec_duplicate optab handles duplicates of non-constant inputs, but 0 is a constant. So even if a platform doesn't support vec_duplicate, it can still do clear by pieces if it supports a const0 move in that mode.
Compared to the previous version, the main change is to use a direct const0 move for by-pieces clear if the target supports a const0 move in that mode. https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643063.html Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no regressions. There are several regressions on aarch64. They could be fixed by enhancing the const0 move on V2x8QImode. Is it OK for trunk? Thanks Gui Haochen ChangeLog expand: Add const0 move checking for CLEAR_BY_PIECES optabs vec_duplicate handles duplicates of non-constant inputs. But 0 is a constant, so even if a platform doesn't support vec_duplicate, it can still do clear by pieces if it supports a const0 move. This patch adds that check. gcc/ * expr.cc (by_pieces_mode_supported_p): Add const0 move checking for CLEAR_BY_PIECES. (op_by_pieces_d::run): Pass const0 to do the move if the target supports a direct const0 move in that mode. patch.diff diff --git a/gcc/expr.cc b/gcc/expr.cc index fc5e998e329..97764eb9ebe 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -1006,14 +1006,21 @@ can_use_qi_vectors (by_pieces_operation op) static bool by_pieces_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) { - if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) + enum insn_code icode = optab_handler (mov_optab, mode); + if (icode == CODE_FOR_nothing) return false; - if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) + if (op == SET_BY_PIECES && VECTOR_MODE_P (mode) && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing) return false; + if (op == CLEAR_BY_PIECES + && VECTOR_MODE_P (mode) + && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing + && !insn_operand_matches (icode, 1, CONST0_RTX (mode))) + return false; + if (op == COMPARE_BY_PIECES && !can_compare_p (EQ, mode, ccp_jump)) return false; @@ -1490,7 +1497,7 @@ op_by_pieces_d::run () do { unsigned int size = GET_MODE_SIZE (mode); - rtx to1 = NULL_RTX, from1; + rtx to1 = NULL_RTX, from1 = NULL_RTX; while 
(length >= size) { @@ -1500,12 +1507,26 @@ op_by_pieces_d::run () to1 = m_to.adjust (mode, m_offset, &to_prev); to_prev.data = to1; to_prev.mode = mode; - from1 = m_from.adjust (mode, m_offset, &from_prev); - from_prev.data = from1; - from_prev.mode = mode; m_to.maybe_predec (-(HOST_WIDE_INT)size); - m_from.maybe_predec (-(HOST_WIDE_INT)size); + + /* Pass CONST0_RTX for memory clear when target supports CONST0 + direct move. */ + if (m_op == CLEAR_BY_PIECES + && VECTOR_MODE_P (mode) + && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing) + { + enum insn_code icode = optab_handler (mov_optab, mode); + if (insn_operand_matches (icode, 1, CONST0_RTX (mode))) + from1 = CONST0_RTX (mode); + } + else + { + from1 = m_from.adjust (mode, m_offset, &from_prev); + from_prev.data = from1; + from_prev.mode = mode; + m_from.maybe_predec (-(HOST_WIDE_INT)size); + } generate (to1, from1, mode);