Hi, for the following test case: svint32_t foo() { int32x4_t v = (int32x4_t) { 1, 2, 3, 4 }; svint32_t v2 = svld1rq_s32 (svptrue_b8(), &v[0]); return v2; }
After applying the workaround in forwprop to not simplify VEC_PERM_EXPR in simplify_permutation, in order to avoid a type error in the middle end (or using -fno-tree-forwprop), as mentioned in: https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598390.html we get the following optimized gimple: v2_2 = VEC_PERM_EXPR <{ 1, 2, 3, 4 }, { 1, 2, 3, 4 }, { 0, 1, 2, 3, ... }>; return v2_2; However, we hit the following ICE during expansion of vec_perm_expr because in aarch64_vectorize_vec_perm_const, op0 is a VECTOR_CST, and we call force_reg (VNx4SI, op0), which is the incorrect mode for op0 (the constant comes from the int32x4_t vector, not from an SVE vector). The patch fixes it by using op_mode instead of vmode in the calls to force_reg for op0 and op1. during RTL pass: expand foo2.c: In function ‘foo’: foo2.c:8:10: internal compiler error: in emit_move_insn, at expr.cc:4052 8 | return v2; | ^~ 0x74789b emit_move_insn(rtx_def*, rtx_def*) ../../gcc/gcc/expr.cc:4052 0xb8f664 force_reg(machine_mode, rtx_def*) ../../gcc/gcc/explow.cc:688 0x134182f aarch64_vectorize_vec_perm_const ../../gcc/gcc/config/aarch64/aarch64.cc:24132 0xe63070 expand_vec_perm_const(machine_mode, rtx_def*, rtx_def*, int_vector_builder<poly_int<2u, long> > const&, machine_mode, rtx_def*) ../../gcc/gcc/optabs.cc:6254 0xbb1569 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier) ../../gcc/gcc/expr.cc:10273 0xbb6498 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) ../../gcc/gcc/expr.cc:10625 0xa897dc expand_expr ../../gcc/gcc/expr.h:310 0xa897dc expand_return ../../gcc/gcc/cfgexpand.cc:3809 0xa897dc expand_gimple_stmt_1 ../../gcc/gcc/cfgexpand.cc:3918 0xa897dc expand_gimple_stmt ../../gcc/gcc/cfgexpand.cc:4044 0xa8f238 expand_gimple_basic_block ../../gcc/gcc/cfgexpand.cc:6096 0xa91187 execute ../../gcc/gcc/cfgexpand.cc:6822 Is the patch OK to commit after bootstrap+test on aarch64-linux-gnu? Thanks, Prathamesh
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 25f4cbb466d..303814b8cca 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24129,11 +24129,11 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, d.op_mode = op_mode; d.op_vec_flags = aarch64_classify_vector_mode (d.op_mode); d.target = target; - d.op0 = op0 ? force_reg (vmode, op0) : NULL_RTX; + d.op0 = op0 ? force_reg (op_mode, op0) : NULL_RTX; if (op0 == op1) d.op1 = d.op0; else - d.op1 = op1 ? force_reg (vmode, op1) : NULL_RTX; + d.op1 = op1 ? force_reg (op_mode, op1) : NULL_RTX; d.testing_p = !target; if (!d.testing_p)