From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai> Hi, Richi.
I tried to understand your last email and to refactor the do-while loop using VECTOR_CST_NELTS. This patch works fine for LEN_MASK_STORE and compiler can CSE redundant store. I have appended testcase in this patch to test VN for LEN_MASK_STORE. I am not sure whether I am on the same page with you. Feel free to correct me, Thanks. gcc/ChangeLog: * tree-ssa-sccvn.cc (vn_reference_lookup_3): Add LEN_MASK_STORE and fix LEN_STORE gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c: New test. --- .../rvv/autovec/partial/len_maskstore_vn-1.c | 30 +++++++++++++++++++ gcc/tree-ssa-sccvn.cc | 24 +++++++++++---- 2 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c new file mode 100644 index 00000000000..0b2d03693dc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv_zvl256b -mabi=ilp32d --param riscv-autovec-preference=fixed-vlmax -O3 -fdump-tree-fre5" } */ + +void __attribute__((noinline,noclone)) +foo (int *out, int *res) +{ + int mask[] = { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; + int i; + for (i = 0; i < 16; ++i) + { + if (mask[i]) + out[i] = i; + } + int o0 = out[0]; + int o7 = out[7]; + int o14 = out[14]; + int o15 = out[15]; + res[0] = o0; + res[2] = o7; + res[4] = o14; + res[6] = o15; +} + +/* Vectorization produces .LEN_MASK_STORE, unrolling will unroll the two + vector iterations. FRE5 after that should be able to CSE + out[7] and out[15], but leave out[0] and out[14] alone. */ +/* { dg-final { scan-tree-dump " = o0_\[0-9\]+;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = 7;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = o14_\[0-9\]+;" "fre5" } } */ +/* { dg-final { scan-tree-dump " = 15;" "fre5" } } */ diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 11061a374a2..242d82d6274 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -3304,6 +3304,16 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias)) return (void *)-1; break; + case IFN_LEN_MASK_STORE: + len = gimple_call_arg (call, 2); + bias = gimple_call_arg (call, 5); + if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias)) + return (void *)-1; + mask = gimple_call_arg (call, internal_fn_mask_index (fn)); + mask = vn_valueize (mask); + if (TREE_CODE (mask) != VECTOR_CST) + return (void *)-1; + break; default: return (void *)-1; } @@ -3344,11 +3354,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, tree vectype = TREE_TYPE (def_rhs); unsigned HOST_WIDE_INT elsz = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); + /* Set initial len value is the UINT_MAX, so mask_idx < actual_len + is always true for MASK_STORE. */ + unsigned actual_len = UINT_MAX; + if (len) + actual_len = tree_to_uhwi (len) + tree_to_shwi (bias); + unsigned nunits + = MIN (actual_len, VECTOR_CST_NELTS (mask).coeffs[0]); if (mask) { HOST_WIDE_INT start = 0, length = 0; - unsigned mask_idx = 0; - do + for (unsigned mask_idx = 0; mask_idx < nunits; mask_idx++) { if (integer_zerop (VECTOR_CST_ELT (mask, mask_idx))) { @@ -3371,9 +3387,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, } else length += elsz; - mask_idx++; } - while (known_lt (mask_idx, TYPE_VECTOR_SUBPARTS (vectype))); if (length != 0) { pd.rhs_off = start; @@ -3389,7 +3403,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, { pd.offset = offset2i; pd.size = (tree_to_uhwi (len) - + -tree_to_shwi (bias)) * BITS_PER_UNIT; + + tree_to_shwi (bias)) * BITS_PER_UNIT; if (BYTES_BIG_ENDIAN) pd.rhs_off = pd.size - tree_to_uhwi (TYPE_SIZE (vectype)); else -- 2.36.1