Plz re-titile with some description rather than `Fix PR` :)
On Fri, May 5, 2023 at 9:52 PM <juzhe.zh...@rivai.ai> wrote: > > From: Juzhe-Zhong <juzhe.zh...@rivai.ai> > > This patch is fixing my recent optimization patch: > https://github.com/gcc-mirror/gcc/commit/d51f2456ee51bd59a79b4725ca0e488c25260bbf > > In that patch, the new_info = parse_insn (i) is not correct. > Since consider the following case: > > vsetvli a5,a4, e8,m1 > .. > vsetvli zero,a5, e32, m4 > vle8.v > vmacc.vv > ... > > Since we have backward demand fusion in Phase 1, so the real demand of > "vle8.v" is e32, m4. > However, if we use parse_insn (vle8.v) = e8, m1 which is not correct. > > So this patch we change new_info = new_info.parse_insn (i) > into: > > vector_insn_info new_info = m_vector_manager->vector_insn_infos[i->uid ()]; > > So that, we can correctly optimize codes into: > > vsetvli a5,a4, e32, m4 > .. > .. (vsetvli zero,a5, e32, m4 is removed) > vle8.v > vmacc.vv > > Since m_vector_manager->vector_insn_infos is the member variable of > pass_vsetvl class. > We remove static void function "local_eliminate_vsetvl_insn", and make it as > the member function > of pass_vsetvl class. > > PR target/109748 > > gcc/ChangeLog: > > * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Remove > it. > (pass_vsetvl::local_eliminate_vsetvl_insn): New function. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/vsetvl/pr109748.c: New test. > > --- > gcc/config/riscv/riscv-vsetvl.cc | 102 ++++++++++-------- > .../gcc.target/riscv/rvv/vsetvl/pr109748.c | 36 +++++++ > 2 files changed, 93 insertions(+), 45 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c > > diff --git a/gcc/config/riscv/riscv-vsetvl.cc > b/gcc/config/riscv/riscv-vsetvl.cc > index 39b4d21210b..e1efd7b1c40 100644 > --- a/gcc/config/riscv/riscv-vsetvl.cc > +++ b/gcc/config/riscv/riscv-vsetvl.cc > @@ -1056,51 +1056,6 @@ change_vsetvl_insn (const insn_info *insn, const > vector_insn_info &info) > change_insn (rinsn, new_pat); > } > > -static void > -local_eliminate_vsetvl_insn (const vector_insn_info &dem) > -{ > - const insn_info *insn = dem.get_insn (); > - if (!insn || insn->is_artificial ()) > - return; > - rtx_insn *rinsn = insn->rtl (); > - const bb_info *bb = insn->bb (); > - if (vsetvl_insn_p (rinsn)) > - { > - rtx vl = get_vl (rinsn); > - for (insn_info *i = insn->next_nondebug_insn (); > - real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) > - { > - if (i->is_call () || i->is_asm () > - || find_access (i->defs (), VL_REGNUM) > - || find_access (i->defs (), VTYPE_REGNUM)) > - return; > - > - if (has_vtype_op (i->rtl ())) > - { > - if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) > - return; > - rtx avl = get_avl (i->rtl ()); > - if (avl != vl) > - return; > - set_info *def = find_access (i->uses (), REGNO (avl))->def (); > - if (def->insn () != insn) > - return; > - > - vector_insn_info new_info; > - new_info.parse_insn (i); > - if (!new_info.skip_avl_compatible_p (dem)) > - return; > - > - new_info.set_avl_info (dem.get_avl_info ()); > - new_info = dem.merge (new_info, LOCAL_MERGE); > - change_vsetvl_insn (insn, new_info); > - eliminate_insn (PREV_INSN (i->rtl ())); > - return; > - } > - } > - } > -} > - > static bool > source_equal_p (insn_info *insn1, insn_info *insn2) > { > @@ -2672,6 +2627,7 @@ private: > void pre_vsetvl (void); > > /* Phase 5. */ > + void local_eliminate_vsetvl_insn (const vector_insn_info &) const; > void cleanup_insns (void) const; > > /* Phase 6. */ > @@ -3993,6 +3949,62 @@ pass_vsetvl::pre_vsetvl (void) > commit_edge_insertions (); > } > > +/* Local user vsetvl optimizaiton: > + > + Case 1: > + vsetvl a5,a4,e8,mf8 > + ... > + vsetvl zero,a5,e8,mf8 --> Eliminate directly. > + > + Case 2: > + vsetvl a5,a4,e8,mf8 --> vsetvl a5,a4,e32,mf2 > + ... > + vsetvl zero,a5,e32,mf2 --> Eliminate directly. */ > +void > +pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const > +{ > + const insn_info *insn = dem.get_insn (); > + if (!insn || insn->is_artificial ()) > + return; > + rtx_insn *rinsn = insn->rtl (); > + const bb_info *bb = insn->bb (); > + if (vsetvl_insn_p (rinsn)) > + { > + rtx vl = get_vl (rinsn); > + for (insn_info *i = insn->next_nondebug_insn (); > + real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) > + { > + if (i->is_call () || i->is_asm () > + || find_access (i->defs (), VL_REGNUM) > + || find_access (i->defs (), VTYPE_REGNUM)) > + return; > + > + if (has_vtype_op (i->rtl ())) > + { > + if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) > + return; > + rtx avl = get_avl (i->rtl ()); > + if (avl != vl) > + return; > + set_info *def = find_access (i->uses (), REGNO (avl))->def (); > + if (def->insn () != insn) > + return; > + > + vector_insn_info new_info > + = m_vector_manager->vector_insn_infos[i->uid ()]; > + if (!new_info.skip_avl_compatible_p (dem)) > + return; > + > + new_info.set_avl_info (dem.get_avl_info ()); > + new_info = dem.merge (new_info, LOCAL_MERGE); > + change_vsetvl_insn (insn, new_info); > + eliminate_insn (PREV_INSN (i->rtl ())); > + return; > + } > + } > + } > +} > + > /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand > implicitly. Since we will emit VSETVL instruction and make RVV > instructions > depending on VL/VTYPE global status registers, we remove the such AVL > operand > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c > b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c > new file mode 100644 > index 00000000000..81c42c5a82a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c > @@ -0,0 +1,36 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize > -fno-schedule-insns -fno-schedule-insns2" } */ > + > +#include "riscv_vector.h" > + > +int byte_mac_vec(unsigned char *a, unsigned char *b, int len) { > + size_t vlmax = __riscv_vsetvlmax_e8m1(); > + vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax); > + vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax); > + int k = len; > + > + for (size_t vl; k > 0; k -= vl, a += vl, b += vl) { > + vl = __riscv_vsetvl_e8m1(k); > + > + vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl); > + vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl); > + vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl); > + vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl); > + > + vint32m4_t a8s_as_i32 = > __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended); > + vint32m4_t b8s_as_i32 = > __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended); > + > + vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl); > + } > + > + vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero, > __riscv_vsetvl_e32m4(len)); > + int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum); > + > + return sum; > +} > + > +/* { dg-final { scan-assembler-times > {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { > no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts > "-funroll-loops" } } } } */ > +/* { dg-final { scan-assembler-times > {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { > no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts > "-funroll-loops" } } } } */ > +/* { dg-final { scan-assembler-times > {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { > no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts > "-funroll-loops" } } } } */ > +/* { dg-final { scan-assembler-times > {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 { target { > no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts > "-funroll-loops" } } } } */ > +/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" > no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts > "-funroll-loops" } } } } */ > -- > 2.36.3 >