On Tue, 2 Jan 2024, Tamar Christina wrote: > Hi All, > > I was generating the vector reverse mask without checking if the target > actually supported such an operation. > > It also seems like more targets implement VEC_EXTRACT than permute on mask > registers. > > So this adds a check for IFN_VEC_EXTRACT support when required and changes > the select first code to use it. > > This is good for now since masks always come from whilelo. But in the future > when masks can come from other sources we will need the old code back. > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu > and no issues with --enable-checking=release --enable-lto > --with-build-config=bootstrap-O3 --enable-checking=yes,rtl,extra. > tested on cross cc1 for amdgcn-amdhsa and issue fixed. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/113199 > * tree-vect-loop.cc (vectorizable_live_operation_1): Use > IFN_VEC_EXTRACT. > (vectorizable_live_operation): Check for IFN_VEC_EXTRACT support. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/113199 > * gcc.target/gcn/pr113199.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/testsuite/gcc.target/gcn/pr113199.c > b/gcc/testsuite/gcc.target/gcn/pr113199.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8a641e5536e80e207ca0163cac66c0f4f6ca93f7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/gcn/pr113199.c > @@ -0,0 +1,44 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O2" } */ > + > +typedef long unsigned int size_t; > +typedef int wchar_t; > +struct tm > +{ > + int tm_mon; > + int tm_year; > +}; > +int abs (int); > +struct lc_time_T { const char *month[12]; }; > +struct __locale_t * __get_current_locale (void) { } > +const struct lc_time_T * __get_time_locale (struct __locale_t *locale) { } > +const wchar_t * __ctloc (wchar_t *buf, const char *elem, size_t *len_ret) { > return buf; } > +size_t > +__strftime (wchar_t *s, size_t maxsize, const wchar_t *format, > + const struct tm *tim_p, struct __locale_t *locale) > +{ > + size_t count = 0; > + const wchar_t *ctloc; > + wchar_t ctlocbuf[256]; > + size_t i, ctloclen; > + const struct lc_time_T *_CurrentTimeLocale = __get_time_locale (locale); > + { > + switch (*format) > + { > + case L'B': > + (ctloc = __ctloc (ctlocbuf, _CurrentTimeLocale->month[tim_p->tm_mon], > &ctloclen)); > + for (i = 0; i < ctloclen; i++) > + { > + if (count < maxsize - 1) > + s[count++] = ctloc[i]; > + else > + return 0; > + { > + int century = tim_p->tm_year >= 0 > + ? tim_p->tm_year / 100 + 1900 / 100 > + : abs (tim_p->tm_year + 1900) / 100; > + } > + } > + } > + } > +} > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index > 37f1be1101ffae779214056a0886411e0683e887..5aa92e67444e7aacf458fffa1428f1983c482374 > 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -10648,36 +10648,18 @@ vectorizable_live_operation_1 (loop_vec_info > loop_vinfo, > &LOOP_VINFO_MASKS (loop_vinfo), > 1, vectype, 0); > tree scalar_res; > + gimple_seq_add_seq (&stmts, tem); > > /* For an inverted control flow with early breaks we want EXTRACT_FIRST > - instead of EXTRACT_LAST. Emulate by reversing the vector and mask. */ > + instead of EXTRACT_LAST. For now since the mask always comes from a > + WHILELO we can get the first element ignoring the mask since CLZ of the > + mask will always be zero. */ > if (restart_loop && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > - { > - /* First create the permuted mask. */ > - tree perm_mask = perm_mask_for_reverse (TREE_TYPE (mask)); > - tree perm_dest = copy_ssa_name (mask); > - gimple *perm_stmt > - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, mask, > - mask, perm_mask); > - vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt, > - &gsi); > - mask = perm_dest; > - > - /* Then permute the vector contents. */ > - tree perm_elem = perm_mask_for_reverse (vectype); > - perm_dest = copy_ssa_name (vec_lhs_phi); > - perm_stmt > - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, vec_lhs_phi, > - vec_lhs_phi, perm_elem); > - vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt, > - &gsi); > - vec_lhs_phi = perm_dest; > - } > - > - gimple_seq_add_seq (&stmts, tem); > - > - scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type, > - mask, vec_lhs_phi); > + scalar_res = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), > + vec_lhs_phi, bitstart);
So bitstart is always zero? I wonder why using CFN_VEC_EXTRACT over BIT_FIELD_REF here which wouldn't need any additional target support. > + else > + scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type, > + mask, vec_lhs_phi); > > /* Convert the extracted vector element to the scalar type. */ > new_tree = gimple_convert (&stmts, lhs_type, scalar_res); > @@ -10852,9 +10834,25 @@ vectorizable_live_operation (vec_info *vinfo, > stmt_vec_info stmt_info, > gcc_assert (ncopies == 1 && !slp_node); > if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, > OPTIMIZE_FOR_SPEED)) > - vect_record_loop_mask (loop_vinfo, > - &LOOP_VINFO_MASKS (loop_vinfo), > - 1, vectype, NULL); > + { > + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo) > + && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) > + && !direct_internal_fn_supported_p (IFN_EXTRACT_LAST, > + vectype, > + OPTIMIZE_FOR_SPEED)) I don't quite understand this part - this is guarded by direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, OPTIMIZE_FOR_SPEED) unless I'm mis-matching this means the checked !direct_internal_fn_supported_p condition is always false? > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "can't operate on partial vectors " > + "because the target doesn't support extract " > + "first reduction.\n"); > + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; > + } > + else > + vect_record_loop_mask (loop_vinfo, > + &LOOP_VINFO_MASKS (loop_vinfo), > + 1, vectype, NULL); > + } > else if (can_vec_extract_var_idx_p ( > TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype)))) > vect_record_loop_len (loop_vinfo, > > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)