On Wed, 16 Sep 2020, Jakub Jelinek wrote:

> Hi!
> 
> As the testcases show, if we have something like:
>   MEM <char[12]> [&b + 8B] = {};
>   MEM[(short *) &b] = 5;
>   _5 = *x_4(D);
>   MEM <long long unsigned int> [&b + 2B] = _5;
>   MEM[(char *)&b + 16B] = 88;
>   MEM[(int *)&b + 20B] = 1;
> then in sort_by_bitpos the stores are almost like in the given order,
> except the first store is after the = _5; store.
> We can't coalesce the = 5; store with = _5;, because the latter is MEM_REF,
> while the former INTEGER_CST, and we can't coalesce the = _5 store with
> the = {} store because the former is MEM_REF, the latter INTEGER_CST.
> But we happily coalesce the remaining 3 stores, which is wrong, because the
> = _5; store overlaps those and is in between them in the program order.
> We already have code to deal with similar cases in check_no_overlap, but we
> deal only with the following stores in sort_by_bitpos order, not the earlier
> ones.
> 
> The following patch checks also the earlier ones.  In 
> coalesce_immediate_stores
> it computes the first one that needs to be checked (all the ones whose
> bitpos + bitsize is smaller or equal to merged_store->start don't need to be
> checked and don't need to be checked even for any following attempts because
> of the sort_by_bitpos sorting) and the end of that (that is the first store
> in the merged_store).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2020-09-16  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/97053
>       * gimple-ssa-store-merging.c (check_no_overlap): Add FIRST_ORDER,
>       START, FIRST_EARLIER and LAST_EARLIER arguments.  Return false if
>       any stores between FIRST_EARLIER inclusive and LAST_EARLIER exclusive
>       has order in between FIRST_ORDER and LAST_ORDER and overlaps the to
>       be merged store.
>       (imm_store_chain_info::try_coalesce_bswap): Add FIRST_EARLIER argument.
>       Adjust check_no_overlap caller.
>       (imm_store_chain_info::coalesce_immediate_stores): Add first_earlier
>       and last_earlier variables, adjust them during iterations.  Adjust
>       check_no_overlap callers, call check_no_overlap even when extending
>       overlapping stores by extra INTEGER_CST stores.
> 
>       * gcc.dg/store_merging_31.c: New test.
>       * gcc.dg/store_merging_32.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2020-08-12 12:45:46.000000000 +0200
> +++ gcc/gimple-ssa-store-merging.c    2020-09-15 16:51:11.393453396 +0200
> @@ -2116,7 +2116,8 @@ public:
>        }
>    }
>    bool terminate_and_process_chain ();
> -  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
> +  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
> +                        unsigned int);
>    bool coalesce_immediate_stores ();
>    bool output_merged_store (merged_store_group *);
>    bool output_merged_stores ();
> @@ -2443,14 +2444,39 @@ gather_bswap_load_refs (vec<tree> *refs,
>     into the group.  That way it will be its own store group and will
>     not be touched.  If ALL_INTEGER_CST_P and there are overlapping
>     INTEGER_CST stores, those are mergeable using merge_overlapping,
> -   so don't return false for those.  */
> +   so don't return false for those.
> +
> +   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
> +   (exclusive), whether they don't overlap the bitrange START to END
> +   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
> +   prevent merging in cases like:
> +     MEM <char[12]> [&b + 8B] = {};
> +     MEM[(short *) &b] = 5;
> +     _5 = *x_4(D);
> +     MEM <long long unsigned int> [&b + 2B] = _5;
> +     MEM[(char *)&b + 16B] = 88;
> +     MEM[(int *)&b + 20B] = 1;
> +   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
> +   be merged with it, because the = _5 store overlaps these and is in between
> +   them in sort_by_order ordering.  If it was merged, the merged store would
> +   go after the = _5 store and thus change behavior.  */
>  
>  static bool
>  check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
> -               bool all_integer_cst_p, unsigned int last_order,
> -               unsigned HOST_WIDE_INT end)
> +               bool all_integer_cst_p, unsigned int first_order,
> +               unsigned int last_order, unsigned HOST_WIDE_INT start,
> +               unsigned HOST_WIDE_INT end, unsigned int first_earlier,
> +               unsigned end_earlier)
>  {
>    unsigned int len = m_store_info.length ();
> +  for (unsigned int j = first_earlier; j < end_earlier; j++)
> +    {
> +      store_immediate_info *info = m_store_info[j];
> +      if (info->order > first_order
> +       && info->order < last_order
> +       && info->bitpos + info->bitsize > start)
> +     return false;
> +    }
>    for (++i; i < len; ++i)
>      {
>        store_immediate_info *info = m_store_info[i];
> @@ -2471,7 +2497,8 @@ check_no_overlap (vec<store_immediate_in
>  bool
>  imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
>                                         unsigned int first,
> -                                       unsigned int try_size)
> +                                       unsigned int try_size,
> +                                       unsigned int first_earlier)
>  {
>    unsigned int len = m_store_info.length (), last = first;
>    unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
> @@ -2611,7 +2638,8 @@ imm_store_chain_info::try_coalesce_bswap
>    if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
>      return false;
>  
> -  if (!check_no_overlap (m_store_info, last, false, last_order, end))
> +  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
> +                      merged_store->start, end, first_earlier, first))
>      return false;
>  
>    /* Don't handle memory copy this way if normal non-bswap processing
> @@ -2703,6 +2731,8 @@ imm_store_chain_info::coalesce_immediate
>  
>    store_immediate_info *info;
>    unsigned int i, ignore = 0;
> +  unsigned int first_earlier = 0;
> +  unsigned int end_earlier = 0;
>  
>    /* Order the stores by the bitposition they write to.  */
>    m_store_info.qsort (sort_by_bitpos);
> @@ -2719,6 +2749,12 @@ imm_store_chain_info::coalesce_immediate
>        if (i <= ignore)
>       goto done;
>  
> +      while (first_earlier < end_earlier
> +          && (m_store_info[first_earlier]->bitpos
> +              + m_store_info[first_earlier]->bitsize
> +              <= merged_store->start))
> +     first_earlier++;
> +
>        /* First try to handle group of stores like:
>        p[0] = data >> 24;
>        p[1] = data >> 16;
> @@ -2733,7 +2769,8 @@ imm_store_chain_info::coalesce_immediate
>       {
>         unsigned int try_size;
>         for (try_size = 64; try_size >= 16; try_size >>= 1)
> -         if (try_coalesce_bswap (merged_store, i - 1, try_size))
> +         if (try_coalesce_bswap (merged_store, i - 1, try_size,
> +                                 first_earlier))
>             break;
>  
>         if (try_size >= 16)
> @@ -2741,7 +2778,10 @@ imm_store_chain_info::coalesce_immediate
>             ignore = i + merged_store->stores.length () - 1;
>             m_merged_store_groups.safe_push (merged_store);
>             if (ignore < m_store_info.length ())
> -             merged_store = new merged_store_group (m_store_info[ignore]);
> +             {
> +               merged_store = new merged_store_group (m_store_info[ignore]);
> +               end_earlier = ignore;
> +             }
>             else
>               merged_store = NULL;
>             goto done;
> @@ -2776,12 +2816,16 @@ imm_store_chain_info::coalesce_immediate
>             && merged_store->only_constants
>             && info->lp_nr == merged_store->lp_nr)
>           {
> +           unsigned int first_order
> +             = MIN (merged_store->first_order, info->order);
>             unsigned int last_order
>               = MAX (merged_store->last_order, info->order);
>             unsigned HOST_WIDE_INT end
>               = MAX (merged_store->start + merged_store->width,
>                      info->bitpos + info->bitsize);
> -           if (check_no_overlap (m_store_info, i, true, last_order, end))
> +           if (check_no_overlap (m_store_info, i, true, first_order,
> +                                 last_order, merged_store->start, end,
> +                                 first_earlier, end_earlier))
>               {
>                 /* check_no_overlap call above made sure there are no
>                    overlapping stores with non-INTEGER_CST rhs_code
> @@ -2810,6 +2854,7 @@ imm_store_chain_info::coalesce_immediate
>                 do
>                   {
>                     unsigned int max_order = 0;
> +                   unsigned int min_order = first_order;
>                     unsigned first_nonmergeable_int_order = ~0U;
>                     unsigned HOST_WIDE_INT this_end = end;
>                     k = i;
> @@ -2836,6 +2881,7 @@ imm_store_chain_info::coalesce_immediate
>                                 break;
>                               }
>                             k = j;
> +                           min_order = MIN (min_order, info2->order);
>                             this_end = MAX (this_end,
>                                             info2->bitpos + info2->bitsize);
>                           }
> @@ -2852,6 +2898,12 @@ imm_store_chain_info::coalesce_immediate
>                           first_nonmergeable_order
>                             = MIN (first_nonmergeable_order, info2->order);
>                       }
> +                   if (k > i
> +                       && !check_no_overlap (m_store_info, len - 1, true,
> +                                             min_order, try_order,
> +                                             merged_store->start, this_end,
> +                                             first_earlier, end_earlier))
> +                     k = 0;
>                     if (k == 0)
>                       {
>                         if (last_order == try_order)
> @@ -2937,9 +2989,12 @@ imm_store_chain_info::coalesce_immediate
>             info->ops_swapped_p = true;
>           }
>         if (check_no_overlap (m_store_info, i, false,
> +                             MIN (merged_store->first_order, info->order),
>                               MAX (merged_store->last_order, info->order),
> +                             merged_store->start,
>                               MAX (merged_store->start + merged_store->width,
> -                                  info->bitpos + info->bitsize)))
> +                                  info->bitpos + info->bitsize),
> +                             first_earlier, end_earlier))
>           {
>             /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
>             if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
> @@ -2985,6 +3040,7 @@ imm_store_chain_info::coalesce_immediate
>       delete merged_store;
>  
>        merged_store = new merged_store_group (info);
> +      end_earlier = i;
>        if (dump_file && (dump_flags & TDF_DETAILS))
>       fputs ("New store group\n", dump_file);
>  
> --- gcc/testsuite/gcc.dg/store_merging_31.c.jj        2020-09-15 
> 16:18:47.451623297 +0200
> +++ gcc/testsuite/gcc.dg/store_merging_31.c   2020-09-15 16:18:31.460846430 
> +0200
> @@ -0,0 +1,27 @@
> +/* PR tree-optimization/97053 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +struct S { short a; char b[9]; int c; char d; int e; };
> +
> +__attribute__((noipa)) void
> +foo (char *x, char *y)
> +{
> +  if (__builtin_strcmp (x, "ABCDXXXX") != 0
> +      || __builtin_strcmp (y, "ABCDXXXX") != 0)
> +    __builtin_abort ();
> +}
> +
> +int
> +main ()
> +{
> +  char a[9] = "XXXXXXXX";
> +  struct S b = {};
> +  __builtin_memcpy (a, "ABCD", 4);
> +  b.a = 5;
> +  __builtin_memcpy (b.b, a, 8); 
> +  b.d = 'X';
> +  b.e = 1;
> +  foo (a, b.b);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/store_merging_32.c.jj        2020-09-15 
> 16:18:51.381568453 +0200
> +++ gcc/testsuite/gcc.dg/store_merging_32.c   2020-09-15 15:38:27.827420970 
> +0200
> @@ -0,0 +1,129 @@
> +/* PR tree-optimization/97053 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-tree-dse" } */
> +
> +struct __attribute__((packed, may_alias)) S { long long s; };
> +struct __attribute__((packed, may_alias)) T { short t; };
> +
> +__attribute__((noipa)) void
> +test (char *p, char *q, int s)
> +{
> +  if ((s & 1) == 0)
> +    {
> +      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
> +       != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
> +       || (((struct S __attribute__((may_alias)) *) &p[1])->s
> +           != ((struct S __attribute__((may_alias)) *) &q[1])->s)
> +       || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
> +           != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
> +     __builtin_abort ();
> +    }
> +  else
> +    {
> +      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
> +       != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
> +       || (((struct S __attribute__((may_alias)) *) &p[1])->s
> +           != ((struct S __attribute__((may_alias)) *) &q[1])->s)
> +       || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 
> 1])->t
> +           != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) 
> - 1])->t)
> +       || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
> +     __builtin_abort ();
> +    }
> +}
> +
> +__attribute__((noipa)) void
> +foo (long long *p, char *q, char *r, char *s)
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
> +  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
> +  test (a, q, 0);
> +}
> +
> +__attribute__((noipa)) void
> +bar (long long *p, char *q, char *r, char *s, char *t)
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 
> 2;
> +  a[3 * sizeof (short) - 2] = 3;
> +  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 
> 2;
> +  t[3 * sizeof (short) - 2] = 3;
> +  test (a, q, 1);
> +}
> +
> +__attribute__((noipa)) void
> +baz (long long *p, char *q, char *r, char *s)
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
> +  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
> +  test (a, q, 2);
> +}
> +
> +__attribute__((noipa)) void
> +qux (long long *p, char *q, char *r, char *s, char *t)
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  a[3 * sizeof (short) - 2] = 3;
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
> +  ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 
> 2;
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  s[3 * sizeof (short) - 2] = 3;
> +  ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
> +  test (a, q, 3);
> +}
> +
> +__attribute__((noipa)) void
> +corge (long long *p, char *q, char *r, char *s, short u[3])
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
> +  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
> +  test (a, q, 4);
> +}
> +
> +__attribute__((noipa)) void
> +garply (long long *p, char *q, char *r, char *s, short u[3])
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
> +  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
> +  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
> +  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
> +  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
> +  test (a, q, 6);
> +}
> +
> +int
> +main ()
> +{
> +  char a[64] __attribute__((aligned (__alignof (short))));
> +  long long p = -1LL;
> +  short u[] = { 1, 2, 3 };
> +  foo (&p, &a[0], &a[0], &a[0]);
> +  bar (&p, &a[0], &a[0], &a[0], &a[0]);
> +  baz (&p, &a[0], &a[0], &a[0]);
> +  qux (&p, &a[0], &a[0], &a[0], &a[0]);
> +  corge (&p, &a[0], &a[0], &a[0], u);
> +  garply (&p, &a[0], &a[0], &a[0], u);
> +  return 0;
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to