Ping

Richard Sandiford <richard.sandif...@linaro.org> writes:
> In this testcase, we (correctly) record after:
>
>   strcpy (p1, "abcde");
>   char *p2 = strchr (p1, '\0');
>   strcpy (p2, q);
>
> that the length of p1 and p2 can be calculated by converting the
> second strcpy to:
>
>   tmp = stpcpy (p2, q)
>
> and then doing tmp - p1 for p1 and tmp - p2 for p2.  This is delayed
> until we know whether we actually need it.  Then:
>
>   char *p3 = strchr (p2, '\0');
>
> forces us to calculate the length of p2 in this way.  At this point
> we had three related strinfos:
>
>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>   p2: known length, tmp - p2
>   p3: known length, 0
>
> After:
>
>   memcpy (p3, "x", 2);
>
> we use adjust_related_strinfos to add 1 to each length.  However,
> that didn't do anything for delayed lengths because:
>
>         else if (si->stmt != NULL)
>           /* Delayed length computation is unaffected.  */
>           ;
>
> So after the memcpy we had:
>
>   p1: delayed length, calculated from tmp = stpcpy (p2, q)
>   p2: known length, tmp - p2 + 1
>   p3: known length, 1
>
> where the length of p1 was no longer correct.
>
> I thought about three fixes:
>
>   (1) Make adjust_related_strinfos drop strinfos with a delayed length.
>
>   (2) Make adjust_related_strinfos compute the length itself
>       (via get_string_length).
>
>   (3) Make get_string_length update all related strinfos.  We can then
>       maintain the invariant that all lengths in a chain are delayed or
>       none are.
>
> (3) seemed like the best.  get_string_length has already made the
> invasive step of changing the code and computing the length for one
> strinfo.  Updating the related strinfos is just a couple of fold_builds,
> of the kind that the pass already does in several places.
>
> The point is that the code above only triggers if one of the delayed
> lengths has been computed.  That makes (1) unnecessarily pessimistic.
> It also wasn't obvious (to me) from first glance, so (2) might look
> more intrusive than it is.  I think it becomes easier to reason about
> if we do (3) and can assume that all lengths are delayed or none are.
> It also makes the min-length/maybe-not-terminated patch easier.
>
> [ I can go into more detail about why this should be enough to
>   maintain the invariant, and why the asserts should be valid,
>   but the message is already pretty long. ]
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> 2017-05-16  Richard Sandiford  <richard.sandif...@linaro.org>
>
> gcc/
>       PR tree-optimization/80769
>       * tree-ssa-strlen.c (strinfo): Document that "stmt" is also used
>       for malloc and calloc.  Document the new invariant that all related
>       strinfos have delayed lengths or none do.
>       (verify_related_strinfos): Move earlier in file.
>       (set_endptr_and_length): New function, split out from...
>       (get_string_length): ...here.  Also set the lengths of related
>       strinfos.
>       (zero_length_string): Assert that chainsi has known (rather than
>       delayed) lengths.
>       (adjust_related_strinfos): Likewise.
>
> gcc/testsuite/
>       PR tree-optimization/80769
>       * gcc.dg/strlenopt-31.c: New test.
>       * gcc.dg/strlenopt-31g.c: Likewise.
>
> Index: gcc/tree-ssa-strlen.c
> ===================================================================
> --- gcc/tree-ssa-strlen.c     2017-05-16 08:00:03.808133862 +0100
> +++ gcc/tree-ssa-strlen.c     2017-05-16 08:20:51.408572143 +0100
> @@ -61,7 +61,13 @@ struct strinfo
>    tree length;
>    /* Any of the corresponding pointers for querying alias oracle.  */
>    tree ptr;
> -  /* Statement for delayed length computation.  */
> +  /* This is used for two things:
> +
> +     - To record the statement that should be used for delayed length
> +       computations.  We maintain the invariant that all related strinfos
> +       have delayed lengths or none do.
> +
> +     - To record the malloc or calloc call that produced this result.  */
>    gimple *stmt;
>    /* Pointer to '\0' if known, if NULL, it can be computed as
>       ptr + length.  */
> @@ -451,6 +457,45 @@ set_strinfo (int idx, strinfo *si)
>    (*stridx_to_strinfo)[idx] = si;
>  }
>  
> +/* Return the first strinfo in the related strinfo chain
> +   if all strinfos in between belong to the chain, otherwise NULL.  */
> +
> +static strinfo *
> +verify_related_strinfos (strinfo *origsi)
> +{
> +  strinfo *si = origsi, *psi;
> +
> +  if (origsi->first == 0)
> +    return NULL;
> +  for (; si->prev; si = psi)
> +    {
> +      if (si->first != origsi->first)
> +     return NULL;
> +      psi = get_strinfo (si->prev);
> +      if (psi == NULL)
> +     return NULL;
> +      if (psi->next != si->idx)
> +     return NULL;
> +    }
> +  if (si->idx != si->first)
> +    return NULL;
> +  return si;
> +}
> +
> +/* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
> +   Use LOC for folding.  */
> +
> +static void
> +set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
> +{
> +  si->endptr = endptr;
> +  si->stmt = NULL;
> +  tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
> +  tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
> +  si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
> +                             end_as_size, start_as_size);
> +}
> +
>  /* Return string length, or NULL if it can't be computed.  */
>  
>  static tree
> @@ -546,12 +591,12 @@ get_string_length (strinfo *si)
>       case BUILT_IN_STPCPY_CHK_CHKP:
>         gcc_assert (lhs != NULL_TREE);
>         loc = gimple_location (stmt);
> -       si->endptr = lhs;
> -       si->stmt = NULL;
> -       lhs = fold_convert_loc (loc, size_type_node, lhs);
> -       si->length = fold_convert_loc (loc, size_type_node, si->ptr);
> -       si->length = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
> -                                     lhs, si->length);
> +       set_endptr_and_length (loc, si, lhs);
> +       for (strinfo *chainsi = verify_related_strinfos (si);
> +            chainsi != NULL;
> +            chainsi = get_next_strinfo (chainsi))
> +         if (chainsi->length == NULL)
> +           set_endptr_and_length (loc, chainsi, lhs);
>         break;
>       case BUILT_IN_MALLOC:
>         break;
> @@ -620,32 +665,6 @@ unshare_strinfo (strinfo *si)
>    return nsi;
>  }
>  
> -/* Return first strinfo in the related strinfo chain
> -   if all strinfos in between belong to the chain, otherwise
> -   NULL.  */
> -
> -static strinfo *
> -verify_related_strinfos (strinfo *origsi)
> -{
> -  strinfo *si = origsi, *psi;
> -
> -  if (origsi->first == 0)
> -    return NULL;
> -  for (; si->prev; si = psi)
> -    {
> -      if (si->first != origsi->first)
> -     return NULL;
> -      psi = get_strinfo (si->prev);
> -      if (psi == NULL)
> -     return NULL;
> -      if (psi->next != si->idx)
> -     return NULL;
> -    }
> -  if (si->idx != si->first)
> -    return NULL;
> -  return si;
> -}
> -
>  /* Attempt to create a new strinfo for BASESI + OFF, or find existing
>     strinfo if there is any.  Return it's idx, or 0 if no strinfo has
>     been created.  */
> @@ -749,7 +768,8 @@ zero_length_string (tree ptr, strinfo *c
>       {
>         do
>           {
> -           gcc_assert (si->length || si->stmt);
> +           /* We shouldn't mix delayed and non-delayed lengths.  */
> +           gcc_assert (si->length);
>             if (si->endptr == NULL_TREE)
>               {
>                 si = unshare_strinfo (si);
> @@ -770,12 +790,17 @@ zero_length_string (tree ptr, strinfo *c
>             return chainsi;
>           }
>       }
> -      else if (chainsi->first || chainsi->prev || chainsi->next)
> +      else
>       {
> -       chainsi = unshare_strinfo (chainsi);
> -       chainsi->first = 0;
> -       chainsi->prev = 0;
> -       chainsi->next = 0;
> +       /* We shouldn't mix delayed and non-delayed lengths.  */
> +       gcc_assert (chainsi->length);
> +       if (chainsi->first || chainsi->prev || chainsi->next)
> +         {
> +           chainsi = unshare_strinfo (chainsi);
> +           chainsi->first = 0;
> +           chainsi->prev = 0;
> +           chainsi->next = 0;
> +         }
>       }
>      }
>    idx = new_stridx (ptr);
> @@ -820,18 +845,13 @@ adjust_related_strinfos (location_t loc,
>         tree tem;
>  
>         si = unshare_strinfo (si);
> -       if (si->length)
> -         {
> -           tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
> -           si->length = fold_build2_loc (loc, PLUS_EXPR,
> -                                         TREE_TYPE (si->length), si->length,
> -                                         tem);
> -         }
> -       else if (si->stmt != NULL)
> -         /* Delayed length computation is unaffected.  */
> -         ;
> -       else
> -         gcc_unreachable ();
> +       /* We shouldn't see delayed lengths here; the caller must have
> +          calculated the old length in order to calculate the
> +          adjustment.  */
> +       gcc_assert (si->length);
> +       tem = fold_convert_loc (loc, TREE_TYPE (si->length), adj);
> +       si->length = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (si->length),
> +                                     si->length, tem);
>  
>         si->endptr = NULL_TREE;
>         si->dont_invalidate = true;
> Index: gcc/testsuite/gcc.dg/strlenopt-31.c
> ===================================================================
> --- /dev/null 2017-05-11 19:11:40.989165404 +0100
> +++ gcc/testsuite/gcc.dg/strlenopt-31.c       2017-05-16 08:20:26.780371709 
> +0100
> @@ -0,0 +1,25 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +#include "strlenopt.h"
> +
> +__attribute__((noinline, noclone)) int
> +bar (char *p1, const char *q)
> +{
> +  strcpy (p1, "abcde");
> +  char *p2 = strchr (p1, '\0');
> +  strcpy (p2, q);
> +  char *p3 = strchr (p2, '\0');
> +  memcpy (p3, "x", 2);
> +  return strlen (p1);
> +}
> +
> +int
> +main (void)
> +{
> +  char buffer[10];
> +  int res = bar (buffer, "foo");
> +  if (strcmp (buffer, "abcdefoox") != 0 || res != 9)
> +    abort ();
> +  return 0;
> +}
> Index: gcc/testsuite/gcc.dg/strlenopt-31g.c
> ===================================================================
> --- /dev/null 2017-05-11 19:11:40.989165404 +0100
> +++ gcc/testsuite/gcc.dg/strlenopt-31g.c      2017-05-16 08:20:26.780371709 
> +0100
> @@ -0,0 +1,9 @@
> +/* { dg-do run { target *-*-linux* *-*-gnu* } } */
> +/* { dg-options "-O2 -fdump-tree-strlen" } */
> +
> +#define USE_GNU
> +#include "strlenopt-31.c"
> +
> +/* { dg-final { scan-tree-dump-times "stpcpy \\(" 1 "strlen" } } */
> +/* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" } } */
> +/* { dg-final { scan-tree-dump-not "strlen \\(" "strlen" } } */

Reply via email to