On Wed, Nov 10, 2021 at 12:31:29AM +0530, Siddhesh Poyarekar wrote:
>       * tree-object-size.h (compute_builtin_object_size): Return tree
>       instead of HOST_WIDE_INT.
>       * builtins.c (fold_builtin_object_size): Adjust.
>       * gimple-fold.c (gimple_fold_builtin_strncat): Likewise.
>       * ubsan.c (instrument_object_size): Likewise.
>       * tree-object-size.c (object_sizes): Change type to vec<tree>.
>       (initval): New function.
>       (unknown): Use it.
>       (size_unknown_p, size_initval, size_unknown): New functions.
>       (object_sizes_unknown_p): Use it.
>       (object_sizes_get): Return tree.
>       (object_sizes_initialize): Rename from object_sizes_set_force
>       and set VAL parameter type as tree.
>       (object_sizes_set): Set VAL parameter type as tree and adjust
>       implementation.
>       (size_for_offset): New function.
>       (addr_object_size): Change PSIZE parameter to tree and adjust
>       implementation.
>       (alloc_object_size): Return tree.
>       (compute_builtin_object_size): Return tree in PSIZE.
>       (expr_object_size, call_object_size, unknown_object_size):
>       Adjust for object_sizes_set change.
>       (merge_object_sizes): Set OFFSET type to tree and adjust
>       implementation.
>       (plus_stmt_object_size, cond_expr_object_size,
>       collect_object_sizes_for, check_for_plus_in_loops_1): Adjust for
>       change of type from HOST_WIDE_INT to tree.
>       (object_sizes_execute): Adjust for type change to tree.

> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -10226,7 +10226,7 @@ maybe_emit_sprintf_chk_warning (tree exp, enum built_in_function fcode)
>  static tree
>  fold_builtin_object_size (tree ptr, tree ost)
>  {
> -  unsigned HOST_WIDE_INT bytes;
> +  tree bytes;
>    int object_size_type;
>  
>    if (!validate_arg (ptr, POINTER_TYPE)
> @@ -10251,17 +10251,15 @@ fold_builtin_object_size (tree ptr, tree ost)
>    if (TREE_CODE (ptr) == ADDR_EXPR)
>      {
>        compute_builtin_object_size (ptr, object_size_type, &bytes);
> -      if (wi::fits_to_tree_p (bytes, size_type_node))
> -     return build_int_cstu (size_type_node, bytes);
> +      return fold_convert (size_type_node, bytes);
>      }
>    else if (TREE_CODE (ptr) == SSA_NAME)
>      {
>        /* If object size is not known yet, delay folding until
>         later.  Maybe subsequent passes will help determining
>         it.  */
> -      if (compute_builtin_object_size (ptr, object_size_type, &bytes)
> -       && wi::fits_to_tree_p (bytes, size_type_node))
> -     return build_int_cstu (size_type_node, bytes);
> +      if (compute_builtin_object_size (ptr, object_size_type, &bytes))
> +     return fold_convert (size_type_node, bytes);

Neither of these is equivalent to what the code used to do before.
If some target has e.g. pointers wider than size_t, then previously we could
compute a bytes value that doesn't fit into size_t and would return NULL, which
eventually would result in object_sizes_execute or expand_builtin_object_size
resolving it to the unknown value.  But fold_convert will perform modulo
arithmetic on it, so if, say, size_t is 24-bit and bytes is 0x1000001, it
will fold to (size_t) 1.  I think we should still punt if it doesn't fit.
Or do we ensure that what it returns always has sizetype as its type?
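I.e. keep the punt, something like (untested; int_fits_type_p wants an
INTEGER_CST, hence the extra check):

      compute_builtin_object_size (ptr, object_size_type, &bytes);
      if (TREE_CODE (bytes) == INTEGER_CST
          && int_fits_type_p (bytes, size_type_node))
        return fold_convert (size_type_node, bytes);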

> @@ -2486,13 +2486,14 @@ gimple_fold_builtin_strncat (gimple_stmt_iterator *gsi)
>    if (cmpsrc < 0)
>      return false;
>  
> -  unsigned HOST_WIDE_INT dstsize;
> +  tree dstsize;
>  
>    bool nowarn = warning_suppressed_p (stmt, OPT_Wstringop_overflow_);
>  
> -  if (!nowarn && compute_builtin_object_size (dst, 1, &dstsize))
> +  if (!nowarn && compute_builtin_object_size (dst, 1, &dstsize)
> +      && tree_fits_uhwi_p (dstsize))
>      {
> -      int cmpdst = compare_tree_int (len, dstsize);
> +      int cmpdst = compare_tree_int (len, tree_to_uhwi (dstsize));

Why jump from tree to UHWI and back?
Check that TREE_CODE (dstsize) == INTEGER_CST and do
tree_int_cst_compare instead?
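I.e. something like (untested):

  if (!nowarn && compute_builtin_object_size (dst, 1, &dstsize)
      && TREE_CODE (dstsize) == INTEGER_CST)
    {
      int cmpdst = tree_int_cst_compare (len, dstsize);
      ...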

>  
>        if (cmpdst >= 0)
>       {
> @@ -2509,7 +2510,7 @@ gimple_fold_builtin_strncat (gimple_stmt_iterator *gsi)
>                                   "destination size")
>                              : G_("%qD specified bound %E exceeds "
>                                   "destination size %wu"),
> -                            fndecl, len, dstsize);
> +                            fndecl, len, tree_to_uhwi (dstsize));

Similarly, use %E and pass dstsize to it.
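E.g. (untested):

      : G_("%qD specified bound %E exceeds "
           "destination size %E"),
      fndecl, len, dstsize);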

> +/* Initial value of object sizes; zero for maximum and SIZE_MAX for minimum
> +   object size.  */
> +
> +static inline unsigned HOST_WIDE_INT
> +initval (int object_size_type)
> +{
> +  return (unsigned HOST_WIDE_INT) -popcount_hwi (object_size_type
> +                                              & OST_MINIMUM);

This makes the code quite unreadable, and on some arches it will also be
much worse (popcount is a libcall on many, and even where it is not, it is an
inline function using a builtin inside of it).  Can't you use a simple, readable
  return (object_size_type & OST_MINIMUM) ? HOST_WIDE_INT_M1U : 0;
and let the compiler optimize as much as it wants?
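I.e. the whole helper could then just be:

  static inline unsigned HOST_WIDE_INT
  initval (int object_size_type)
  {
    return (object_size_type & OST_MINIMUM) ? HOST_WIDE_INT_M1U : 0;
  }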

> +/* Bytes at end of the object with SZ from offset OFFSET. */
> +
> +static tree
> +size_for_offset (tree sz, tree offset)
> +{
> +  return size_binop (MINUS_EXPR, size_binop (MAX_EXPR, sz, offset), offset);
> +}
>  
> @@ -314,27 +356,22 @@ addr_object_size (struct object_size_info *osi, const_tree ptr,
>                           SSA_NAME_VERSION (var)))
>           sz = object_sizes_get (osi, SSA_NAME_VERSION (var));
>         else
> -         sz = unknown (object_size_type);
> +         sz = size_unknown (object_size_type);
>       }
> -      if (sz != unknown (object_size_type))
> +      if (!size_unknown_p (sz, object_size_type))
>       {
> -       offset_int mem_offset;
> -       if (mem_ref_offset (pt_var).is_constant (&mem_offset))
> -         {
> -           offset_int dsz = wi::sub (sz, mem_offset);
> -           if (wi::neg_p (dsz))
> -             sz = 0;
> -           else if (wi::fits_uhwi_p (dsz))
> -             sz = dsz.to_uhwi ();
> -           else
> -             sz = unknown (object_size_type);
> -         }
> +       tree offset = TREE_OPERAND (pt_var, 1);
> +       if (TREE_CODE (offset) != INTEGER_CST
> +           || TREE_CODE (sz) != INTEGER_CST)
> +         sz = size_unknown (object_size_type);
>         else
> -         sz = unknown (object_size_type);
> +         sz = size_for_offset (sz, offset);

This doesn't match what the code did before and I'm surprised if it works at all.
TREE_OPERAND (pt_var, 1), while it is an INTEGER_CST or POLY_INT_CST,
has the type used for aliasing encoded in its type, so its type is some pointer
type.  Performing size_binop etc. on such values can misbehave; the code
assumes that it is sizetype or at least some integral type compatible with
it.  Also, mem_ref_offset is signed, and offset_int has bigger precision
than pointers on the target so that it can always be signed.  So
e.g. if MEM_REF's second operand is bigger than or equal to half of the
address space, in the old code it would appear to be negative and wi::sub would
result in a value bigger than sz.  Not really sure right now if that is
exactly how we want to treat it; would be nice to try some testcase.
In any case, size_for_offset should be called with the offset converted
to sizetype if it does size_binop on it.
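E.g. (untested):

          sz = size_for_offset (sz, fold_convert (sizetype, offset));
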
> +    reexamine |= merge_object_sizes (osi, var, then_, size_int (0));

size_zero_node ?
>    else
>      expr_object_size (osi, var, then_);
>  
> @@ -952,7 +970,7 @@ cond_expr_object_size (struct object_size_info *osi, tree var, gimple *stmt)
>      return reexamine;
>  
>    if (TREE_CODE (else_) == SSA_NAME)
> -    reexamine |= merge_object_sizes (osi, var, else_, 0);
> +    reexamine |= merge_object_sizes (osi, var, else_, size_int (0));

Likewise.  Many times.
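I.e. the same one-token change at each call site:

  reexamine |= merge_object_sizes (osi, var, else_, size_zero_node);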

> -  unsigned HOST_WIDE_INT size;
> -  if (compute_builtin_object_size (base_addr, 0, &size))
> -    sizet = build_int_cst (sizetype, size);
> -  else if (optimize)

I'd prefer not to reindent it to avoid useless churn, just do
  if (compute_builtin_object_size (base_addr, 0, &sizet))
    ;
  else if (optimize)
...

        Jakub
