Hi David.
Thanks for the patch.  Please see a few comments below.

> @@ -975,6 +978,161 @@ static tree bpf_core_compute (tree, vec<unsigned int> 
> *);
>  static int bpf_core_get_index (const tree);
>  static bool is_attr_preserve_access (tree);
>  
> +static void
> +maybe_make_core_relo (tree expr, enum btf_core_reloc_kind kind)

This function is missing a comment explaining what it does.

> +{
> +  /* If we are not targetting BPF CO-RE, do not make a relocation. We
> +     might not be generating any debug info at all.  */
> +  if (!TARGET_BPF_CORE)
> +    return;
> +
> +  auto_vec<unsigned int, 16> accessors;
> +  tree container = bpf_core_compute (expr, &accessors);
> +
> +  /* Any valid use of the builtin must have at least one access. Otherwise,
> +     there is nothing to record and nothing to do. This is primarily a
> +     guard against optimizations leading to unexpected expressions in the
> +     argument of the builtin. For example, if the builtin is used to read
> +     a field of a structure which can be statically determined to hold a
> +     constant value, the argument to the builtin will be optimized to that
> +     constant. This is OK, and means the builtin call is superfluous.
> +     e.g.
> +     struct S foo;
> +     foo.a = 5;
> +     int x = __preserve_access_index (foo.a);
> +     ... do stuff with x
> +     'foo.a' in the builtin argument will be optimized to '5' with -01+.
> +     This sequence does not warrant recording a CO-RE relocation.  */
> +
> +  if (accessors.length () < 1)
> +    return;
> +  accessors.reverse ();
> +
> +  rtx_code_label *label = gen_label_rtx ();
> +  LABEL_PRESERVE_P (label) = 1;
> +  emit_label (label);
> +
> +  /* Determine what output section this relocation will apply to.
> +     If this function is associated with a section, use that. Otherwise,
> +     fall back on '.text'.  */
> +  const char * section_name;
> +  if (current_function_decl && DECL_SECTION_NAME (current_function_decl))
> +    section_name = DECL_SECTION_NAME (current_function_decl);
> +  else
> +    section_name = ".text";
> +
> +  /* Add the CO-RE relocation information to the BTF container.  */
> +  bpf_core_reloc_add (TREE_TYPE (container), section_name, &accessors, label,
> +                   kind);
> +}
> +
> +/* Expand a call to __builtin_preserve_field_info by evaluating the requested
> +   information about SRC according to KIND, and return a tree holding
> +   the result.  */
> +
> +static tree
> +bpf_core_field_info (tree src, enum btf_core_reloc_kind kind)
> +{
> +  unsigned int result;
> +  poly_int64 bitsize, bitpos;
> +  tree var_off;
> +  machine_mode mode;
> +  int unsignedp, reversep, volatilep;
> +
> +  get_inner_reference (src, &bitsize, &bitpos, &var_off, &mode, &unsignedp,
> +                    &reversep, &volatilep);

Since the information returned by the builtin is always constant
(positions, sizes) I think you will want to adjust the code for the
eventuality of variable-positioned fields and also variable-sized
fields.

get_inner_reference sets var_off to a tree if the position of the field
is variable.  In these cases `bitpos' is relative to that position.

Likewise, when the size of the field is variable, get_inner_reference
sets `mode' to BLKmode and `bitsize' to -1.

I'm not sure what the built-in is supposed to do/return in these cases.
I guess it makes sense to error out, but what does LLVM do?

> +
> +  /* Note: Use DECL_BIT_FIELD_TYPE rather than DECL_BIT_FIELD here, because 
> it
> +     remembers whether the field in question was originally declared as a
> +     bitfield, regardless of how it has been optimized.  */
> +  bool bitfieldp = (TREE_CODE (src) == COMPONENT_REF
> +                 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (src, 1)));
> +
> +  unsigned int align = TYPE_ALIGN (TREE_TYPE (src));
> +  if (TREE_CODE (src) == COMPONENT_REF)
> +    {
> +      tree field = TREE_OPERAND (src, 1);
> +      if (DECL_BIT_FIELD_TYPE (field))
> +     align = TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field));
> +      else
> +     align = TYPE_ALIGN (TREE_TYPE (field));
> +    }
> +
> +  unsigned int start_bitpos = bitpos & ~(align - 1);
> +  unsigned int end_bitpos = start_bitpos + align;
> +
> +  switch (kind)
> +    {
> +    case BPF_RELO_FIELD_BYTE_OFFSET:
> +      {
> +     if (bitfieldp)
> +       result = start_bitpos / 8;
> +     else
> +       result = bitpos / 8;
> +      }
> +      break;
> +
> +    case BPF_RELO_FIELD_BYTE_SIZE:
> +      {
> +     if (bitfieldp)
> +       {
> +         /* To match LLVM behavior, byte size of bitfields is recorded as
> +            the full size of the base type. A 3-bit bitfield of type int is
> +            therefore recorded as having a byte size of 4 bytes. */
> +         result = end_bitpos - start_bitpos;
> +         if (result & (result - 1))
> +           error ("unsupported field expression");
> +         result = result / 8;
> +       }
> +     else
> +       result = bitsize / 8;
> +      }
> +      break;
> +
> +    case BPF_RELO_FIELD_EXISTS:
> +      /* The field always exists at compile time.  */
> +      result = 1;
> +      break;
> +
> +    case BPF_RELO_FIELD_SIGNED:
> +      result = !unsignedp;
> +      break;
> +
> +    case BPF_RELO_FIELD_LSHIFT_U64:
> +    case BPF_RELO_FIELD_RSHIFT_U64:
> +      {
> +     if (!bitfieldp)
> +       {
> +         if (bitsize > 64)
> +           error ("field size too large");
> +         result = 64 - bitsize;
> +         break;
> +       }
> +
> +     if (end_bitpos - start_bitpos > 64)
> +       error ("field size too large");
> +
> +     if (kind == BPF_RELO_FIELD_LSHIFT_U64)
> +       {
> +         if (TARGET_BIG_ENDIAN)
> +           result = bitpos + 64 - start_bitpos - align;
> +         else
> +           result = start_bitpos + 64 - bitpos - bitsize;
> +       }
> +     else /* RSHIFT_U64 */
> +       result = 64 - bitsize;
> +      }
> +      break;
> +
> +    default:
> +      error ("invalid argument to built-in function");
> +      return error_mark_node;
> +      break;
> +    }
> +
> +  return build_int_cst (unsigned_type_node, result);
> +}
> +
>  /* Expand a call to a BPF-specific built-in function that was set up
>     with bpf_init_builtins.  */
>  
> @@ -1025,17 +1183,15 @@ bpf_expand_builtin (tree exp, rtx target 
> ATTRIBUTE_UNUSED,
>        /* The result of the load is in R0.  */
>        return gen_rtx_REG (ops[0].mode, BPF_R0);
>      }
> +
>    else if (code == -1)
>      {
> -      /* A resolved overloaded builtin, e.g. __bpf_preserve_access_index_si 
> */
> +      /* A resolved overloaded __builtin_preserve_access_index.  */
>        tree arg = CALL_EXPR_ARG (exp, 0);
>  
>        if (arg == NULL_TREE)
>       return NULL_RTX;
>  
> -      auto_vec<unsigned int, 16> accessors;
> -      tree container;
> -
>        if (TREE_CODE (arg) == SSA_NAME)
>       {
>         gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
> @@ -1049,51 +1205,42 @@ bpf_expand_builtin (tree exp, rtx target 
> ATTRIBUTE_UNUSED,
>        /* Avoid double-recording information if the argument is an access to
>        a struct/union marked __attribute__((preserve_access_index)). This
>        Will be handled by the attribute handling pass.  */
> -      if (is_attr_preserve_access (arg))
> -     return expand_normal (arg);
> -
> -      container = bpf_core_compute (arg, &accessors);
> -
> -      /* Any valid use of the builtin must have at least one access. 
> Otherwise,
> -      there is nothing to record and nothing to do. This is primarily a
> -      guard against optimizations leading to unexpected expressions in the
> -      argument of the builtin. For example, if the builtin is used to read
> -      a field of a structure which can be statically determined to hold a
> -      constant value, the argument to the builtin will be optimized to that
> -      constant. This is OK, and means the builtin call is superfluous.
> -      e.g.
> -        struct S foo;
> -        foo.a = 5;
> -        int x = __preserve_access_index (foo.a);
> -        ... do stuff with x
> -      'foo.a' in the builtin argument will be optimized to '5' with -01+.
> -      This sequence does not warrant recording a CO-RE relocation.  */
> -
> -      if (accessors.length () < 1)
> -     return expand_normal (arg);
> -
> -      accessors.reverse ();
> -
> -      container = TREE_TYPE (container);
> -
> -      rtx_code_label *label = gen_label_rtx ();
> -      LABEL_PRESERVE_P (label) = 1;
> -      emit_label (label);
> -
> -      /* Determine what output section this relocation will apply to.
> -      If this function is associated with a section, use that. Otherwise,
> -      fall back on '.text'.  */
> -      const char * section_name;
> -      if (current_function_decl && DECL_SECTION_NAME (current_function_decl))
> -     section_name = DECL_SECTION_NAME (current_function_decl);
> +      if (!is_attr_preserve_access (arg))
> +     maybe_make_core_relo (arg, BPF_RELO_FIELD_BYTE_OFFSET);
> +
> +      return expand_normal (arg);
> +    }
> +
> +  else if (code == -2)
> +    {
> +      /* A resolved overloaded __builtin_preserve_field_info.  */
> +      tree src = CALL_EXPR_ARG (exp, 0);
> +      tree kind_tree = CALL_EXPR_ARG (exp, 1);
> +      unsigned HOST_WIDE_INT kind_val;
> +      if (tree_fits_uhwi_p (kind_tree))
> +     kind_val = tree_to_uhwi (kind_tree);
>        else
> -     section_name = ".text";
> +     error ("invalid argument to built-in function");
>  
> -      /* Add the CO-RE relocation information to the BTF container.  */
> -      bpf_core_reloc_add (container, section_name, &accessors, label);
> +      enum btf_core_reloc_kind kind = (enum btf_core_reloc_kind) kind_val;
>  
> -      return expand_normal (arg);
> +      if (TREE_CODE (src) == SSA_NAME)
> +     {
> +       gimple *def_stmt = SSA_NAME_DEF_STMT (src);
> +       if (is_gimple_assign (def_stmt))
> +         src = gimple_assign_rhs1 (def_stmt);
> +     }
> +      if (TREE_CODE (src) == ADDR_EXPR)
> +     src = TREE_OPERAND (src, 0);
> +
> +      tree result = bpf_core_field_info (src, kind);

If I read this properly, for something like:

  __builtin_preserve_field_info (a = foo.bar + bar.baz, KIND)

On one side CO-RE relocations are computed for both foo.bar and bar.baz
(I see bpf_core_compute does that) as expected.

But then the builtin returns information that can only apply to one
access.  Which one?

> +
> +      if (result != error_mark_node)
> +     maybe_make_core_relo (src, kind);
> +
> +      return expand_normal (result);
>      }
> +
>    gcc_unreachable ();
>  }
>  
> @@ -1259,17 +1406,29 @@ bpf_core_get_index (const tree node)
>     __builtin_preserve_access_index.  */
>  
>  static tree
> -bpf_core_newdecl (tree type)
> +bpf_core_newdecl (tree type, bool is_pai)
>  {
> -  tree rettype = build_function_type_list (type, type, NULL);
> +  tree rettype;
>    char name[80];
> -  int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> +  static unsigned long pai_count = 0;
> +  static unsigned long pfi_count = 0;
>  
> -  static unsigned long cnt = 0;
> -  len = snprintf (name + len, sizeof (name) - len, "%lu", cnt++);
> +  if (is_pai)
> +    {
> +      rettype = build_function_type_list (type, type, NULL);
> +      int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_");
> +      len = snprintf (name + len, sizeof (name) - len, "%lu", pai_count++);
> +    }
> +  else
> +    {
> +      rettype = build_function_type_list (unsigned_type_node, type,
> +                                       unsigned_type_node, NULL);
> +      int len = snprintf (name, sizeof (name), "%s", "__builtin_pfi_");
> +      len = snprintf (name + len, sizeof (name) - len, "%lu", pfi_count++);
> +    }
>  
> -  return add_builtin_function_ext_scope (name, rettype, -1, BUILT_IN_MD, 
> NULL,
> -                                      NULL_TREE);
> +  return add_builtin_function_ext_scope (name, rettype, is_pai ? -1 : -2,
> +                                      BUILT_IN_MD, NULL, NULL_TREE);
>  }
>  
>  /* Return whether EXPR could access some aggregate data structure that
> @@ -1278,22 +1437,33 @@ bpf_core_newdecl (tree type)
>  static int
>  bpf_core_is_maybe_aggregate_access (tree expr)
>  {
> -  enum tree_code code = TREE_CODE (expr);
> -  if (code == COMPONENT_REF || code == ARRAY_REF)
> -    return 1;
> -
> -  if (code == ADDR_EXPR)
> +  switch (TREE_CODE (expr))
> +    {
> +    case COMPONENT_REF:
> +    case BIT_FIELD_REF:
> +    case ARRAY_REF:
> +    case ARRAY_RANGE_REF:
> +      return 1;
> +    case ADDR_EXPR:
> +    case NOP_EXPR:
>        return bpf_core_is_maybe_aggregate_access (TREE_OPERAND (expr, 0));
> -
> +    default:;
> +    }
>    return 0;
>  }
>  
> +struct core_walk_data {
> +  location_t loc;
> +  tree arg;
> +};
> +
>  /* Callback function used with walk_tree from 
> bpf_resolve_overloaded_builtin.  */
>  
>  static tree
>  bpf_core_walk (tree *tp, int *walk_subtrees, void *data)
>  {
> -  location_t loc = *((location_t *) data);
> +  struct core_walk_data *dat = (struct core_walk_data *) data;
> +  bool is_pai = dat->arg == NULL_TREE;
>  
>    /* If this is a type, don't do anything. */
>    if (TYPE_P (*tp))
> @@ -1302,10 +1472,18 @@ bpf_core_walk (tree *tp, int *walk_subtrees, void 
> *data)
>        return NULL_TREE;
>      }
>  
> +  /* Build a new function call to a resolved builtin for the desired 
> operation.
> +     If this is a preserve_field_info call, pass along the argument to the
> +     resolved builtin call. */
>    if (bpf_core_is_maybe_aggregate_access (*tp))
>      {
> -      tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp));
> -      tree newcall = build_call_expr_loc (loc, newdecl, 1, *tp);
> +      tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp), is_pai);
> +      tree newcall;
> +      if (is_pai)
> +     newcall = build_call_expr_loc (dat->loc, newdecl, 1, *tp);
> +      else
> +     newcall = build_call_expr_loc (dat->loc, newdecl, 2, *tp, dat->arg);
> +
>        *tp = newcall;
>        *walk_subtrees = 0;
>      }
> @@ -1344,7 +1522,12 @@ bpf_small_register_classes_for_mode_p (machine_mode 
> mode)
>  static tree
>  bpf_resolve_overloaded_builtin (location_t loc, tree fndecl, void *arglist)
>  {
> -  if (DECL_MD_FUNCTION_CODE (fndecl) != BPF_BUILTIN_PRESERVE_ACCESS_INDEX)
> +  bool is_pai = DECL_MD_FUNCTION_CODE (fndecl)
> +    == BPF_BUILTIN_PRESERVE_ACCESS_INDEX;
> +  bool is_pfi = DECL_MD_FUNCTION_CODE (fndecl)
> +    == BPF_BUILTIN_PRESERVE_FIELD_INFO;
> +
> +  if (!is_pai && !is_pfi)
>      return NULL_TREE;
>  
>    /* We only expect one argument, but it may be an arbitrarily-complicated
> @@ -1352,16 +1535,17 @@ bpf_resolve_overloaded_builtin (location_t loc, tree 
> fndecl, void *arglist)
>    vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (arglist);
>    unsigned n_params = params ? params->length() : 0;
>  
> -  if (n_params != 1)
> +  if ((is_pai && n_params != 1) || (is_pfi && n_params != 2))
>      {
> -      error_at (loc, "expected exactly 1 argument");
> +      error_at (loc, "wrong number of arguments");
>        return NULL_TREE;
>      }
>  
>    tree param = (*params)[0];
>  
> -  /* If not generating BPF_CORE information, the builtin does nothing.  */
> -  if (!TARGET_BPF_CORE)
> +  /* If not generating BPF_CORE information, preserve_access_index does 
> nothing,
> +     and simply "resolves to" the argument.  */
> +  if (!TARGET_BPF_CORE && is_pai)
>      return param;
>  
>    /* Do remove_c_maybe_const_expr for the arg.
> @@ -1387,7 +1571,11 @@ bpf_resolve_overloaded_builtin (location_t loc, tree 
> fndecl, void *arglist)
>       This ensures that all the relevant information remains within the
>       expression trees the builtin finally gets.  */
>  
> -  walk_tree (&param, bpf_core_walk, (void *) &loc, NULL);
> +  struct core_walk_data data;
> +  data.loc = loc;
> +  data.arg = is_pai ? NULL_TREE : (*params)[1];
> +
> +  walk_tree (&param, bpf_core_walk, (void *) &data, NULL);
>  
>    return param;
>  }
> @@ -1524,7 +1712,8 @@ handle_attr_preserve (function *fn)
>                     emit_label (label);
>  
>                     /* Add the CO-RE relocation information to the BTF 
> container.  */
> -                   bpf_core_reloc_add (container, section_name, &accessors, 
> label);
> +                   bpf_core_reloc_add (container, section_name, &accessors, 
> label,
> +                                       BPF_RELO_FIELD_BYTE_OFFSET);
>                   }
>               }
>           }
> diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc
> index 8897a045ea1..9f71040846b 100644
> --- a/gcc/config/bpf/coreout.cc
> +++ b/gcc/config/bpf/coreout.cc
> @@ -152,7 +152,8 @@ static GTY (()) vec<bpf_core_section_ref, va_gc> 
> *bpf_core_sections;
>  
>  void
>  bpf_core_reloc_add (const tree type, const char * section_name,
> -                 vec<unsigned int> *accessors, rtx_code_label *label)
> +                 vec<unsigned int> *accessors, rtx_code_label *label,
> +                 enum btf_core_reloc_kind kind)
>  {
>    char buf[40];
>    unsigned int i, n = 0;
> @@ -173,7 +174,7 @@ bpf_core_reloc_add (const tree type, const char * 
> section_name,
>  
>    bpfcr->bpfcr_type = get_btf_id (ctf_lookup_tree_type (ctfc, type));
>    bpfcr->bpfcr_insn_label = label;
> -  bpfcr->bpfcr_kind = BPF_RELO_FIELD_BYTE_OFFSET;
> +  bpfcr->bpfcr_kind = kind;
>  
>    /* Add the CO-RE reloc to the appropriate section.  */
>    bpf_core_section_ref sec;
> diff --git a/gcc/config/bpf/coreout.h b/gcc/config/bpf/coreout.h
> index 3c7bdfd8c2f..498853f6e00 100644
> --- a/gcc/config/bpf/coreout.h
> +++ b/gcc/config/bpf/coreout.h
> @@ -103,7 +103,7 @@ extern void btf_ext_init (void);
>  extern void btf_ext_output (void);
>  
>  extern void bpf_core_reloc_add (const tree, const char *, vec<unsigned int> 
> *,
> -                             rtx_code_label *);
> +                             rtx_code_label *, enum btf_core_reloc_kind);
>  extern int bpf_core_get_sou_member_index (ctf_container_ref, const tree);
>  
>  #ifdef       __cplusplus


> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 04af0584d82..0d3bcb24ab9 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -15745,6 +15745,27 @@ Load 32-bits from the @code{struct sk_buff} packet 
> data pointed by the register
>  BPF Compile Once-Run Everywhere (CO-RE) support. Instruct GCC to generate 
> CO-RE relocation records for any accesses to aggregate data structures 
> (struct, union, array types) in @var{expr}. This builtin is otherwise 
> transparent, the return value is whatever @var{expr} evaluates to. It is also 
> overloaded: @var{expr} may be of any type (not necessarily a pointer), the 
> return type is the same. Has no effect if @code{-mco-re} is not in effect 
> (either specified or implied).
>  @end deftypefn
>  
> +@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info 
> (@var{expr}, unsigned int @var{kind})
> +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to
> +extract information to aid in struct/union relocations.  @var{expr} is
> +an access to a field of a struct or union. Depending on @var{kind}, different
> +information is returned to the program. A CO-RE relocation for the access in
> +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect.
> +
> +@var{kind} is one of:
> +@smallexample
> +enum
> +@{
> +  FIELD_BYTE_OFFSET = 0,
> +  FIELD_BYTE_SIZE,
> +  FIELD_EXISTENCE,
> +  FIELD_SIGNEDNESS,
> +  FIELD_LSHIFT_U64,
> +  FIELD_RSHIFT_U64,
> +@};
> +@end smallexample
> +@end deftypefn
> +

We need to document what all these returned values are.  It may be
more-or-less obvious, but it is better to be explicit IMO.  It would
also be good to document the fact that the returned value is always
known at compile-time.

Reply via email to