Hi David. Thanks for the patch. Please see a few comments below.
> @@ -975,6 +978,161 @@ static tree bpf_core_compute (tree, vec<unsigned int> > *); > static int bpf_core_get_index (const tree); > static bool is_attr_preserve_access (tree); > > +static void > +maybe_make_core_relo (tree expr, enum btf_core_reloc_kind kind) This function is missing a comment explaining what it does. > +{ > + /* If we are not targetting BPF CO-RE, do not make a relocation. We > + might not be generating any debug info at all. */ > + if (!TARGET_BPF_CORE) > + return; > + > + auto_vec<unsigned int, 16> accessors; > + tree container = bpf_core_compute (expr, &accessors); > + > + /* Any valid use of the builtin must have at least one access. Otherwise, > + there is nothing to record and nothing to do. This is primarily a > + guard against optimizations leading to unexpected expressions in the > + argument of the builtin. For example, if the builtin is used to read > + a field of a structure which can be statically determined to hold a > + constant value, the argument to the builtin will be optimized to that > + constant. This is OK, and means the builtin call is superfluous. > + e.g. > + struct S foo; > + foo.a = 5; > + int x = __preserve_access_index (foo.a); > + ... do stuff with x > + 'foo.a' in the builtin argument will be optimized to '5' with -01+. > + This sequence does not warrant recording a CO-RE relocation. */ > + > + if (accessors.length () < 1) > + return; > + accessors.reverse (); > + > + rtx_code_label *label = gen_label_rtx (); > + LABEL_PRESERVE_P (label) = 1; > + emit_label (label); > + > + /* Determine what output section this relocation will apply to. > + If this function is associated with a section, use that. Otherwise, > + fall back on '.text'. */ > + const char * section_name; > + if (current_function_decl && DECL_SECTION_NAME (current_function_decl)) > + section_name = DECL_SECTION_NAME (current_function_decl); > + else > + section_name = ".text"; > + > + /* Add the CO-RE relocation information to the BTF container. 
*/ > + bpf_core_reloc_add (TREE_TYPE (container), section_name, &accessors, label, > + kind); > +} > + > +/* Expand a call to __builtin_preserve_field_info by evaluating the requested > + information about SRC according to KIND, and return a tree holding > + the result. */ > + > +static tree > +bpf_core_field_info (tree src, enum btf_core_reloc_kind kind) > +{ > + unsigned int result; > + poly_int64 bitsize, bitpos; > + tree var_off; > + machine_mode mode; > + int unsignedp, reversep, volatilep; > + > + get_inner_reference (src, &bitsize, &bitpos, &var_off, &mode, &unsignedp, > + &reversep, &volatilep); Since the information returned by the builtin is always constant (positions, sizes) I think you will want to adjust the code for the eventuality of variable positioned fields and also variable sized fields. get_inner_reference sets var_off to a tree if the position of the field is variable. In these cases `bitpos' is relative to that position. Likewise, if the field is variable-sized, get_inner_reference sets `mode' to BLKmode and `bitsize' to -1. I'm not sure what the built-in is supposed to do/return in these cases. I guess it makes sense to error out, but what does LLVM do? > + > + /* Note: Use DECL_BIT_FIELD_TYPE rather than DECL_BIT_FIELD here, because > it > + remembers whether the field in question was originally declared as a > + bitfield, regardless of how it has been optimized. 
*/ > + bool bitfieldp = (TREE_CODE (src) == COMPONENT_REF > + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (src, 1))); > + > + unsigned int align = TYPE_ALIGN (TREE_TYPE (src)); > + if (TREE_CODE (src) == COMPONENT_REF) > + { > + tree field = TREE_OPERAND (src, 1); > + if (DECL_BIT_FIELD_TYPE (field)) > + align = TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)); > + else > + align = TYPE_ALIGN (TREE_TYPE (field)); > + } > + > + unsigned int start_bitpos = bitpos & ~(align - 1); > + unsigned int end_bitpos = start_bitpos + align; > + > + switch (kind) > + { > + case BPF_RELO_FIELD_BYTE_OFFSET: > + { > + if (bitfieldp) > + result = start_bitpos / 8; > + else > + result = bitpos / 8; > + } > + break; > + > + case BPF_RELO_FIELD_BYTE_SIZE: > + { > + if (bitfieldp) > + { > + /* To match LLVM behavior, byte size of bitfields is recorded as > + the full size of the base type. A 3-bit bitfield of type int is > + therefore recorded as having a byte size of 4 bytes. */ > + result = end_bitpos - start_bitpos; > + if (result & (result - 1)) > + error ("unsupported field expression"); > + result = result / 8; > + } > + else > + result = bitsize / 8; > + } > + break; > + > + case BPF_RELO_FIELD_EXISTS: > + /* The field always exists at compile time. 
*/ > + result = 1; > + break; > + > + case BPF_RELO_FIELD_SIGNED: > + result = !unsignedp; > + break; > + > + case BPF_RELO_FIELD_LSHIFT_U64: > + case BPF_RELO_FIELD_RSHIFT_U64: > + { > + if (!bitfieldp) > + { > + if (bitsize > 64) > + error ("field size too large"); > + result = 64 - bitsize; > + break; > + } > + > + if (end_bitpos - start_bitpos > 64) > + error ("field size too large"); > + > + if (kind == BPF_RELO_FIELD_LSHIFT_U64) > + { > + if (TARGET_BIG_ENDIAN) > + result = bitpos + 64 - start_bitpos - align; > + else > + result = start_bitpos + 64 - bitpos - bitsize; > + } > + else /* RSHIFT_U64 */ > + result = 64 - bitsize; > + } > + break; > + > + default: > + error ("invalid argument to built-in function"); > + return error_mark_node; > + break; > + } > + > + return build_int_cst (unsigned_type_node, result); > +} > + > /* Expand a call to a BPF-specific built-in function that was set up > with bpf_init_builtins. */ > > @@ -1025,17 +1183,15 @@ bpf_expand_builtin (tree exp, rtx target > ATTRIBUTE_UNUSED, > /* The result of the load is in R0. */ > return gen_rtx_REG (ops[0].mode, BPF_R0); > } > + > else if (code == -1) > { > - /* A resolved overloaded builtin, e.g. __bpf_preserve_access_index_si > */ > + /* A resolved overloaded __builtin_preserve_access_index. */ > tree arg = CALL_EXPR_ARG (exp, 0); > > if (arg == NULL_TREE) > return NULL_RTX; > > - auto_vec<unsigned int, 16> accessors; > - tree container; > - > if (TREE_CODE (arg) == SSA_NAME) > { > gimple *def_stmt = SSA_NAME_DEF_STMT (arg); > @@ -1049,51 +1205,42 @@ bpf_expand_builtin (tree exp, rtx target > ATTRIBUTE_UNUSED, > /* Avoid double-recording information if the argument is an access to > a struct/union marked __attribute__((preserve_access_index)). This > Will be handled by the attribute handling pass. 
*/ > - if (is_attr_preserve_access (arg)) > - return expand_normal (arg); > - > - container = bpf_core_compute (arg, &accessors); > - > - /* Any valid use of the builtin must have at least one access. > Otherwise, > - there is nothing to record and nothing to do. This is primarily a > - guard against optimizations leading to unexpected expressions in the > - argument of the builtin. For example, if the builtin is used to read > - a field of a structure which can be statically determined to hold a > - constant value, the argument to the builtin will be optimized to that > - constant. This is OK, and means the builtin call is superfluous. > - e.g. > - struct S foo; > - foo.a = 5; > - int x = __preserve_access_index (foo.a); > - ... do stuff with x > - 'foo.a' in the builtin argument will be optimized to '5' with -01+. > - This sequence does not warrant recording a CO-RE relocation. */ > - > - if (accessors.length () < 1) > - return expand_normal (arg); > - > - accessors.reverse (); > - > - container = TREE_TYPE (container); > - > - rtx_code_label *label = gen_label_rtx (); > - LABEL_PRESERVE_P (label) = 1; > - emit_label (label); > - > - /* Determine what output section this relocation will apply to. > - If this function is associated with a section, use that. Otherwise, > - fall back on '.text'. */ > - const char * section_name; > - if (current_function_decl && DECL_SECTION_NAME (current_function_decl)) > - section_name = DECL_SECTION_NAME (current_function_decl); > + if (!is_attr_preserve_access (arg)) > + maybe_make_core_relo (arg, BPF_RELO_FIELD_BYTE_OFFSET); > + > + return expand_normal (arg); > + } > + > + else if (code == -2) > + { > + /* A resolved overloaded __builtin_preserve_field_info. 
*/ > + tree src = CALL_EXPR_ARG (exp, 0); > + tree kind_tree = CALL_EXPR_ARG (exp, 1); > + unsigned HOST_WIDE_INT kind_val; > + if (tree_fits_uhwi_p (kind_tree)) > + kind_val = tree_to_uhwi (kind_tree); > else > - section_name = ".text"; > + error ("invalid argument to built-in function"); > > - /* Add the CO-RE relocation information to the BTF container. */ > - bpf_core_reloc_add (container, section_name, &accessors, label); > + enum btf_core_reloc_kind kind = (enum btf_core_reloc_kind) kind_val; > > - return expand_normal (arg); > + if (TREE_CODE (src) == SSA_NAME) > + { > + gimple *def_stmt = SSA_NAME_DEF_STMT (src); > + if (is_gimple_assign (def_stmt)) > + src = gimple_assign_rhs1 (def_stmt); > + } > + if (TREE_CODE (src) == ADDR_EXPR) > + src = TREE_OPERAND (src, 0); > + > + tree result = bpf_core_field_info (src, kind); If I read this properly, for something like: __builtin_preserve_field_info (a = foo.bar + bar.baz, KIND) On one side CO-RE relocations are computed for both foo.bar and bar.baz (I see bpf_core_compute does that) as expected. But then the builtin returns information that can only apply to one access. Which one? > + > + if (result != error_mark_node) > + maybe_make_core_relo (src, kind); > + > + return expand_normal (result); > } > + > gcc_unreachable (); > } > > @@ -1259,17 +1406,29 @@ bpf_core_get_index (const tree node) > __builtin_preserve_access_index. 
*/ > > static tree > -bpf_core_newdecl (tree type) > +bpf_core_newdecl (tree type, bool is_pai) > { > - tree rettype = build_function_type_list (type, type, NULL); > + tree rettype; > char name[80]; > - int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_"); > + static unsigned long pai_count = 0; > + static unsigned long pfi_count = 0; > > - static unsigned long cnt = 0; > - len = snprintf (name + len, sizeof (name) - len, "%lu", cnt++); > + if (is_pai) > + { > + rettype = build_function_type_list (type, type, NULL); > + int len = snprintf (name, sizeof (name), "%s", "__builtin_pai_"); > + len = snprintf (name + len, sizeof (name) - len, "%lu", pai_count++); > + } > + else > + { > + rettype = build_function_type_list (unsigned_type_node, type, > + unsigned_type_node, NULL); > + int len = snprintf (name, sizeof (name), "%s", "__builtin_pfi_"); > + len = snprintf (name + len, sizeof (name) - len, "%lu", pfi_count++); > + } > > - return add_builtin_function_ext_scope (name, rettype, -1, BUILT_IN_MD, > NULL, > - NULL_TREE); > + return add_builtin_function_ext_scope (name, rettype, is_pai ? -1 : -2, > + BUILT_IN_MD, NULL, NULL_TREE); > } > > /* Return whether EXPR could access some aggregate data structure that > @@ -1278,22 +1437,33 @@ bpf_core_newdecl (tree type) > static int > bpf_core_is_maybe_aggregate_access (tree expr) > { > - enum tree_code code = TREE_CODE (expr); > - if (code == COMPONENT_REF || code == ARRAY_REF) > - return 1; > - > - if (code == ADDR_EXPR) > + switch (TREE_CODE (expr)) > + { > + case COMPONENT_REF: > + case BIT_FIELD_REF: > + case ARRAY_REF: > + case ARRAY_RANGE_REF: > + return 1; > + case ADDR_EXPR: > + case NOP_EXPR: > return bpf_core_is_maybe_aggregate_access (TREE_OPERAND (expr, 0)); > - > + default:; > + } > return 0; > } > > +struct core_walk_data { > + location_t loc; > + tree arg; > +}; > + > /* Callback function used with walk_tree from > bpf_resolve_overloaded_builtin. 
*/ > > static tree > bpf_core_walk (tree *tp, int *walk_subtrees, void *data) > { > - location_t loc = *((location_t *) data); > + struct core_walk_data *dat = (struct core_walk_data *) data; > + bool is_pai = dat->arg == NULL_TREE; > > /* If this is a type, don't do anything. */ > if (TYPE_P (*tp)) > @@ -1302,10 +1472,18 @@ bpf_core_walk (tree *tp, int *walk_subtrees, void > *data) > return NULL_TREE; > } > > + /* Build a new function call to a resolved builtin for the desired > operation. > + If this is a preserve_field_info call, pass along the argument to the > + resolved builtin call. */ > if (bpf_core_is_maybe_aggregate_access (*tp)) > { > - tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp)); > - tree newcall = build_call_expr_loc (loc, newdecl, 1, *tp); > + tree newdecl = bpf_core_newdecl (TREE_TYPE (*tp), is_pai); > + tree newcall; > + if (is_pai) > + newcall = build_call_expr_loc (dat->loc, newdecl, 1, *tp); > + else > + newcall = build_call_expr_loc (dat->loc, newdecl, 2, *tp, dat->arg); > + > *tp = newcall; > *walk_subtrees = 0; > } > @@ -1344,7 +1522,12 @@ bpf_small_register_classes_for_mode_p (machine_mode > mode) > static tree > bpf_resolve_overloaded_builtin (location_t loc, tree fndecl, void *arglist) > { > - if (DECL_MD_FUNCTION_CODE (fndecl) != BPF_BUILTIN_PRESERVE_ACCESS_INDEX) > + bool is_pai = DECL_MD_FUNCTION_CODE (fndecl) > + == BPF_BUILTIN_PRESERVE_ACCESS_INDEX; > + bool is_pfi = DECL_MD_FUNCTION_CODE (fndecl) > + == BPF_BUILTIN_PRESERVE_FIELD_INFO; > + > + if (!is_pai && !is_pfi) > return NULL_TREE; > > /* We only expect one argument, but it may be an arbitrarily-complicated > @@ -1352,16 +1535,17 @@ bpf_resolve_overloaded_builtin (location_t loc, tree > fndecl, void *arglist) > vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (arglist); > unsigned n_params = params ? 
params->length() : 0; > > - if (n_params != 1) > + if ((is_pai && n_params != 1) || (is_pfi && n_params != 2)) > { > - error_at (loc, "expected exactly 1 argument"); > + error_at (loc, "wrong number of arguments"); > return NULL_TREE; > } > > tree param = (*params)[0]; > > - /* If not generating BPF_CORE information, the builtin does nothing. */ > - if (!TARGET_BPF_CORE) > + /* If not generating BPF_CORE information, preserve_access_index does > nothing, > + and simply "resolves to" the argument. */ > + if (!TARGET_BPF_CORE && is_pai) > return param; > > /* Do remove_c_maybe_const_expr for the arg. > @@ -1387,7 +1571,11 @@ bpf_resolve_overloaded_builtin (location_t loc, tree > fndecl, void *arglist) > This ensures that all the relevant information remains within the > expression trees the builtin finally gets. */ > > - walk_tree (¶m, bpf_core_walk, (void *) &loc, NULL); > + struct core_walk_data data; > + data.loc = loc; > + data.arg = is_pai ? NULL_TREE : (*params)[1]; > + > + walk_tree (¶m, bpf_core_walk, (void *) &data, NULL); > > return param; > } > @@ -1524,7 +1712,8 @@ handle_attr_preserve (function *fn) > emit_label (label); > > /* Add the CO-RE relocation information to the BTF > container. 
*/ > - bpf_core_reloc_add (container, section_name, &accessors, > label); > + bpf_core_reloc_add (container, section_name, &accessors, > label, > + BPF_RELO_FIELD_BYTE_OFFSET); > } > } > } > diff --git a/gcc/config/bpf/coreout.cc b/gcc/config/bpf/coreout.cc > index 8897a045ea1..9f71040846b 100644 > --- a/gcc/config/bpf/coreout.cc > +++ b/gcc/config/bpf/coreout.cc > @@ -152,7 +152,8 @@ static GTY (()) vec<bpf_core_section_ref, va_gc> > *bpf_core_sections; > > void > bpf_core_reloc_add (const tree type, const char * section_name, > - vec<unsigned int> *accessors, rtx_code_label *label) > + vec<unsigned int> *accessors, rtx_code_label *label, > + enum btf_core_reloc_kind kind) > { > char buf[40]; > unsigned int i, n = 0; > @@ -173,7 +174,7 @@ bpf_core_reloc_add (const tree type, const char * > section_name, > > bpfcr->bpfcr_type = get_btf_id (ctf_lookup_tree_type (ctfc, type)); > bpfcr->bpfcr_insn_label = label; > - bpfcr->bpfcr_kind = BPF_RELO_FIELD_BYTE_OFFSET; > + bpfcr->bpfcr_kind = kind; > > /* Add the CO-RE reloc to the appropriate section. */ > bpf_core_section_ref sec; > diff --git a/gcc/config/bpf/coreout.h b/gcc/config/bpf/coreout.h > index 3c7bdfd8c2f..498853f6e00 100644 > --- a/gcc/config/bpf/coreout.h > +++ b/gcc/config/bpf/coreout.h > @@ -103,7 +103,7 @@ extern void btf_ext_init (void); > extern void btf_ext_output (void); > > extern void bpf_core_reloc_add (const tree, const char *, vec<unsigned int> > *, > - rtx_code_label *); > + rtx_code_label *, enum btf_core_reloc_kind); > extern int bpf_core_get_sou_member_index (ctf_container_ref, const tree); > > #ifdef __cplusplus > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 04af0584d82..0d3bcb24ab9 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -15745,6 +15745,27 @@ Load 32-bits from the @code{struct sk_buff} packet > data pointed by the register > BPF Compile Once-Run Everywhere (CO-RE) support. 
Instruct GCC to generate > CO-RE relocation records for any accesses to aggregate data structures > (struct, union, array types) in @var{expr}. This builtin is otherwise > transparent, the return value is whatever @var{expr} evaluates to. It is also > overloaded: @var{expr} may be of any type (not necessarily a pointer), the > return type is the same. Has no effect if @code{-mco-re} is not in effect > (either specified or implied). > @end deftypefn > > +@deftypefn {Built-in Function} unsigned int __builtin_preserve_field_info > (@var{expr}, unsigned int @var{kind}) > +BPF Compile Once-Run Everywhere (CO-RE) support. This builtin is used to > +extract information to aid in struct/union relocations. @var{expr} is > +an access to a field of a struct or union. Depending on @var{kind}, different > +information is returned to the program. A CO-RE relocation for the access in > +@var{expr} with kind @var{kind} is recorded if @code{-mco-re} is in effect. > + > +@var{kind} is one of: > +@smallexample > +enum > +@{ > + FIELD_BYTE_OFFSET = 0, > + FIELD_BYTE_SIZE, > + FIELD_EXISTENCE, > + FIELD_SIGNEDNESS, > + FIELD_LSHIFT_U64, > + FIELD_RSHIFT_U64, > +@}; > +@end smallexample > +@end deftypefn > + We need to document what all these returned values are. It may be more-or-less obvious, but it is better to be explicit IMO. It would also be good to document the fact the returned value is always known at compile-time.