Today, socket filters only deal with linear skbs. This change allows eBPF programs to look into the non-linear part of an skb, e.g. the skb frags. This is useful when users need to inspect data that is not contained in the linear part of the skb.
Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com> Reviewed-by: Shannon Nelson <shannon.nel...@oracle.com> Reviewed-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 10 ++++++- net/core/filter.c | 44 +++++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 10 ++++++- tools/testing/selftests/bpf/bpf_helpers.h | 2 ++ 5 files changed, 64 insertions(+), 4 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 9dbcb9d..603b8bf 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -500,6 +500,7 @@ struct sk_filter { struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; + u8 index; void *data_meta; void *data_end; }; @@ -534,6 +535,7 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); cb->data_meta = skb->data - skb_metadata_len(skb); cb->data_end = skb->data + skb_headlen(skb); + cb->index = 0; } static inline u8 *bpf_skb_cb(struct sk_buff *skb) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94d333..5fe9668 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1902,6 +1902,13 @@ struct bpf_stack_build_id { * egress otherwise). This is the only flag supported for now. * Return * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_next_skb_frag(struct sk_buff *skb) + * Description + * This helper allows users to look into non-linear part of skb + * e.g. skb frags. + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1976,7 +1983,8 @@ struct bpf_stack_build_id { FN(fib_lookup), \ FN(sock_hash_update), \ FN(msg_redirect_hash), \ - FN(sk_redirect_hash), + FN(sk_redirect_hash), \ + FN(next_skb_frag), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 51ea7dd..fd8e90f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3752,6 +3752,38 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_next_skb_frag, struct sk_buff *, skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + const skb_frag_t *frag; + + if (skb->data_len == 0) + return -ENODATA; + + if (cb->index == (u8)skb_shinfo(skb)->nr_frags) + return -ENODATA; + + /* get the frag start and end address into data_meta and data_end + * respectively so eBPF program can look into skb frag + */ + frag = &skb_shinfo(skb)->frags[cb->index]; + cb->data_meta = page_address(skb_frag_page(frag)) + + frag->page_offset; + cb->data_end = cb->data_meta + skb_frag_size(frag); + + /* update frag index */ + cb->index++; + + return 0; +} + +static const struct bpf_func_proto bpf_next_skb_frag_proto = { + .func = bpf_next_skb_frag, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { @@ -4415,6 +4447,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; + case BPF_FUNC_next_skb_frag: + return &bpf_next_skb_frag_proto; default: return bpf_base_func_proto(func_id); } @@ -4698,10 +4732,16 @@ static bool sk_filter_is_valid_access(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { - 
case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): - case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET; + break; case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d94d333..5fe9668 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1902,6 +1902,13 @@ struct bpf_stack_build_id { * egress otherwise). This is the only flag supported for now. * Return * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_next_skb_frag(struct sk_buff *skb) + * Description + * This helper allows users to look into non-linear part of skb + * e.g. skb frags. + * Return + * 0 on success, or a negative error in case of failure. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1976,7 +1983,8 @@ struct bpf_stack_build_id { FN(fib_lookup), \ FN(sock_hash_update), \ FN(msg_redirect_hash), \ - FN(sk_redirect_hash), + FN(sk_redirect_hash), \ + FN(next_skb_frag), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 8f143df..51f2153 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -114,6 +114,8 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) BPF_FUNC_fib_lookup; +static unsigned long long (*bpf_next_skb_frag)(void *ctx) = + (void *) BPF_FUNC_next_skb_frag; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- 1.8.3.1