From: Petar Penkov <ppen...@google.com>

Add a hook for BPF programs of type BPF_PROG_TYPE_FLOW_DISSECTOR with attach type BPF_FLOW_DISSECTOR. When such a program is attached, it is executed in the flow dissector path in place of the built-in dissection. The program is kept in a single global variable, so it is shared by all flow dissectors and only one program can be attached at a time.
Signed-off-by: Petar Penkov <ppen...@google.com> Signed-off-by: Willem de Bruijn <will...@google.com> --- include/linux/bpf_types.h | 1 + include/linux/skbuff.h | 7 + include/net/flow_dissector.h | 16 +++ include/uapi/linux/bpf.h | 14 +- kernel/bpf/syscall.c | 8 ++ kernel/bpf/verifier.c | 2 + net/core/filter.c | 157 ++++++++++++++++++++++ net/core/flow_dissector.c | 76 +++++++++++ tools/bpf/bpftool/prog.c | 1 + tools/include/uapi/linux/bpf.h | 5 +- tools/lib/bpf/libbpf.c | 2 + tools/testing/selftests/bpf/bpf_helpers.h | 3 + 12 files changed, 290 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index cd26c090e7c0..22083712dd18 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) #endif +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17a13e4785fc..ce0e863f02a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -243,6 +243,8 @@ struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; +struct bpf_prog; +union bpf_attr; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog); + +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); + bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 
6a4586dcdede..edb919d320c1 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -270,6 +270,22 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; extern struct flow_dissector flow_keys_basic_dissector; +/* struct bpf_flow_dissect_cb: + * + * This struct is used to pass parameters to BPF programs of type + * BPF_PROG_TYPE_FLOW_DISSECTOR. Before such a program is run, the caller sets + * the control block of the skb to be a struct of this type. The first field is + * used to communicate the next header offset between the BPF programs and the + * first value of it is passed from the kernel. The last two fields are used for + * writing out flow keys. + */ +struct bpf_flow_dissect_cb { + u16 nhoff; + u16 unused; + void *target_container; + struct flow_dissector *flow_dissector; +}; + /* struct flow_keys_digest: * * This structure is used to hold a digest of the full flow keys. This is a diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 66917a4eba27..8bc0fdab685d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -152,6 +152,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_SEG6LOCAL, BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, }; enum bpf_attach_type { @@ -172,6 +173,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, __MAX_BPF_ATTACH_TYPE }; @@ -2141,6 +2143,15 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_flow_dissector_write_keys(const struct sk_buff *skb, const void *from, u32 len, enum flow_dissector_key_id key_id) + * Description + * Try to write *len* bytes from the source pointer into the offset + * of the key with id *key_id*. If *len* is different from the + * size of the key, an error is returned. 
If the key is not used, + * this function exits with no effect and code 0. + * Return + * 0 on success, negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2226,7 +2237,8 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(flow_dissector_write_keys), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 43727ed0d94a..a06568841a92 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1616,6 +1616,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_LIRC_MODE2: ptype = BPF_PROG_TYPE_LIRC_MODE2; break; + case BPF_FLOW_DISSECTOR: + ptype = BPF_PROG_TYPE_FLOW_DISSECTOR; + break; default: return -EINVAL; } @@ -1637,6 +1640,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_LIRC_MODE2: ret = lirc_prog_attach(attr, prog); break; + case BPF_PROG_TYPE_FLOW_DISSECTOR: + ret = skb_flow_dissector_bpf_prog_attach(attr, prog); + break; default: ret = cgroup_bpf_prog_attach(attr, ptype, prog); } @@ -1689,6 +1695,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); case BPF_LIRC_MODE2: return lirc_prog_detach(attr); + case BPF_FLOW_DISSECTOR: + return skb_flow_dissector_bpf_prog_detach(attr); default: return -EINVAL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ca90679a7fe5..6d3f268fa8e0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1321,6 +1321,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_FLOW_DISSECTOR: if (meta) return meta->pkt_access; @@ -3976,6 +3977,7 @@ static bool may_access_skb(enum bpf_prog_type type) 
case BPF_PROG_TYPE_SOCKET_FILTER:
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		return true;
 	default:
 		return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..03d3037e6508 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4820,6 +4820,101 @@ bool bpf_helper_changes_pkt_data(void *func)
 	return false;
 }
 
+/* Size of the flow key structure stored for @key_id, or -EINVAL if @key_id
+ * is not a key that bpf_flow_dissector_write_keys() knows how to write.
+ */
+static int bpf_flow_dissector_key_size(enum flow_dissector_key_id key_id)
+{
+	switch (key_id) {
+	case FLOW_DISSECTOR_KEY_CONTROL:
+	case FLOW_DISSECTOR_KEY_ENC_CONTROL:
+		return sizeof(struct flow_dissector_key_control);
+	case FLOW_DISSECTOR_KEY_BASIC:
+		return sizeof(struct flow_dissector_key_basic);
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+	case FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS:
+		return sizeof(struct flow_dissector_key_ipv4_addrs);
+	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+	case FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS:
+		return sizeof(struct flow_dissector_key_ipv6_addrs);
+	case FLOW_DISSECTOR_KEY_ICMP:
+		return sizeof(struct flow_dissector_key_icmp);
+	case FLOW_DISSECTOR_KEY_PORTS:
+	case FLOW_DISSECTOR_KEY_ENC_PORTS:
+		return sizeof(struct flow_dissector_key_ports);
+	case FLOW_DISSECTOR_KEY_ETH_ADDRS:
+		return sizeof(struct flow_dissector_key_eth_addrs);
+	case FLOW_DISSECTOR_KEY_TIPC:
+		return sizeof(struct flow_dissector_key_tipc);
+	case FLOW_DISSECTOR_KEY_ARP:
+		return sizeof(struct flow_dissector_key_arp);
+	case FLOW_DISSECTOR_KEY_VLAN:
+	case FLOW_DISSECTOR_KEY_CVLAN:
+		return sizeof(struct flow_dissector_key_vlan);
+	case FLOW_DISSECTOR_KEY_FLOW_LABEL:
+		return sizeof(struct flow_dissector_key_tags);
+	case FLOW_DISSECTOR_KEY_GRE_KEYID:
+	case FLOW_DISSECTOR_KEY_ENC_KEYID:
+	case FLOW_DISSECTOR_KEY_MPLS_ENTROPY:
+		return sizeof(struct flow_dissector_key_keyid);
+	case FLOW_DISSECTOR_KEY_MPLS:
+		return sizeof(struct flow_dissector_key_mpls);
+	case FLOW_DISSECTOR_KEY_TCP:
+		return sizeof(struct flow_dissector_key_tcp);
+	case FLOW_DISSECTOR_KEY_IP:
+	case FLOW_DISSECTOR_KEY_ENC_IP:
+		return sizeof(struct flow_dissector_key_ip);
+	default:
+		return -EINVAL;
+	}
+}
+
+/* bpf_flow_dissector_write_keys() helper: copy @len bytes from @from into
+ * the slot of @key_id in the target container of the dissection in progress.
+ * Returns 0 on success or when the dissector does not use @key_id, -EINVAL
+ * for an unknown @key_id or a @len that differs from the key's size.
+ */
+BPF_CALL_4(bpf_flow_dissector_write_keys, const struct sk_buff *, skb,
+	   const void *, from, u32, len, enum flow_dissector_key_id, key_id)
+{
+	struct bpf_flow_dissect_cb *cb;
+	int key_size;
+
+	/* Reject key ids this helper does not know before they are handed
+	 * to dissector_uses_key() and skb_flow_dissector_target().
+	 */
+	key_size = bpf_flow_dissector_key_size(key_id);
+	if (key_size < 0)
+		return key_size;
+
+	cb = (struct bpf_flow_dissect_cb *)bpf_skb_cb(skb);
+
+	/* Make sure the dissector actually uses the key. It is not an error if
+	 * it does not, but we should not continue past this point in that case
+	 */
+	if (!dissector_uses_key(cb->flow_dissector, key_id))
+		return 0;
+
+	/* Make sure the length is correct */
+	if (len != (u32)key_size)
+		return -EINVAL;
+
+	memcpy(skb_flow_dissector_target(cb->flow_dissector, key_id,
+					 cb->target_container),
+	       from, len);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_flow_dissector_write_keys_proto = {
+	.func		= bpf_flow_dissector_write_keys,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -5100,6 +5195,20 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+/* Helpers callable from BPF_PROG_TYPE_FLOW_DISSECTOR programs */
+static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_flow_dissector_write_keys:
+		return &bpf_flow_dissector_write_keys_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
 static const struct bpf_func_proto *
 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5738,6 +5847,44 @@ static bool sk_msg_is_valid_access(int off, int size,
 	return true;
 }
 
+/* Context access rules for BPF_PROG_TYPE_FLOW_DISSECTOR programs: only cb[0]
+ * is writable, data/data_end are exposed as packet pointers, and the fields
+ * rejected below are not accessible at all.
+ */
+static bool flow_dissector_is_valid_access(int off, int size,
+					   enum bpf_access_type type,
+					   const struct bpf_prog *prog,
+					   struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, cb[0]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range_till(struct __sk_buff, cb[1], cb[4]):
+		/* data_meta is given no packet pointer type here, and
+		 * cb[1]..cb[4] hold the kernel's struct bpf_flow_dissect_cb
+		 * pointers rather than program data.
+		 */
+		return false;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -6995,6 +7142,16 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
 const struct bpf_prog_ops sk_msg_prog_ops = {
 };
 
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+	.get_func_proto		= flow_dissector_func_proto,
+	.is_valid_access	= flow_dissector_is_valid_access,
+	.convert_ctx_access	= bpf_convert_ctx_access,
+	.gen_ld_abs		= bpf_gen_ld_abs,
+};
+
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ce9eeeb7c024..767daa231f04 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,6 +25,11 @@
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+
+/* BPF program accessible by all flow dissectors */
+static struct bpf_prog __rcu *flow_dissector_prog;
+static DEFINE_MUTEX(flow_dissector_mutex);
 
 static void dissector_set_key(struct
flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
@@ -62,6 +67,55 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
+/**
+ * skb_flow_dissector_bpf_prog_attach - attach the flow dissector BPF program
+ * @attr: attach attributes passed to the bpf() syscall (not used here)
+ * @prog: program to publish in flow_dissector_prog
+ *
+ * Return: 0 on success, -EEXIST if a program is already attached.
+ */
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
+{
+	struct bpf_prog *attached;
+
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (attached) {
+		/* Only one BPF program can be attached at a time */
+		mutex_unlock(&flow_dissector_mutex);
+		return -EEXIST;
+	}
+	rcu_assign_pointer(flow_dissector_prog, prog);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
+
+/**
+ * skb_flow_dissector_bpf_prog_detach - detach the flow dissector BPF program
+ * @attr: detach attributes passed to the bpf() syscall (not used here)
+ *
+ * Return: 0 on success, -EINVAL if no program is attached.
+ */
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	struct bpf_prog *attached;
+
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (!attached) {
+		mutex_unlock(&flow_dissector_mutex);
+		return -EINVAL;
+	}
+	/* Unpublish the program before dropping the reference to it */
+	RCU_INIT_POINTER(flow_dissector_prog, NULL);
+	bpf_prog_put(attached);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
+
 /**
  * skb_flow_get_be16 - extract be16 entity
  * @skb: sk_buff to extract from
@@ -619,6 +673,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	struct bpf_prog *attached;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -658,6 +713,50 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
+	rcu_read_lock();
+	/* The program dereferences the skb; only run it when there is one */
+	attached = skb ? rcu_dereference(flow_dissector_prog) : NULL;
+	if (attached) {
+		/* The const qualifier is discarded while the BPF program
+		 * runs, but every change made to the skb (its control block)
+		 * is reverted after the program returns. Therefore,
+		 * __skb_flow_dissect does not alter the skb.
+		 *
+		 * All of skb->cb is saved and restored rather than only the
+		 * BPF_SKB_CB_LEN bytes of BPF scratch space: struct
+		 * bpf_flow_dissect_cb holds two pointers and is not
+		 * guaranteed to fit in that area, and
+		 * bpf_compute_data_pointers() writes to the control block
+		 * as well.
+		 */
+		struct sk_buff *mutable_skb = (struct sk_buff *)skb;
+		struct bpf_flow_dissect_cb *cb;
+		u8 cb_saved[sizeof(skb->cb)];
+		u32 result;
+
+		cb = (struct bpf_flow_dissect_cb *)bpf_skb_cb(mutable_skb);
+
+		/* Save Control Block */
+		memcpy(cb_saved, mutable_skb->cb, sizeof(cb_saved));
+		memset(cb, 0, BPF_SKB_CB_LEN);
+
+		/* Pass parameters to the BPF program */
+		cb->nhoff = nhoff;
+		cb->target_container = target_container;
+		cb->flow_dissector = flow_dissector;
+
+		bpf_compute_data_pointers(mutable_skb);
+		result = BPF_PROG_RUN(attached, skb);
+
+		/* Restore state */
+		memcpy(mutable_skb->cb, cb_saved, sizeof(cb_saved));
+
+		key_control->thoff = min_t(u16, key_control->thoff, skb->len);
+		rcu_read_unlock();
+		return result == BPF_OK;
+	}
+	rcu_read_unlock();
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index dce960d22106..b1cd3bc8db70 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
 	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
 	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
 };
 
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..acd74a0dd063 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2226,7 +2228,8 @@ union bpf_attr {
 	FN(get_current_cgroup_id), \
 	FN(get_local_storage), \
 	FN(sk_select_reuseport), \
-	FN(skb_ancestor_cgroup_id),
+	FN(skb_ancestor_cgroup_id), \
+	FN(flow_dissector_write_keys),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abd0f112627..0c749ce1b717 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1502,6 +1502,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		return false;
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_KPROBE:
@@ -2121,6 +2122,7 @@ static const struct {
 	BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
 	BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
 	BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
+	BPF_PROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR),
 	BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
 	BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
 	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..4204c496a04f 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -143,6 +143,9 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
 	(void *) BPF_FUNC_skb_cgroup_id;
 static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
 	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static int (*bpf_flow_dissector_write_keys)(void *ctx, void *src, int len,
+					    int key) =
+	(void *) BPF_FUNC_flow_dissector_write_keys;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
--
2.18.0.865.gffc8e1a3cd6-goog