On Fri, Sep 07, 2018 at 05:11:08PM -0700, Petar Penkov wrote: > From: Petar Penkov <ppen...@google.com> > > Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and > attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector > path. The BPF program is per-network namespace. > > Signed-off-by: Petar Penkov <ppen...@google.com> > Signed-off-by: Willem de Bruijn <will...@google.com> > --- > include/linux/bpf.h | 1 + > include/linux/bpf_types.h | 1 + > include/linux/skbuff.h | 7 ++ > include/net/net_namespace.h | 3 + > include/net/sch_generic.h | 12 ++- > include/uapi/linux/bpf.h | 25 ++++++ > kernel/bpf/syscall.c | 8 ++ > kernel/bpf/verifier.c | 32 ++++++++ > net/core/filter.c | 67 ++++++++++++++++ > net/core/flow_dissector.c | 136 +++++++++++++++++++++++++++++++++ > tools/bpf/bpftool/prog.c | 1 + > tools/include/uapi/linux/bpf.h | 25 ++++++ > tools/lib/bpf/libbpf.c | 2 +
Please split the update to tools/include/uapi/linux/bpf.h into a separate patch 2. We often have conflicts in there, so it is best to keep it separate. Also, please split the tools/lib and tools/bpf changes into patch 3. > 13 files changed, 317 insertions(+), 3 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 523481a3471b..988a00797bcd 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -212,6 +212,7 @@ enum bpf_reg_type { > PTR_TO_PACKET_META, /* skb->data - meta_len */ > PTR_TO_PACKET, /* reg points to skb->data */ > PTR_TO_PACKET_END, /* skb->data + headlen */ > + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ > }; > > /* The information passed from prog-specific *_is_valid_access > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index cd26c090e7c0..22083712dd18 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) > #ifdef CONFIG_INET > BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) > #endif > +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) > > BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) > BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index 17a13e4785fc..ce0e863f02a2 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -243,6 +243,8 @@ struct scatterlist; > struct pipe_inode_info; > struct iov_iter; > struct napi_struct; > +struct bpf_prog; > +union bpf_attr; > > #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) > struct nf_conntrack { > @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector > *flow_dissector, > const struct flow_dissector_key *key, > unsigned int key_count); > > +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, > + struct bpf_prog *prog); > + > +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); 
> + > bool __skb_flow_dissect(const struct sk_buff *skb, > struct flow_dissector *flow_dissector, > void *target_container, > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h > index 9b5fdc50519a..99d4148e0f90 100644 > --- a/include/net/net_namespace.h > +++ b/include/net/net_namespace.h > @@ -43,6 +43,7 @@ struct ctl_table_header; > struct net_generic; > struct uevent_sock; > struct netns_ipvs; > +struct bpf_prog; > > > #define NETDEV_HASHBITS 8 > @@ -145,6 +146,8 @@ struct net { > #endif > struct net_generic __rcu *gen; > > + struct bpf_prog __rcu *flow_dissector_prog; > + > /* Note : following structs are cache line aligned */ > #ifdef CONFIG_XFRM > struct netns_xfrm xfrm; > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h > index a6d00093f35e..1b81ba85fd2d 100644 > --- a/include/net/sch_generic.h > +++ b/include/net/sch_generic.h > @@ -19,6 +19,7 @@ struct Qdisc_ops; > struct qdisc_walker; > struct tcf_walker; > struct module; > +struct bpf_flow_keys; > > typedef int tc_setup_cb_t(enum tc_setup_type type, > void *type_data, void *cb_priv); > @@ -307,9 +308,14 @@ struct tcf_proto { > }; > > struct qdisc_skb_cb { > - unsigned int pkt_len; > - u16 slave_dev_queue_mapping; > - u16 tc_classid; > + union { > + struct { > + unsigned int pkt_len; > + u16 slave_dev_queue_mapping; > + u16 tc_classid; > + }; > + struct bpf_flow_keys *flow_keys; > + }; is this magic really necessary? flow_dissector runs very early in recv path. There is no qdisc or conflicts with tcp/ip use of cb. I think the whole cb block can be used. 
> #define QDISC_CB_PRIV_LEN 20 > unsigned char data[QDISC_CB_PRIV_LEN]; > }; > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 66917a4eba27..3064706fcaaa 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -152,6 +152,7 @@ enum bpf_prog_type { > BPF_PROG_TYPE_LWT_SEG6LOCAL, > BPF_PROG_TYPE_LIRC_MODE2, > BPF_PROG_TYPE_SK_REUSEPORT, > + BPF_PROG_TYPE_FLOW_DISSECTOR, > }; > > enum bpf_attach_type { > @@ -172,6 +173,7 @@ enum bpf_attach_type { > BPF_CGROUP_UDP4_SENDMSG, > BPF_CGROUP_UDP6_SENDMSG, > BPF_LIRC_MODE2, > + BPF_FLOW_DISSECTOR, > __MAX_BPF_ATTACH_TYPE > }; > > @@ -2333,6 +2335,7 @@ struct __sk_buff { > /* ... here. */ > > __u32 data_meta; > + __u32 flow_keys; please use struct bpf_flow_keys *flow_keys; instead. See what we did in 'struct sk_msg_md' and in 'struct sk_reuseport_md'. There is no need to hide pointers in u32. > }; > > struct bpf_tunnel_key { > @@ -2778,4 +2781,26 @@ enum bpf_task_fd_type { > BPF_FD_TYPE_URETPROBE, /* filename + offset */ > }; > > +struct bpf_flow_keys { > + __u16 thoff; > + __u16 addr_proto; /* ETH_P_* of valid addrs */ > + __u8 is_frag; > + __u8 is_first_frag; > + __u8 is_encap; > + __be16 n_proto; > + __u8 ip_proto; > + union { > + struct { > + __be32 ipv4_src; > + __be32 ipv4_dst; > + }; > + struct { > + __u32 ipv6_src[4]; /* in6_addr; network order */ > + __u32 ipv6_dst[4]; /* in6_addr; network order */ > + }; > + }; > + __be16 sport; > + __be16 dport; > +}; > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 3c9636f03bb2..b3c2d09bcf7a 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) > case BPF_LIRC_MODE2: > ptype = BPF_PROG_TYPE_LIRC_MODE2; > break; > + case BPF_FLOW_DISSECTOR: > + ptype = BPF_PROG_TYPE_FLOW_DISSECTOR; > + break; > default: > return -EINVAL; > } > @@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union 
bpf_attr *attr) > case BPF_PROG_TYPE_LIRC_MODE2: > ret = lirc_prog_attach(attr, prog); > break; > + case BPF_PROG_TYPE_FLOW_DISSECTOR: > + ret = skb_flow_dissector_bpf_prog_attach(attr, prog); > + break; > default: > ret = cgroup_bpf_prog_attach(attr, ptype, prog); > } > @@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) > return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); > case BPF_LIRC_MODE2: > return lirc_prog_detach(attr); > + case BPF_FLOW_DISSECTOR: > + return skb_flow_dissector_bpf_prog_detach(attr); > default: > return -EINVAL; > } > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 6ff1bac1795d..8ccbff4fff93 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -261,6 +261,7 @@ static const char * const reg_type_str[] = { > [PTR_TO_PACKET] = "pkt", > [PTR_TO_PACKET_META] = "pkt_meta", > [PTR_TO_PACKET_END] = "pkt_end", > + [PTR_TO_FLOW_KEYS] = "flow_keys", > }; > > static char slot_type_char[] = { > @@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) > case PTR_TO_PACKET: > case PTR_TO_PACKET_META: > case PTR_TO_PACKET_END: > + case PTR_TO_FLOW_KEYS: > case CONST_PTR_TO_MAP: > return true; > default: > @@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct > bpf_verifier_env *env, > case BPF_PROG_TYPE_LWT_XMIT: > case BPF_PROG_TYPE_SK_SKB: > case BPF_PROG_TYPE_SK_MSG: > + case BPF_PROG_TYPE_FLOW_DISSECTOR: > if (meta) > return meta->pkt_access; > > @@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env > *env, int insn_idx, int off, > return -EACCES; > } > > +static int check_flow_keys_access(struct bpf_verifier_env *env, int off, > + int size) > +{ > + if (size < 0 || off < 0 || > + (u64)off + size > sizeof(struct bpf_flow_keys)) { > + verbose(env, "invalid access to flow keys off=%d size=%d\n", > + off, size); > + return -EACCES; > + } > + return 0; > +} > + > static bool __is_pointer_value(bool allow_ptr_leaks, > const 
struct bpf_reg_state *reg) > { > @@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env > *env, > * right in front, treat it the very same way. > */ > return check_pkt_ptr_alignment(env, reg, off, size, strict); > + case PTR_TO_FLOW_KEYS: > + pointer_desc = "flow keys "; > + break; > case PTR_TO_MAP_VALUE: > pointer_desc = "value "; > break; > @@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env > *env, int insn_idx, u32 regn > err = check_packet_access(env, regno, off, size, false); > if (!err && t == BPF_READ && value_regno >= 0) > mark_reg_unknown(env, regs, value_regno); > + } else if (reg->type == PTR_TO_FLOW_KEYS) { > + if (t == BPF_WRITE && value_regno >= 0 && > + is_pointer_value(env, value_regno)) { > + verbose(env, "R%d leaks addr into flow keys\n", > + value_regno); > + return -EACCES; > + } > + > + err = check_flow_keys_access(env, off, size); > + if (!err && t == BPF_READ && value_regno >= 0) > + mark_reg_unknown(env, regs, value_regno); > } else { > verbose(env, "R%d invalid mem access '%s'\n", regno, > reg_type_str[reg->type]); > @@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct > bpf_verifier_env *env, int regno, > case PTR_TO_PACKET_META: > return check_packet_access(env, regno, reg->off, access_size, > zero_size_allowed); > + case PTR_TO_FLOW_KEYS: > + return check_flow_keys_access(env, reg->off, access_size); > case PTR_TO_MAP_VALUE: > return check_map_access(env, regno, reg->off, access_size, > zero_size_allowed); > @@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct > bpf_reg_state *rcur, > case PTR_TO_CTX: > case CONST_PTR_TO_MAP: > case PTR_TO_PACKET_END: > + case PTR_TO_FLOW_KEYS: > /* Only valid matches are exact, which memcmp() above > * would have accepted > */ > diff --git a/net/core/filter.c b/net/core/filter.c > index 8cb242b4400f..bc3725c26794 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5122,6 +5122,17 @@ sk_skb_func_proto(enum 
bpf_func_id func_id, const > struct bpf_prog *prog) > } > } > > +static const struct bpf_func_proto * > +flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog > *prog) > +{ > + switch (func_id) { > + case BPF_FUNC_skb_load_bytes: > + return &bpf_skb_load_bytes_proto; > + default: > + return bpf_base_func_proto(func_id); > + } > +} > + > static const struct bpf_func_proto * > lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > { > @@ -5237,6 +5248,7 @@ static bool bpf_skb_is_valid_access(int off, int size, > enum bpf_access_type type > case bpf_ctx_range(struct __sk_buff, data): > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, data_end): > + case bpf_ctx_range(struct __sk_buff, flow_keys): > if (size != size_default) > return false; > break; > @@ -5265,6 +5277,7 @@ static bool sk_filter_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, data): > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, data_end): > + case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > return false; > } > @@ -5290,6 +5303,7 @@ static bool lwt_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, tc_classid): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > case bpf_ctx_range(struct __sk_buff, data_meta): > + case bpf_ctx_range(struct __sk_buff, flow_keys): > return false; > } > > @@ -5500,6 +5514,7 @@ static bool tc_cls_act_is_valid_access(int off, int > size, > case bpf_ctx_range(struct __sk_buff, data_end): > info->reg_type = PTR_TO_PACKET_END; > break; > + case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > return false; > } > @@ -5701,6 +5716,7 @@ static bool sk_skb_is_valid_access(int off, int size, > switch (off) { > case bpf_ctx_range(struct __sk_buff, tc_classid): > case 
bpf_ctx_range(struct __sk_buff, data_meta): > + case bpf_ctx_range(struct __sk_buff, flow_keys): > return false; > } > > @@ -5760,6 +5776,39 @@ static bool sk_msg_is_valid_access(int off, int size, > return true; > } > > +static bool flow_dissector_is_valid_access(int off, int size, > + enum bpf_access_type type, > + const struct bpf_prog *prog, > + struct bpf_insn_access_aux *info) > +{ > + if (type == BPF_WRITE) { > + switch (off) { > + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): > + break; > + default: > + return false; > + } > + } > + > + switch (off) { > + case bpf_ctx_range(struct __sk_buff, data): > + info->reg_type = PTR_TO_PACKET; > + break; > + case bpf_ctx_range(struct __sk_buff, data_end): > + info->reg_type = PTR_TO_PACKET_END; > + break; > + case bpf_ctx_range(struct __sk_buff, flow_keys): > + info->reg_type = PTR_TO_FLOW_KEYS; > + break; > + case bpf_ctx_range(struct __sk_buff, tc_classid): > + case bpf_ctx_range(struct __sk_buff, data_meta): > + case bpf_ctx_range_till(struct __sk_buff, family, local_port): > + return false; > + } > + > + return bpf_skb_is_valid_access(off, size, type, prog, info); > +} > + > static u32 bpf_convert_ctx_access(enum bpf_access_type type, > const struct bpf_insn *si, > struct bpf_insn *insn_buf, > @@ -6054,6 +6103,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type > type, > bpf_target_off(struct sock_common, > skc_num, 2, target_size)); > break; > + > + case offsetof(struct __sk_buff, flow_keys): > + off = si->off; > + off -= offsetof(struct __sk_buff, flow_keys); > + off += offsetof(struct sk_buff, cb); > + off += offsetof(struct qdisc_skb_cb, flow_keys); > + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, > + si->src_reg, off); > + break; > } > > return insn - insn_buf; > @@ -7017,6 +7075,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = { > const struct bpf_prog_ops sk_msg_prog_ops = { > }; > > +const struct bpf_verifier_ops flow_dissector_verifier_ops = { > + .get_func_proto = 
flow_dissector_func_proto, > + .is_valid_access = flow_dissector_is_valid_access, > + .convert_ctx_access = bpf_convert_ctx_access, > +}; > + > +const struct bpf_prog_ops flow_dissector_prog_ops = { > +}; > + > int sk_detach_filter(struct sock *sk) > { > int ret = -ENOENT; > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c > index ce9eeeb7c024..7eed48c46a94 100644 > --- a/net/core/flow_dissector.c > +++ b/net/core/flow_dissector.c > @@ -25,6 +25,9 @@ > #include <net/flow_dissector.h> > #include <scsi/fc/fc_fcoe.h> > #include <uapi/linux/batadv_packet.h> > +#include <linux/bpf.h> > + > +static DEFINE_MUTEX(flow_dissector_mutex); > > static void dissector_set_key(struct flow_dissector *flow_dissector, > enum flow_dissector_key_id key_id) > @@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector > *flow_dissector, > } > EXPORT_SYMBOL(skb_flow_dissector_init); > > +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, > + struct bpf_prog *prog) > +{ > + struct bpf_prog *attached; > + struct net *net; > + > + net = current->nsproxy->net_ns; > + mutex_lock(&flow_dissector_mutex); > + attached = rcu_dereference_protected(net->flow_dissector_prog, > + > lockdep_is_held(&flow_dissector_mutex)); > + if (attached) { > + /* Only one BPF program can be attached at a time */ > + mutex_unlock(&flow_dissector_mutex); > + return -EEXIST; > + } > + rcu_assign_pointer(net->flow_dissector_prog, prog); > + mutex_unlock(&flow_dissector_mutex); > + return 0; > +} > + > +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) > +{ > + struct bpf_prog *attached; > + struct net *net; > + > + net = current->nsproxy->net_ns; > + mutex_lock(&flow_dissector_mutex); > + attached = rcu_dereference_protected(net->flow_dissector_prog, > + > lockdep_is_held(&flow_dissector_mutex)); > + if (!attached) { > + mutex_unlock(&flow_dissector_mutex); > + return -ENOENT; > + } > + bpf_prog_put(attached); > + RCU_INIT_POINTER(net->flow_dissector_prog, 
NULL); > + mutex_unlock(&flow_dissector_mutex); > + return 0; > +} > /** > * skb_flow_get_be16 - extract be16 entity > * @skb: sk_buff to extract from > @@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs) > return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS); > } > > +static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, > + struct flow_dissector *flow_dissector, > + void *target_container) > +{ > + struct flow_dissector_key_control *key_control; > + struct flow_dissector_key_basic *key_basic; > + struct flow_dissector_key_addrs *key_addrs; > + struct flow_dissector_key_ports *key_ports; > + > + key_control = skb_flow_dissector_target(flow_dissector, > + FLOW_DISSECTOR_KEY_CONTROL, > + target_container); > + key_control->thoff = flow_keys->thoff; > + if (flow_keys->is_frag) > + key_control->flags |= FLOW_DIS_IS_FRAGMENT; > + if (flow_keys->is_first_frag) > + key_control->flags |= FLOW_DIS_FIRST_FRAG; > + if (flow_keys->is_encap) > + key_control->flags |= FLOW_DIS_ENCAPSULATION; > + > + key_basic = skb_flow_dissector_target(flow_dissector, > + FLOW_DISSECTOR_KEY_BASIC, > + target_container); > + key_basic->n_proto = flow_keys->n_proto; > + key_basic->ip_proto = flow_keys->ip_proto; > + > + if (flow_keys->addr_proto == ETH_P_IP && > + dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { > + key_addrs = skb_flow_dissector_target(flow_dissector, > + > FLOW_DISSECTOR_KEY_IPV4_ADDRS, > + target_container); > + key_addrs->v4addrs.src = flow_keys->ipv4_src; > + key_addrs->v4addrs.dst = flow_keys->ipv4_dst; > + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + } else if (flow_keys->addr_proto == ETH_P_IPV6 && > + dissector_uses_key(flow_dissector, > + FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { > + key_addrs = skb_flow_dissector_target(flow_dissector, > + > FLOW_DISSECTOR_KEY_IPV6_ADDRS, > + target_container); > + memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src, > + sizeof(key_addrs->v6addrs)); > + key_control->addr_type = 
FLOW_DISSECTOR_KEY_IPV6_ADDRS; > + } > + > + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { > + key_ports = skb_flow_dissector_target(flow_dissector, > + FLOW_DISSECTOR_KEY_PORTS, > + target_container); > + key_ports->src = flow_keys->sport; > + key_ports->dst = flow_keys->dport; > + } > +} > + > /** > * __skb_flow_dissect - extract the flow_keys struct and return it > * @skb: sk_buff to extract the flow from, can be NULL if the rest are > specified > @@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, > struct flow_dissector_key_vlan *key_vlan; > enum flow_dissect_ret fdret; > enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; > + struct bpf_prog *attached; > int num_hdrs = 0; > u8 ip_proto = 0; > bool ret; > @@ -658,6 +754,46 @@ bool __skb_flow_dissect(const struct sk_buff *skb, > FLOW_DISSECTOR_KEY_BASIC, > target_container); > > + rcu_read_lock(); > + attached = skb ? rcu_dereference(dev_net(skb->dev)->flow_dissector_prog) > + : NULL; > + if (attached) { > + /* Note that even though the const qualifier is discarded > + * throughout the execution of the BPF program, all changes(the > + * control block) are reverted after the BPF program returns. > + * Therefore, __skb_flow_dissect does not alter the skb. > + */ > + struct bpf_flow_keys flow_keys = {}; > + struct qdisc_skb_cb cb_saved; > + struct qdisc_skb_cb *cb; > + u16 *pseudo_cb; > + u32 result; > + > + cb = qdisc_skb_cb(skb); > + pseudo_cb = (u16 *)bpf_skb_cb((struct sk_buff *)skb); > + > + /* Save Control Block */ > + memcpy(&cb_saved, cb, sizeof(cb_saved)); > + memset(cb, 0, sizeof(cb_saved)); > + > + /* Pass parameters to the BPF program */ > + cb->flow_keys = &flow_keys; > + *pseudo_cb = nhoff; I don't understand this bit. What is this pseudo_cb, and why does nhoff go in there? Is this some odd way to pass it into the prog? 
> + > + bpf_compute_data_pointers((struct sk_buff *)skb); > + result = BPF_PROG_RUN(attached, skb); > + > + /* Restore state */ > + memcpy(cb, &cb_saved, sizeof(cb_saved)); > + > + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, > + target_container); > + key_control->thoff = min_t(u16, key_control->thoff, skb->len); > + rcu_read_unlock(); > + return result == BPF_OK; > + } > + rcu_read_unlock(); > + > if (dissector_uses_key(flow_dissector, > FLOW_DISSECTOR_KEY_ETH_ADDRS)) { > struct ethhdr *eth = eth_hdr(skb);