From: Lorand Jakab <loja...@cisco.com> When user space hands a packet back to the kernel datapath for execution, it is not accompanied by the full flow key, only by packet metadata. This doesn't allow detection of the packet type (L2/L3). Add the OVS_KEY_ATTR_PACKET_ETHERTYPE attribute to the packet metadata, containing 0 for layer 2 packets and the EtherType for layer 3 packets.
Signed-off-by: Lorand Jakab <loja...@cisco.com> Signed-off-by: Simon Horman <simon.hor...@netronome.com> --- v9 [Simon Horman] * Rebase v8 [Lorand Jakab] v7 [Lorand Jakab] * New patch --- include/uapi/linux/openvswitch.h | 2 ++ net/openvswitch/datapath.c | 13 +------------ net/openvswitch/flow.c | 5 ++++- net/openvswitch/flow_netlink.c | 39 +++++++++++++++++++++++++++++++-------- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 53129c03273c..2bc2c12ced78 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -329,6 +329,8 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ + OVS_KEY_ATTR_PACKET_ETHERTYPE, /* be16 Ethernet type for packet + * execution */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 856bd8dba676..780957e78d2b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -546,7 +546,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct datapath *dp; - struct ethhdr *eth; struct vport *input_vport; u16 mru = 0; int len; @@ -567,17 +566,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); - skb_reset_mac_header(packet); - eth = eth_hdr(packet); - - /* Normally, setting the skb 'protocol' field would be handled by a - * call to eth_type_trans(), but it assumes there's a sending - * device, which we may not have. 
*/ - if (eth_proto_is_802_3(eth->h_proto)) - packet->protocol = eth->h_proto; - else - packet->protocol = htons(ETH_P_802_2); - /* Set packet's mru */ if (a[OVS_PACKET_ATTR_MRU]) { mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); @@ -604,6 +592,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; + packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6e174ea5f2bb..d320c2657627 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -471,7 +471,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ if (key->phy.is_layer3) { key->eth.tci = 0; - key->eth.type = skb->protocol; } else { eth = eth_hdr(skb); ether_addr_copy(key->eth.src, eth->h_source); @@ -693,6 +692,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { + key->eth.type = skb->protocol; + return key_extract(skb, key); } @@ -747,6 +748,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (is_teb) skb->protocol = key->eth.type; + else if (is_layer3) + key->eth.type = skb->protocol; return err; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d0ef4fa69bea..2bca1e5e9a18 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -286,7 +286,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. 
*/ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 27); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -359,6 +359,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, + [OVS_KEY_ATTR_PACKET_ETHERTYPE] = { .len = sizeof(__be16) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -816,6 +817,8 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) { + bool is_layer3; + if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -902,15 +905,35 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } - if (is_mask) { + + /* For full flow keys the layer is determined based on the presence of + * OVS_KEY_ATTR_ETHERNET + */ + if (is_mask) /* Always exact match is_layer3 */ - SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask); - } else { - if (*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) - SW_FLOW_KEY_PUT(match, phy.is_layer3, false, is_mask); - else - SW_FLOW_KEY_PUT(match, phy.is_layer3, true, is_mask); + is_layer3 = true; + else + is_layer3 = !(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)); + /* Packets from user space for execution only have metadata key + * attributes. OVS_KEY_ATTR_PACKET_ETHERTYPE is then used to specify + * the starting layer of the packet. 
Packets with Ethernet headers + * have this attribute set to 0 + */ + if (*attrs & (1ULL << OVS_KEY_ATTR_PACKET_ETHERTYPE)) { + __be16 eth_type; + + if (is_mask) { + /* Always exact match packet EtherType */ + eth_type = htons(0xffff); + } else { + eth_type = nla_get_be16(a[OVS_KEY_ATTR_PACKET_ETHERTYPE]); + is_layer3 = ((eth_type == htons(ETH_P_IP)) || + (eth_type == htons(ETH_P_IPV6))); + } + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); } + + SW_FLOW_KEY_PUT(match, phy.is_layer3, is_layer3, is_mask); return 0; } -- 2.7.0.rc3.207.g0ac5344 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev