This commit relaxes the assumption that all packets have an Ethernet header, and adds support for layer 3 flows. For each packet received on the Linux kernel datapath the l2 and l3 members of struct ofpbuf are initialized appropriately, and some functions now expect this (notably flow_extract()), in order to differentiate between layer 2 and layer 3 packets. struct flow now has a new 'base_layer' member, because we cannot assume that a flow has no Ethernet header when eth_src and eth_dst are 0. For layer 3 packets, the protocol type is still stored in the eth_type member.
Switching L2->L3 and L3->L2 are both implemented by adding the pop_eth and push_eth actions respectively when a transition is detected. The push_eth action puts 0s on both source and destination MACs. These addresses can be modified with mod_dl_dst and mod_dl_src actions. Added new prerequisite MFP_ETHERNET for fields MFF_VLAN_TCI, MFF_DL_VLAN, MFF_VLAN_VID and MFF_DL_VLAN_PCP. Signed-off-by: Lorand Jakab <loja...@cisco.com> --- build-aux/extract-ofp-fields | 1 + lib/bfd.c | 1 + lib/dpif-netdev.c | 3 +- lib/dpif-netlink.c | 8 +++ lib/dpif.c | 6 ++- lib/flow.c | 124 +++++++++++++++++++++++++++---------------- lib/flow.h | 16 ++++-- lib/match.c | 2 +- lib/meta-flow.c | 5 ++ lib/meta-flow.h | 9 ++-- lib/netdev-dummy.c | 1 + lib/netdev-linux.c | 1 + lib/nx-match.c | 2 +- lib/odp-util.c | 34 ++++++++---- lib/odp-util.h | 2 +- lib/ofp-print.c | 19 ++++--- lib/ofp-print.h | 3 +- lib/ofp-util.c | 2 +- lib/ofpbuf.h | 12 +++-- lib/packets.c | 2 + lib/pcap-file.c | 1 + ofproto/ofproto-dpif-xlate.c | 30 ++++++++--- ofproto/ofproto-dpif-xlate.h | 2 +- ofproto/ofproto-dpif.c | 3 +- ofproto/ofproto.c | 1 + tests/ofproto-dpif.at | 6 +-- tests/vlan-splinters.at | 4 +- 27 files changed, 206 insertions(+), 94 deletions(-) diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index b15b01d..b1f305b 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -33,6 +33,7 @@ FORMATTING = {"decimal": ("MFS_DECIMAL", 1, 8), "TCP flags": ("MFS_TCP_FLAGS", 2, 2)} PREREQS = {"none": "MFP_NONE", + "Ethernet": "MFP_ETHERNET", "ARP": "MFP_ARP", "VLAN VID": "MFP_VLAN_VID", "IPv4": "MFP_IPV4", diff --git a/lib/bfd.c b/lib/bfd.c index 7f6bf5b..84e14d4 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -610,6 +610,7 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. 
*/ + ofpbuf_set_frame(p, ofpbuf_data(p)); eth = ofpbuf_put_uninit(p, sizeof *eth); memcpy(eth->eth_src, eth_addr_is_zero(bfd->local_eth_src) ? eth_src diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 3b9a862..f1db47e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2628,7 +2628,8 @@ dp_netdev_upcall(struct dp_netdev *dp, struct dpif_packet *packet_, true); packet_str = ofp_packet_to_string(ofpbuf_data(packet), - ofpbuf_size(packet)); + ofpbuf_size(packet), + ofpbuf_is_layer3_packet(packet)); odp_flow_key_format(ofpbuf_data(&key), ofpbuf_size(&key), &ds); diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index d90fdd9..4b115d2 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -1910,6 +1910,14 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, ofpbuf_set_data(&upcall->packet, (char *)ofpbuf_data(&upcall->packet) + sizeof(struct nlattr)); ofpbuf_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET])); + ofpbuf_set_frame(&upcall->packet, ofpbuf_data(&upcall->packet)); + + /* Set the correct layer based on the presence of OVS_KEY_ATTR_ETHERNET */ + if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) { + ofpbuf_set_l3(&upcall->packet, NULL); + } else { + upcall->packet.l3_ofs = 0; + } *dp_ifindex = ovs_header->dp_ifindex; diff --git a/lib/dpif.c b/lib/dpif.c index 086f0ba..9e0f99d 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1298,7 +1298,8 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall) char *packet; packet = ofp_packet_to_string(ofpbuf_data(&upcall->packet), - ofpbuf_size(&upcall->packet)); + ofpbuf_size(&upcall->packet), + ofpbuf_is_layer3_packet(&upcall->packet)); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1588,7 +1589,8 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute, char *packet; packet = ofp_packet_to_string(ofpbuf_data(execute->packet), - ofpbuf_size(execute->packet)); + 
ofpbuf_size(execute->packet), + ofpbuf_is_layer3_packet(execute->packet)); ds_put_format(&ds, "%s: %sexecute ", dpif_name(dpif), (subexecute ? "sub-" diff --git a/lib/flow.c b/lib/flow.c index 7f28b4d..b46a483 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -121,7 +121,7 @@ struct mf_ctx { * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are * defined as macros. */ -#if (FLOW_WC_SEQ != 28) +#if (FLOW_WC_SEQ != 29) #define MINIFLOW_ASSERT(X) ovs_assert(X) BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " "assertions enabled. Consider updating FLOW_WC_SEQ after " @@ -326,18 +326,35 @@ invalid: return false; } -/* Initializes 'flow' members from 'packet' and 'md' +/* Determines IP version if a layer 3 packet */ +static ovs_be16 +get_l3_eth_type(struct ofpbuf *packet) +{ + struct ip_header *ip = ofpbuf_l3(packet); + int ip_ver = IP_VER(ip->ip_ihl_ver); + switch (ip_ver) { + case 4: + return htons(ETH_TYPE_IP); + case 6: + return htons(ETH_TYPE_IPV6); + default: + return 0; + } +} + +/* Initializes 'flow' members from 'packet' and 'md'. Expects packet->frame + * pointer to be equal to ofpbuf_data(packet), and packet->l3_ofs to be set to + * 0 for layer 3 packets. * - * Initializes 'packet' header l2 pointer to the start of the Ethernet - * header, and the layer offsets as follows: + * Initializes the layer offsets as follows: * * - packet->l2_5_ofs to the start of the MPLS shim header, or UINT16_MAX - * when there is no MPLS shim header. + * when there is no MPLS shim header, or Ethernet header * - * - packet->l3_ofs to just past the Ethernet header, or just past the - * vlan_header if one is present, to the first byte of the payload of the - * Ethernet frame. UINT16_MAX if the frame is too short to contain an - * Ethernet header. + * - packet->l3_ofs (if not 0) to just past the Ethernet header, or just + * past the vlan_header if one is present, to the first byte of the + * payload of the Ethernet frame. 
UINT16_MAX if the frame is too short to + * contain an Ethernet header. * * - packet->l4_ofs to just past the IPv4 header, if one is present and * has at least the content used for the fields of interest for the flow, @@ -354,6 +371,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, COVERAGE_INC(flow_extract); + ovs_assert(packet->frame == ofpbuf_data(packet)); + miniflow_initialize(&m.mf, m.buf); miniflow_extract(packet, md, &m.mf); miniflow_expand(&m.mf, flow); @@ -369,7 +388,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, size_t size = ofpbuf_size(packet); uint32_t *values = miniflow_values(dst); struct mf_ctx mf = { 0, values, values + FLOW_U32S }; - char *l2; + char *frame = NULL; ovs_be16 dl_type; uint8_t nw_frag, nw_tos, nw_ttl, nw_proto; @@ -385,40 +404,48 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port)); } - /* Initialize packet's layer pointer and offsets. */ - l2 = data; - ofpbuf_set_frame(packet, data); + if (packet->l3_ofs) { + frame = data; + miniflow_push_uint32(mf, base_layer, LAYER_2); - /* Must have full Ethernet header to proceed. */ - if (OVS_UNLIKELY(size < sizeof(struct eth_header))) { - goto out; - } else { - ovs_be16 vlan_tci; + /* Must have full Ethernet header to proceed. */ + if (OVS_UNLIKELY(size < sizeof(struct eth_header))) { + goto out; + } else { + ovs_be16 vlan_tci; + + /* Link layer. */ + BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6 + == offsetof(struct flow, dl_src)); + miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4); + /* dl_type, vlan_tci. */ + vlan_tci = parse_vlan(&data, &size); + dl_type = parse_ethertype(&data, &size); + miniflow_push_be16(mf, dl_type, dl_type); + miniflow_push_be16(mf, vlan_tci, vlan_tci); + } - /* Link layer. 
*/ - BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6 - == offsetof(struct flow, dl_src)); - miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4); - /* dl_type, vlan_tci. */ - vlan_tci = parse_vlan(&data, &size); - dl_type = parse_ethertype(&data, &size); - miniflow_push_be16(mf, dl_type, dl_type); - miniflow_push_be16(mf, vlan_tci, vlan_tci); - } + /* Parse mpls. */ + if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { + int count; + const void *mpls = data; + + packet->l2_5_ofs = (char *)data - frame; + count = parse_mpls(&data, &size); + miniflow_push_words(mf, mpls_lse, mpls, count); + } - /* Parse mpls. */ - if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { - int count; - const void *mpls = data; + /* Network layer. */ + packet->l3_ofs = (char *)data - frame; + } else { + miniflow_push_uint32(mf, base_layer, LAYER_3); - packet->l2_5_ofs = (char *)data - l2; - count = parse_mpls(&data, &size); - miniflow_push_words(mf, mpls_lse, mpls, count); + /* We assume L3 packets are either IPv4 or IPv6 */ + dl_type = get_l3_eth_type(packet); + miniflow_push_be16(mf, dl_type, dl_type); + miniflow_push_be16(mf, vlan_tci, 0); } - /* Network layer. 
*/ - packet->l3_ofs = (char *)data - l2; - nw_frag = 0; if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { const struct ip_header *nh = data; @@ -590,7 +617,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, goto out; } - packet->l4_ofs = (char *)data - l2; + packet->l4_ofs = (char *)data - frame; miniflow_push_be32(mf, nw_frag, BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto)); @@ -689,7 +716,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc) void flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); fmd->dp_hash = flow->dp_hash; fmd->recirc_id = flow->recirc_id; @@ -836,7 +863,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc, memset(&wc->masks, 0x0, sizeof wc->masks); /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); if (flow->tunnel.ip_dst) { if (flow->tunnel.flags & FLOW_TNL_F_KEY) { @@ -860,13 +887,16 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc, WC_MASK_FIELD(wc, recirc_id); WC_MASK_FIELD(wc, dp_hash); WC_MASK_FIELD(wc, in_port); + WC_MASK_FIELD(wc, base_layer); /* actset_output wildcarded. */ - WC_MASK_FIELD(wc, dl_dst); - WC_MASK_FIELD(wc, dl_src); WC_MASK_FIELD(wc, dl_type); - WC_MASK_FIELD(wc, vlan_tci); + if (flow->base_layer == LAYER_2) { + WC_MASK_FIELD(wc, dl_dst); + WC_MASK_FIELD(wc, dl_src); + WC_MASK_FIELD(wc, vlan_tci); + } if (flow->dl_type == htons(ETH_TYPE_IP)) { WC_MASK_FIELD(wc, nw_src); @@ -933,7 +963,7 @@ uint64_t flow_wc_map(const struct flow *flow) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); uint64_t map = (flow->tunnel.ip_dst) ? 
MINIFLOW_MAP(tunnel) : 0; @@ -985,7 +1015,7 @@ void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata); memset(&wc->masks.regs, 0, sizeof wc->masks.regs); @@ -1543,7 +1573,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label)); /* Clear all L3 and L4 fields. */ - BUILD_ASSERT(FLOW_WC_SEQ == 28); + BUILD_ASSERT(FLOW_WC_SEQ == 29); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); } diff --git a/lib/flow.h b/lib/flow.h index 2259680..637f847 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -38,7 +38,7 @@ struct pkt_metadata; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 28 +#define FLOW_WC_SEQ 29 /* Number of Open vSwitch extension 32-bit registers. */ #define FLOW_N_REGS 8 @@ -74,6 +74,11 @@ const char *flow_tun_flag_to_string(uint32_t flags); /* Maximum number of supported MPLS labels. */ #define FLOW_MAX_MPLS_LABELS 3 +enum base_layer { + LAYER_2 = 0, + LAYER_3 = 1 +}; + /* * A flow in the network. * @@ -90,6 +95,10 @@ const char *flow_tun_flag_to_string(uint32_t flags); * lower layer fields are first used to determine if the later fields need to * be looked at. This enables better wildcarding for datapath flows. * + * The starting layer is specified by 'base_layer'. When 'base_layer' is + * LAYER_3, dl_src, dl_tci, and vlan_tci are not used for matching. The + * dl_type field is still used to specify the layer 3 protocol. + * * NOTE: Order of the fields is significant, any change in the order must be * reflected in miniflow_extract()! 
*/ @@ -104,6 +113,7 @@ struct flow { union flow_in_port in_port; /* Input port.*/ ofp_port_t actset_output; /* Output port in action set. */ ovs_be16 pad1; /* Pad to 32 bits. */ + uint32_t base_layer; /* Fields start at this layer */ /* L2, Order the same as in the Ethernet header! */ uint8_t dl_dst[ETH_ADDR_LEN]; /* Ethernet destination address. */ @@ -156,8 +166,8 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t) - == sizeof(struct flow_tnl) + 180 - && FLOW_WC_SEQ == 28); + == sizeof(struct flow_tnl) + 184 + && FLOW_WC_SEQ == 29); /* Incremental points at which flow classification may be performed in * segments. diff --git a/lib/match.c b/lib/match.c index bd3b13d..7c8178f 100644 --- a/lib/match.c +++ b/lib/match.c @@ -870,7 +870,7 @@ match_format(const struct match *match, struct ds *s, int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%d,", priority); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 9aa71a3..9694928 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -260,6 +260,8 @@ mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow) case MFP_NONE: return true; + case MFP_ETHERNET: + return flow->base_layer == LAYER_2; case MFP_ARP: return (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)); @@ -341,6 +343,9 @@ mf_mask_field_and_prereqs(const struct mf_field *mf, struct flow *mask) case MFP_VLAN_VID: mask->vlan_tci |= htons(VLAN_CFI); break; + case MFP_ETHERNET: + mask->base_layer = UINT32_MAX; + break; case MFP_NONE: break; } diff --git a/lib/meta-flow.h b/lib/meta-flow.h index 1646995..25508c9 100644 --- a/lib/meta-flow.h +++ b/lib/meta-flow.h @@ -674,7 +674,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16. * Maskable: bitwise. * Formatting: hexadecimal. 
- * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: NXM_OF_VLAN_TCI(4) since v1.1. * OXM: none. @@ -690,7 +690,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16 (low 12 bits). * Maskable: no. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: none. @@ -708,7 +708,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: be16 (low 12 bits). * Maskable: bitwise. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: OXM_OF_VLAN_VID(6) since OF1.2 and v1.7. @@ -724,7 +724,7 @@ enum OVS_PACKED_ENUM mf_field_id { * Type: u8 (low 3 bits). * Maskable: no. * Formatting: decimal. - * Prerequisites: none. + * Prerequisites: Ethernet. * Access: read/write. * NXM: none. * OXM: none. @@ -1395,6 +1395,7 @@ enum OVS_PACKED_ENUM mf_prereqs { MFP_NONE, /* L2 requirements. */ + MFP_ETHERNET, MFP_ARP, MFP_VLAN_VID, MFP_IPV4, diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index f6d5070..335e740 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -832,6 +832,7 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **arr, netdev->stats.rx_bytes += ofpbuf_size(packet); ovs_mutex_unlock(&netdev->mutex); + ofpbuf_set_frame(packet, ofpbuf_data(packet)); dp_packet_pad(packet); /* This performs a (sometimes unnecessary) copy */ diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 99425f7..e717c79 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1053,6 +1053,7 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **packets, } dpif_packet_delete(packet); } else { + ofpbuf_set_frame(buffer, ofpbuf_data(buffer)); dp_packet_pad(buffer); dpif_packet_set_dp_hash(packet, 0); packets[0] = packet; diff --git a/lib/nx-match.c b/lib/nx-match.c index bc6682d..d12efc3 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -817,7 +817,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const 
struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); /* Metadata. */ if (match->wc.masks.dp_hash) { diff --git a/lib/odp-util.c b/lib/odp-util.c index 7be7a27..a0ad73e 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2862,7 +2862,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, size_t max_mpls_depth, bool recirc, bool export_mask) { struct ovs_key_ethernet *eth_key; - size_t encap; + size_t encap = 0; const struct flow *data = export_mask ? mask : flow; nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); @@ -2884,6 +2884,10 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } + if (flow->base_layer == LAYER_3) { + goto noethernet; + } + eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, sizeof *eth_key); get_ethernet_key(data, eth_key); @@ -2899,8 +2903,6 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, if (flow->vlan_tci == htons(0)) { goto unencap; } - } else { - encap = 0; } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { @@ -2923,6 +2925,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); +noethernet: if (flow->dl_type == htons(ETH_TYPE_IP)) { struct ovs_key_ipv4 *ipv4_key; @@ -3302,7 +3305,13 @@ parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; } else { if (!is_mask) { - flow->dl_type = htons(FLOW_DL_TYPE_NONE); + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) { + flow->dl_type = htons(ETH_TYPE_IP); + } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV6)) { + flow->dl_type = htons(ETH_TYPE_IPV6); + } else { + flow->dl_type = htons(FLOW_DL_TYPE_NONE); + } } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) { /* See comments in odp_flow_key_from_flow__(). 
*/ VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame"); @@ -3692,12 +3701,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); put_ethernet_key(eth_key, flow); - if (is_mask) { - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; - } - } - if (!is_mask) { + flow->base_layer = LAYER_2; expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; + } else { + flow->base_layer = LAYER_3; } /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */ @@ -3714,6 +3721,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, } if (is_mask) { flow->vlan_tci = htons(0xffff); + flow->base_layer = 0xffffffff; if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN)) { flow->vlan_tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]); expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_VLAN); @@ -3960,6 +3968,14 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base_flow, { struct ovs_key_ethernet key, base, mask; + /* If we have a L3 --> L2 flow, the push_eth action takes care of setting + * the appropriate MAC source and destination addresses, no need to add a + * set action + */ + if (base_flow->base_layer == LAYER_3 && flow->base_layer == LAYER_2) { + return; + } + get_ethernet_key(flow, &key); get_ethernet_key(base_flow, &base); get_ethernet_key(&wc->masks, &mask); diff --git a/lib/odp-util.h b/lib/odp-util.h index 0bbc347..d94972b 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -134,7 +134,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * add another field and forget to adjust this value. */ #define ODPUTIL_FLOW_KEY_BYTES 512 -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. 
An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-print.c b/lib/ofp-print.c index c446770..ba72f55 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -55,10 +55,11 @@ static void ofp_print_table_features(struct ds *, const struct ofputil_table_features *, const struct ofputil_table_stats *); -/* Returns a string that represents the contents of the Ethernet frame in the - * 'len' bytes starting at 'data'. The caller must free the returned string.*/ +/* Returns a string that represents the contents of the Ethernet frame + * (is_layer3 == False) or IP packet (is_layer3 == True) in the 'len' bytes + * starting at 'data'. The caller must free the returned string.*/ char * -ofp_packet_to_string(const void *data, size_t len) +ofp_packet_to_string(const void *data, size_t len, bool is_layer3) { struct ds ds = DS_EMPTY_INITIALIZER; const struct pkt_metadata md = PKT_METADATA_INITIALIZER(0); @@ -67,6 +68,12 @@ ofp_packet_to_string(const void *data, size_t len) size_t l4_size; ofpbuf_use_const(&buf, data, len); + ofpbuf_set_frame(&buf, ofpbuf_data(&buf)); + + if (is_layer3) { + buf.l3_ofs = 0; + } + flow_extract(&buf, &md, &flow); flow_format(&ds, &flow); @@ -160,7 +167,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_char(string, '\n'); if (verbosity > 0) { - char *packet = ofp_packet_to_string(pin.packet, pin.packet_len); + char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false); ds_put_cstr(string, packet); free(packet); } @@ -194,7 +201,7 @@ ofp_print_packet_out(struct ds *string, const struct ofp_header *oh, if (po.buffer_id == UINT32_MAX) { ds_put_format(string, " data_len=%"PRIuSIZE, po.packet_len); if (verbosity > 0 && po.packet_len > 0) { - char *packet = ofp_packet_to_string(po.packet, po.packet_len); + char *packet = ofp_packet_to_string(po.packet, po.packet_len, false); ds_put_char(string, '\n'); ds_put_cstr(string, packet); free(packet); @@ -2977,5 +2984,5 @@ ofp_print(FILE 
*stream, const void *oh, size_t len, int verbosity) void ofp_print_packet(FILE *stream, const void *data, size_t len) { - print_and_free(stream, ofp_packet_to_string(data, len)); + print_and_free(stream, ofp_packet_to_string(data, len, false)); } diff --git a/lib/ofp-print.h b/lib/ofp-print.h index 825e139..15aa196 100644 --- a/lib/ofp-print.h +++ b/lib/ofp-print.h @@ -21,6 +21,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> struct ds; struct ofp10_match; @@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity); char *ofp_to_string(const void *, size_t, int verbosity); char *ofp10_match_to_string(const struct ofp10_match *, int verbosity); -char *ofp_packet_to_string(const void *data, size_t len); +char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3); void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *); void ofp_print_version(const struct ofp_header *, struct ds *); diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 94047fa..f96b26b 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -186,7 +186,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h index ea03c9d..7e77c5d 100644 --- a/lib/ofpbuf.h +++ b/lib/ofpbuf.h @@ -274,11 +274,12 @@ static inline bool ofpbuf_equal(const struct ofpbuf *a, const struct ofpbuf *b) memcmp(ofpbuf_data(a), ofpbuf_data(b), ofpbuf_size(a)) == 0; } -/* Get the start if the Ethernet frame. 'l3_ofs' marks the end of the l2 - * headers, so return NULL if it is not set. */ +/* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2 + * headers, so return NULL if it is not set. A 'l3_ofs' of 0 marks a layer 3 + * packet, so return NULL in that case too. 
*/ static inline void * ofpbuf_l2(const struct ofpbuf *b) { - return (b->l3_ofs != UINT16_MAX) ? b->frame : NULL; + return (b->l3_ofs != UINT16_MAX && b->l3_ofs != 0) ? b->frame : NULL; } /* Sets the packet frame start pointer and resets all layer offsets. @@ -374,6 +375,11 @@ static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *b) ? (const char *)ofpbuf_l4(b) + ICMP_HEADER_LEN : NULL; } +static inline bool ofpbuf_is_layer3_packet(const struct ofpbuf *b) +{ + return (b->frame == ofpbuf_data(b)) && (b->l3_ofs == 0); +} + #ifdef DPDK_NETDEV BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0); diff --git a/lib/packets.c b/lib/packets.c index ca56ff6..d9c5317 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -391,6 +391,8 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp) return "Packet data too short for Ethernet"; } + ofpbuf_set_frame(packet, ofpbuf_data(packet)); + return NULL; } diff --git a/lib/pcap-file.c b/lib/pcap-file.c index 191e690..682503d 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -185,6 +185,7 @@ ovs_pcap_read(FILE *file, struct ofpbuf **bufp, long long int *when) ofpbuf_delete(buf); return error; } + ofpbuf_set_frame(buf, ofpbuf_data(buf)); *bufp = buf; return 0; } diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index f781bc5..e001de1 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -163,6 +163,7 @@ struct xport { bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* Is a tunnel port. */ + bool is_layer3; /* Is a layer 3 port. */ struct cfm *cfm; /* CFM handle or null. */ struct bfd *bfd; /* BFD handle or null. 
*/ @@ -392,7 +393,7 @@ static void xlate_xport_set(struct xport *xport, odp_port_t odp_port, const struct rstp_port *rstp_port, enum ofputil_port_config config, enum ofputil_port_state state, bool is_tunnel, - bool may_enable); + bool may_enable, bool is_layer3); static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *); static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *); static void xlate_xport_remove(struct xlate_cfg *, struct xport *); @@ -534,13 +535,14 @@ xlate_xport_set(struct xport *xport, odp_port_t odp_port, const struct bfd *bfd, int stp_port_no, const struct rstp_port* rstp_port, enum ofputil_port_config config, enum ofputil_port_state state, - bool is_tunnel, bool may_enable) + bool is_tunnel, bool may_enable, bool is_layer3) { xport->config = config; xport->state = state; xport->stp_port_no = stp_port_no; xport->is_tunnel = is_tunnel; xport->may_enable = may_enable; + xport->is_layer3 = is_layer3; xport->odp_port = odp_port; if (xport->rstp_port != rstp_port) { @@ -627,7 +629,7 @@ xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle, xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm, xport->bfd, xport->stp_port_no, xport->rstp_port, xport->config, xport->state, xport->is_tunnel, - xport->may_enable); + xport->may_enable, xport->is_layer3); if (xport->peer) { struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport); @@ -869,7 +871,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, const struct ofproto_port_queue *qdscp_list, size_t n_qdscp, enum ofputil_port_config config, enum ofputil_port_state state, bool is_tunnel, - bool may_enable) + bool may_enable, bool is_layer3) { size_t i; struct xport *xport; @@ -889,7 +891,8 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, ovs_assert(xport->ofp_port == ofp_port); xlate_xport_set(xport, odp_port, netdev, cfm, bfd, stp_port_no, - rstp_port, config, state, is_tunnel, may_enable); + 
rstp_port, config, state, is_tunnel, may_enable, + is_layer3); if (xport->peer) { xport->peer->peer = NULL; @@ -2132,7 +2135,7 @@ xlate_normal(struct xlate_ctx *ctx) } /* Learn source MAC. */ - if (ctx->xin->may_learn) { + if (ctx->xin->may_learn && !(in_port->is_layer3)) { update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle); } if (ctx->xin->xcache) { @@ -2594,6 +2597,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; struct flow_tnl flow_tnl; + const struct xport *in_xport = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port); ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; uint8_t flow_nw_tos; @@ -2603,7 +2607,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 29); memset(&flow_tnl, 0, sizeof flow_tnl); if (!xport) { @@ -2643,6 +2647,17 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xport->xbundle); } + /* Add the appropriate {pop,push}_eth datapath action to packets swicthing + * between layer 2 and layer 3 ports */ + if (in_xport) { + if (!in_xport->is_layer3 && xport->is_layer3) { + odp_put_pop_eth_action(ctx->xout->odp_actions); + } else if (in_xport && in_xport->is_layer3 && !xport->is_layer3) { + odp_put_push_eth_action(ctx->xout->odp_actions, flow->dl_src, + flow->dl_dst, flow->dl_type); + } + } + if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; @@ -4337,6 +4352,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) flow_wildcards_init_catchall(wc); memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port); + memset(&wc->masks.base_layer, 0xff, sizeof wc->masks.base_layer); memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); if (is_ip_any(flow)) { 
wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 9a03782..8f1932e 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -171,7 +171,7 @@ void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *, const struct ofproto_port_queue *qdscp, size_t n_qdscp, enum ofputil_port_config, enum ofputil_port_state, bool is_tunnel, - bool may_enable); + bool may_enable, bool is_layer3); void xlate_ofport_remove(struct ofport_dpif *); struct ofproto_dpif * xlate_lookup_ofproto(const struct dpif_backer *, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index a056a63..1fb5c4f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -658,7 +658,7 @@ type_run(const char *type) ofport->rstp_port, ofport->qdscp, ofport->n_qdscp, ofport->up.pp.config, ofport->up.pp.state, ofport->is_tunnel, - ofport->may_enable); + ofport->may_enable, ofport->is_layer3); } xlate_txn_commit(); } @@ -1085,6 +1085,7 @@ check_variable_length_userdata(struct dpif_backer *backer) ofpbuf_init(&packet, ETH_HEADER_LEN); eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN); eth->eth_type = htons(0x1234); + ofpbuf_set_frame(&packet, ofpbuf_data(&packet)); /* Execute the actions. On older datapaths this fails with ERANGE, on * newer datapaths it succeeds. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 95f2905..9e53b51 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -3183,6 +3183,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) } else { /* Ensure that the L3 header is 32-bit aligned. */ payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2); + ofpbuf_set_frame(payload, ofpbuf_data(payload)); } /* Verify actions against packet, then send packet if successful. 
*/ diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 5349386..5d7da5f 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -3790,15 +3790,15 @@ in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -odp_flow="in_port(1)" -br_flow="in_port=1" +odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)" +br_flow="in_port=1,dl_dst=00:00:00:00:00:00" # Test command: ofproto/trace odp_flow with in_port as a name. AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) -odp_flow="in_port(1)" +odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)" # Test command: ofproto/trace odp_flow AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl diff --git a/tests/vlan-splinters.at b/tests/vlan-splinters.at index 883528d..0b1b3db 100644 --- a/tests/vlan-splinters.at +++ b/tests/vlan-splinters.at @@ -28,7 +28,7 @@ for args in '9 p2' '11 p3' '15 p4'; do # Check that when a packet is received on $splinter_port, it is # treated as if it had been received on p1 in the correct VLAN. - AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port)"], + AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"], [0], [stdout]) AT_CHECK_UNQUOTED([sed -n '/^Flow/p; /^Datapath/p' stdout], [0], [dnl Flow: in_port=$p1,dl_vlan=$vlan,dl_vlan_pcp=0,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x05ff @@ -37,7 +37,7 @@ Datapath actions: $access_port # Check that when an OpenFlow action sends a packet to p1 on # splintered VLAN $vlan, it is actually output to $splinter_port. 
- AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port)"], + AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $splinter_port ]) -- 1.9.3 (Apple Git-50) _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev