This commit relaxes the assumption that all packets have an Ethernet header, and adds support for layer 3 flows. For each packet received on the Linux kernel datapath the l2 and l3 members of struct ofpbuf are initialized appropriately, and some functions (notably flow_extract()) now expect this in order to differentiate between layer 2 and layer 3 packets. struct flow now has a new 'base_layer' member, because we cannot assume that a flow has no Ethernet header when eth_src and eth_dst are 0. For layer 3 packets, the protocol type is still stored in the eth_type member.
Switching L2->L3 and L3->L2 are both implemented by adding the pop_eth and push_eth actions respectively when a transition is detected. The push_eth action puts 0s on both source and destination MACs. These addresses can be modified with mod_dl_dst and mod_dl_src actions. Added new prerequisite MFP_ETHERNET for fields MFF_ETH_SRC, MFF_ETH_DST, MFF_VLAN_TCI, MFF_DL_VLAN, MFF_VLAN_VID and MFF_DL_VLAN_PCP. Signed-off-by: Lorand Jakab <loja...@cisco.com> --- lib/bfd.c | 1 + lib/dpif-linux.c | 8 +++ lib/dpif-netdev.c | 5 ++ lib/dpif.c | 6 ++- lib/flow.c | 113 +++++++++++++++++++++++++++---------------- lib/flow.h | 16 ++++-- lib/match.c | 11 +++-- lib/meta-flow.c | 17 ++++--- lib/meta-flow.h | 1 + lib/netdev-dummy.c | 1 + lib/netdev-linux.c | 1 + lib/nx-match.c | 2 +- lib/odp-util.c | 33 +++++++++---- lib/ofp-print.c | 19 +++++--- lib/ofp-print.h | 3 +- lib/ofp-util.c | 2 +- lib/ofpbuf.h | 12 +++-- lib/packets.c | 2 + lib/pcap-file.c | 1 + ofproto/ofproto-dpif-xlate.c | 19 ++++++-- ofproto/ofproto-dpif-xlate.h | 3 +- ofproto/ofproto-dpif.c | 4 +- ofproto/ofproto.c | 1 + tests/ofproto-dpif.at | 6 +-- tests/vlan-splinters.at | 4 +- 25 files changed, 202 insertions(+), 89 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index d83d198..c55852b 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -598,6 +598,7 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. 
*/ + ofpbuf_set_frame(p, ofpbuf_data(p)); eth = ofpbuf_put_uninit(p, sizeof *eth); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index abb4b51..7161b96 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1710,6 +1710,14 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, ofpbuf_set_data(&upcall->packet, (char *)ofpbuf_data(&upcall->packet) + sizeof(struct nlattr)); ofpbuf_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET])); + ofpbuf_set_frame(&upcall->packet, ofpbuf_data(&upcall->packet)); + + /* Set the correct layer based on the presence of OVS_KEY_ATTR_ETHERNET */ + if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) { + ofpbuf_set_l3(&upcall->packet, NULL); + } else { + upcall->packet.l3_ofs = 0; + } *dp_ifindex = ovs_header->dp_ifindex; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 40503a2..273b65d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2076,6 +2076,11 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, ofpbuf_set_data(&upcall->packet, ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet))); ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); + if (packet->frame == ofpbuf_data(packet)) { + upcall->packet.frame = ofpbuf_data(&upcall->packet); + upcall->packet.l2_5_ofs = packet->l2_5_ofs; + upcall->packet.l3_ofs = packet->l3_ofs; + } seq_change(q->seq); diff --git a/lib/dpif.c b/lib/dpif.c index d7cfc5c..d4d3683 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1363,7 +1363,8 @@ dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall, char *packet; packet = ofp_packet_to_string(ofpbuf_data(&upcall->packet), - ofpbuf_size(&upcall->packet)); + ofpbuf_size(&upcall->packet), + ofpbuf_is_layer3_packet(&upcall->packet)); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1567,7 +1568,8 @@ 
log_execute_message(struct dpif *dpif, const struct dpif_execute *execute, char *packet; packet = ofp_packet_to_string(ofpbuf_data(execute->packet), - ofpbuf_size(execute->packet)); + ofpbuf_size(execute->packet), + ofpbuf_is_layer3_packet(execute->packet)); ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); format_odp_actions(&ds, execute->actions, execute->actions_len); if (error) { diff --git a/lib/flow.c b/lib/flow.c index 9c9adc5..2b87309 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -121,7 +121,7 @@ struct mf_ctx { * away. Some GCC versions gave warnigns on ALWAYS_INLINE, so these are * defined as macros. */ -#if (FLOW_WC_SEQ != 26) +#if (FLOW_WC_SEQ != 27) #define MINIFLOW_ASSERT(X) ovs_assert(X) #else #define MINIFLOW_ASSERT(X) @@ -323,18 +323,35 @@ invalid: return false; } -/* Initializes 'flow' members from 'packet' and 'md' +/* Determines IP version if a layer 3 packet */ +static ovs_be16 +get_l3_eth_type(struct ofpbuf *packet) +{ + struct ip_header *ip = ofpbuf_l3(packet); + int ip_ver = IP_VER(ip->ip_ihl_ver); + switch (ip_ver) { + case 4: + return htons(ETH_TYPE_IP); + case 6: + return htons(ETH_TYPE_IPV6); + default: + return 0; + } +} + +/* Initializes 'flow' members from 'packet' and 'md'. Expects packet->frame + * pointer to be equal to packet->data_, and packet->l3_ofs to be set to 0 for + * layer 3 packets. * - * Initializes 'packet' header l2 pointer to the start of the Ethernet - * header, and the layer offsets as follows: + * Initializes the layer offsets as follows: * * - packet->l2_5_ofs to the start of the MPLS shim header, or UINT16_MAX - * when there is no MPLS shim header. + * when there is no MPLS shim header, or Ethernet header * - * - packet->l3_ofs to just past the Ethernet header, or just past the - * vlan_header if one is present, to the first byte of the payload of the - * Ethernet frame. UINT16_MAX if the frame is too short to contain an - * Ethernet header. 
+ * - packet->l3_ofs (if not 0) to just past the Ethernet header, or just + * past the vlan_header if one is present, to the first byte of the + * payload of the Ethernet frame. UINT16_MAX if the frame is too short to + * contain an Ethernet header. * * - packet->l4_ofs to just past the IPv4 header, if one is present and * has at least the content used for the fields of interest for the flow, @@ -351,6 +368,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, COVERAGE_INC(flow_extract); + ovs_assert(packet->frame == packet->data_); + miniflow_initialize(&m.mf, m.buf); miniflow_extract(packet, md, &m.mf); miniflow_expand(&m.mf, flow); @@ -366,7 +385,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, size_t size = ofpbuf_size(packet); uint32_t *values = miniflow_values(dst); struct mf_ctx mf = { 0, values, values + FLOW_U32S }; - char *l2; + char *frame; ovs_be16 dl_type; uint8_t nw_frag, nw_tos, nw_ttl, nw_proto; @@ -382,40 +401,48 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port)); } - /* Initialize packet's layer pointer and offsets. */ - l2 = data; - ofpbuf_set_frame(packet, data); - - /* Must have full Ethernet header to proceed. */ - if (OVS_UNLIKELY(size < sizeof(struct eth_header))) { - goto out; + if (packet->l3_ofs) { + frame = data; + miniflow_push_uint32(mf, base_layer, LAYER_2); + + /* Must have full Ethernet header to proceed. */ + if (OVS_UNLIKELY(size < sizeof(struct eth_header))) { + goto out; + } else { + ovs_be16 vlan_tci; + + /* Link layer. */ + BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6 + == offsetof(struct flow, dl_src)); + miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4); + /* dl_type, vlan_tci. */ + vlan_tci = parse_vlan(&data, &size); + dl_type = parse_ethertype(&data, &size); + miniflow_push_be16(mf, dl_type, dl_type); + miniflow_push_be16(mf, vlan_tci, vlan_tci); + } + + /* Parse mpls. 
*/ + if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { + int count; + const void *mpls = data; + + packet->l2_5_ofs = (char *)data - frame; + count = parse_mpls(&data, &size); + miniflow_push_words(mf, mpls_lse, mpls, count); + } + + /* Network layer. */ + packet->l3_ofs = (char *)data - frame; } else { - ovs_be16 vlan_tci; + miniflow_push_uint32(mf, base_layer, LAYER_3); - /* Link layer. */ - BUILD_ASSERT(offsetof(struct flow, dl_dst) + 6 - == offsetof(struct flow, dl_src)); - miniflow_push_words(mf, dl_dst, data, ETH_ADDR_LEN * 2 / 4); - /* dl_type, vlan_tci. */ - vlan_tci = parse_vlan(&data, &size); - dl_type = parse_ethertype(&data, &size); - miniflow_push_be16(mf, dl_type, dl_type); - miniflow_push_be16(mf, vlan_tci, vlan_tci); + /* We assume L3 packets are either IPv4 or IPv6 */ + dl_type = get_l3_eth_type(packet); + miniflow_push_be16(mf, dl_type, dl_type); + miniflow_push_be16(mf, vlan_tci, 0); } - /* Parse mpls. */ - if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { - int count; - const void *mpls = data; - - packet->l2_5_ofs = (char *)data - l2; - count = parse_mpls(&data, &size); - miniflow_push_words(mf, mpls_lse, mpls, count); - } - - /* Network layer. 
*/ - packet->l3_ofs = (char *)data - l2; - nw_frag = 0; if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { const struct ip_header *nh = data; @@ -566,7 +593,7 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, goto out; } - packet->l4_ofs = (char *)data - l2; + packet->l4_ofs = (char *)data - frame; miniflow_push_be32(mf, nw_frag, BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto)); @@ -656,7 +683,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc) void flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); fmd->dp_hash = flow->dp_hash; fmd->recirc_id = flow->recirc_id; @@ -1316,7 +1343,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label)); /* Clear all L3 and L4 fields. */ - BUILD_ASSERT(FLOW_WC_SEQ == 26); + BUILD_ASSERT(FLOW_WC_SEQ == 27); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); } diff --git a/lib/flow.h b/lib/flow.h index 0f3ffde..1985545 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -38,7 +38,7 @@ struct pkt_metadata; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 26 +#define FLOW_WC_SEQ 27 #define FLOW_N_REGS 8 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); @@ -64,6 +64,11 @@ const char *flow_tun_flag_to_string(uint32_t flags); /* Maximum number of supported MPLS labels. */ #define FLOW_MAX_MPLS_LABELS 3 +enum base_layer { + LAYER_2 = 0, + LAYER_3 = 1 +}; + /* * A flow in the network. * @@ -80,6 +85,10 @@ const char *flow_tun_flag_to_string(uint32_t flags); * lower layer fields are first used to determine if the later fields need to * be looked at. This enables better wildcarding for datapath flows. 
* + * The starting layer is specified by 'base_layer'. When 'base_layer' is + * LAYER_3, dl_src, dl_dst, and vlan_tci are not used for matching. The + * dl_type field is still used to specify the layer 3 protocol. + * * NOTE: Order of the fields is significant, any change in the order must be * reflected in miniflow_extract()! */ @@ -92,6 +101,7 @@ struct flow { uint32_t pkt_mark; /* Packet mark. */ uint32_t recirc_id; /* Must be exact match. */ union flow_in_port in_port; /* Input port.*/ + uint32_t base_layer; /* Fields start at this layer */ /* L2, Order the same as in the Ethernet header! */ uint8_t dl_dst[6]; /* Ethernet destination address. */ @@ -129,8 +139,8 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t) - == sizeof(struct flow_tnl) + 172 - && FLOW_WC_SEQ == 26); + == sizeof(struct flow_tnl) + 176 + && FLOW_WC_SEQ == 27); /* Incremental points at which flow classification may be performed in * segments. 
diff --git a/lib/match.c b/lib/match.c index 308f906..250464c 100644 --- a/lib/match.c +++ b/lib/match.c @@ -81,9 +81,12 @@ match_wc_init(struct match *match, const struct flow *flow) memset(&wc->masks.metadata, 0xff, sizeof wc->masks.metadata); memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port); - memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); - memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); - memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); + + if (flow->base_layer == LAYER_2) { + memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); + memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); + memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); + } if (flow->dl_type == htons(ETH_TYPE_IPV6)) { memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src); @@ -943,7 +946,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%u,", priority); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 6ef564e..4651971 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -260,7 +260,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, - MFP_NONE, + MFP_ETHERNET, true, NXM_OF_ETH_SRC, "NXM_OF_ETH_SRC", OXM_OF_ETH_SRC, "OXM_OF_ETH_SRC", @@ -272,7 +272,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, - MFP_NONE, + MFP_ETHERNET, true, NXM_OF_ETH_DST, "NXM_OF_ETH_DST", OXM_OF_ETH_DST, "OXM_OF_ETH_DST", @@ -298,7 +298,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { MF_FIELD_SIZES(be16), MFM_FULLY, MFS_HEXADECIMAL, - MFP_NONE, + MFP_ETHERNET, true, NXM_OF_VLAN_TCI, "NXM_OF_VLAN_TCI", NXM_OF_VLAN_TCI, "NXM_OF_VLAN_TCI", @@ -310,7 +310,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { sizeof(ovs_be16), 12, MFM_NONE, MFS_DECIMAL, - MFP_NONE, + 
MFP_ETHERNET, true, 0, NULL, 0, NULL, @@ -322,7 +322,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { sizeof(ovs_be16), 12, MFM_FULLY, MFS_DECIMAL, - MFP_NONE, + MFP_ETHERNET, true, OXM_OF_VLAN_VID, "OXM_OF_VLAN_VID", OXM_OF_VLAN_VID, "OXM_OF_VLAN_VID", @@ -334,7 +334,7 @@ const struct mf_field mf_fields[MFF_N_IDS] = { 1, 3, MFM_NONE, MFS_DECIMAL, - MFP_NONE, + MFP_ETHERNET, true, 0, NULL, 0, NULL, @@ -1055,6 +1055,8 @@ mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow) case MFP_NONE: return true; + case MFP_ETHERNET: + return flow->base_layer == LAYER_2; case MFP_ARP: return (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)); @@ -1132,6 +1134,9 @@ mf_mask_field_and_prereqs(const struct mf_field *mf, struct flow *mask) case MFP_VLAN_VID: mask->vlan_tci |= htons(VLAN_CFI); break; + case MFP_ETHERNET: + mask->base_layer = 0xff; + break; case MFP_NONE: break; } diff --git a/lib/meta-flow.h b/lib/meta-flow.h index d02d320..67af340 100644 --- a/lib/meta-flow.h +++ b/lib/meta-flow.h @@ -189,6 +189,7 @@ enum OVS_PACKED_ENUM mf_prereqs { MFP_NONE, /* L2 requirements. 
*/ + MFP_ETHERNET, MFP_ARP, MFP_VLAN_VID, MFP_IPV4, diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 501fb82..cd8597d 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -777,6 +777,7 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct ofpbuf **arr, int *c) netdev->stats.rx_bytes += ofpbuf_size(packet); ovs_mutex_unlock(&netdev->mutex); + ofpbuf_set_frame(packet, ofpbuf_data(packet)); dp_packet_pad(packet); arr[0] = packet; *c = 1; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c1d9323..dde0000 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1009,6 +1009,7 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct ofpbuf **packet, int *c) } ofpbuf_delete(buffer); } else { + ofpbuf_set_frame(buffer, ofpbuf_data(buffer)); dp_packet_pad(buffer); packet[0] = buffer; *c = 1; diff --git a/lib/nx-match.c b/lib/nx-match.c index 3839f19..a85f30c 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -572,7 +572,7 @@ nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); /* Metadata. */ if (match->wc.masks.dp_hash) { diff --git a/lib/odp-util.c b/lib/odp-util.c index a22ab01..d519517 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2525,7 +2525,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, size_t max_mpls_depth, bool export_mask) { struct ovs_key_ethernet *eth_key; - size_t encap; + size_t encap = 0; const struct flow *data = export_mask ? 
mask : flow; nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); @@ -2550,6 +2550,10 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } + if (flow->base_layer == LAYER_3) { + goto noethernet; + } + eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, sizeof *eth_key); memcpy(eth_key->eth_src, data->dl_src, ETH_ADDR_LEN); @@ -2566,8 +2570,6 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, if (flow->vlan_tci == htons(0)) { goto unencap; } - } else { - encap = 0; } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { @@ -2590,6 +2592,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); +noethernet: if (flow->dl_type == htons(ETH_TYPE_IP)) { struct ovs_key_ipv4 *ipv4_key; @@ -2982,7 +2985,13 @@ parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; } else { if (!is_mask) { - flow->dl_type = htons(FLOW_DL_TYPE_NONE); + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) { + flow->dl_type = htons(ETH_TYPE_IP); + } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV6)) { + flow->dl_type = htons(ETH_TYPE_IPV6); + } else { + flow->dl_type = htons(FLOW_DL_TYPE_NONE); + } } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) { /* See comments in odp_flow_key_from_flow__(). 
*/ VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame"); @@ -3396,12 +3405,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN); memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN); - if (is_mask) { - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; - } - } - if (!is_mask) { + flow->base_layer = LAYER_2; expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; + } else { + flow->base_layer = LAYER_3; } /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */ @@ -3599,6 +3606,14 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base, return; } + /* If we have a L3 --> L2 flow, the push_eth action takes care of setting + * the appropriate MAC source and destination addresses, no need to add a + * set action + */ + if (base->base_layer == LAYER_3 && flow->base_layer == LAYER_2) { + return; + } + memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); diff --git a/lib/ofp-print.c b/lib/ofp-print.c index a2e515d..956373f 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -53,10 +53,11 @@ static void ofp_print_queue_name(struct ds *string, uint32_t port); static void ofp_print_error(struct ds *, enum ofperr); -/* Returns a string that represents the contents of the Ethernet frame in the - * 'len' bytes starting at 'data'. The caller must free the returned string.*/ +/* Returns a string that represents the contents of the Ethernet frame + * (is_layer3 == False) or IP packet (is_layer3 == True) in the 'len' bytes + * starting at 'data'. 
The caller must free the returned string.*/ char * -ofp_packet_to_string(const void *data, size_t len) +ofp_packet_to_string(const void *data, size_t len, bool is_layer3) { struct ds ds = DS_EMPTY_INITIALIZER; const struct pkt_metadata md = PKT_METADATA_INITIALIZER(0); @@ -65,6 +66,12 @@ ofp_packet_to_string(const void *data, size_t len) size_t l4_size; ofpbuf_use_const(&buf, data, len); + ofpbuf_set_frame(&buf, ofpbuf_data(&buf)); + + if (is_layer3) { + buf.l3_ofs = 0; + } + flow_extract(&buf, &md, &flow); flow_format(&ds, &flow); @@ -158,7 +165,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_char(string, '\n'); if (verbosity > 0) { - char *packet = ofp_packet_to_string(pin.packet, pin.packet_len); + char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false); ds_put_cstr(string, packet); free(packet); } @@ -192,7 +199,7 @@ ofp_print_packet_out(struct ds *string, const struct ofp_header *oh, if (po.buffer_id == UINT32_MAX) { ds_put_format(string, " data_len=%"PRIuSIZE, po.packet_len); if (verbosity > 0 && po.packet_len > 0) { - char *packet = ofp_packet_to_string(po.packet, po.packet_len); + char *packet = ofp_packet_to_string(po.packet, po.packet_len, false); ds_put_char(string, '\n'); ds_put_cstr(string, packet); free(packet); @@ -3086,5 +3093,5 @@ ofp_print(FILE *stream, const void *oh, size_t len, int verbosity) void ofp_print_packet(FILE *stream, const void *data, size_t len) { - print_and_free(stream, ofp_packet_to_string(data, len)); + print_and_free(stream, ofp_packet_to_string(data, len, false)); } diff --git a/lib/ofp-print.h b/lib/ofp-print.h index 825e139..15aa196 100644 --- a/lib/ofp-print.h +++ b/lib/ofp-print.h @@ -21,6 +21,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> struct ds; struct ofp10_match; @@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity); char *ofp_to_string(const void *, size_t, int verbosity); char *ofp10_match_to_string(const 
struct ofp10_match *, int verbosity); -char *ofp_packet_to_string(const void *data, size_t len); +char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3); void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *); void ofp_print_version(const struct ofp_header *, struct ds *); diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 2106fd8..4de4b66 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -85,7 +85,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h index 85be899..adf5e4c 100644 --- a/lib/ofpbuf.h +++ b/lib/ofpbuf.h @@ -267,11 +267,12 @@ static inline bool ofpbuf_equal(const struct ofpbuf *a, const struct ofpbuf *b) memcmp(ofpbuf_data(a), ofpbuf_data(b), ofpbuf_size(a)) == 0; } -/* Get the start if the Ethernet frame. 'l3_ofs' marks the end of the l2 - * headers, so return NULL if it is not set. */ +/* Get the start of the Ethernet frame. 'l3_ofs' marks the end of the l2 + * headers, so return NULL if it is not set. A 'l3_ofs' of 0 marks a layer 3 + * packet, so return NULL in that case too. */ static inline void * ofpbuf_l2(const struct ofpbuf *b) { - return (b->l3_ofs != UINT16_MAX) ? b->frame : NULL; + return (b->l3_ofs != UINT16_MAX && b->l3_ofs != 0) ? b->frame : NULL; } /* Sets the packet frame start pointer and resets all layer offsets. @@ -353,6 +354,11 @@ static inline const void *ofpbuf_get_icmp_payload(const struct ofpbuf *b) ? 
(const char *)ofpbuf_l4(b) + ICMP_HEADER_LEN : NULL; } +static inline bool ofpbuf_is_layer3_packet(const struct ofpbuf *b) +{ + return (b->frame == b->data_) && (b->l3_ofs == 0); +} + #ifdef DPDK_NETDEV static inline void * ofpbuf_data(const struct ofpbuf *b) { diff --git a/lib/packets.c b/lib/packets.c index c1f7ade..a19fc65 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -390,6 +390,8 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp) return "Packet data too short for Ethernet"; } + ofpbuf_set_frame(packet, ofpbuf_data(packet)); + return NULL; } diff --git a/lib/pcap-file.c b/lib/pcap-file.c index 191e690..682503d 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -185,6 +185,7 @@ ovs_pcap_read(FILE *file, struct ofpbuf **bufp, long long int *when) ofpbuf_delete(buf); return error; } + ofpbuf_set_frame(buf, ofpbuf_data(buf)); *bufp = buf; return 0; } diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index da538b7..62e0d62 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -150,6 +150,7 @@ struct xport { bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* Is a tunnel port. */ + bool is_layer3; /* Is a layer 3 port. */ struct cfm *cfm; /* CFM handle or null. */ struct bfd *bfd; /* BFD handle or null. 
*/ @@ -497,7 +498,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, const struct ofproto_port_queue *qdscp_list, size_t n_qdscp, enum ofputil_port_config config, enum ofputil_port_state state, bool is_tunnel, - bool may_enable) + bool may_enable, bool is_layer3) { struct xport *xport = xport_lookup(ofport); size_t i; @@ -521,6 +522,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, xport->stp_port_no = stp_port_no; xport->is_tunnel = is_tunnel; xport->may_enable = may_enable; + xport->is_layer3 = is_layer3; xport->odp_port = odp_port; if (xport->netdev != netdev) { @@ -1545,7 +1547,7 @@ xlate_normal(struct xlate_ctx *ctx) } /* Learn source MAC. */ - if (ctx->xin->may_learn) { + if (ctx->xin->may_learn && !(in_port->is_layer3)) { update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle); } if (ctx->xin->xcache) { @@ -1805,6 +1807,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; + const struct xport *in_xport = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port); ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; uint8_t flow_nw_tos; @@ -1813,7 +1816,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. 
*/ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 26); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); if (!xport) { xlate_report(ctx, "Nonexistent output port"); @@ -1840,6 +1843,16 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xport->xbundle); } + if (in_xport && !in_xport->is_layer3 && xport->is_layer3) { + odp_put_pop_eth_action(&ctx->xout->odp_actions); + } + + if (flow->base_layer == LAYER_3 && !xport->is_layer3) { + flow->base_layer = LAYER_2; + odp_put_push_eth_action(&ctx->xout->odp_actions, flow->dl_src, + flow->dl_dst, flow->dl_type); + } + if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 760736a..ac3b24d 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -165,7 +165,8 @@ void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *, int stp_port_no, const struct ofproto_port_queue *qdscp, size_t n_qdscp, enum ofputil_port_config, enum ofputil_port_state, bool is_tunnel, - bool may_enable) OVS_REQ_WRLOCK(xlate_rwlock); + bool may_enable, bool is_layer3) + OVS_REQ_WRLOCK(xlate_rwlock); void xlate_ofport_remove(struct ofport_dpif *) OVS_REQ_WRLOCK(xlate_rwlock); int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 926f3d6..f29ce47 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -631,7 +631,8 @@ type_run(const char *type) ofport->bfd, ofport->peer, stp_port, ofport->qdscp, ofport->n_qdscp, ofport->up.pp.config, ofport->up.pp.state, - ofport->is_tunnel, ofport->may_enable); + ofport->is_tunnel, ofport->may_enable, + ofport->is_layer3); } ovs_rwlock_unlock(&xlate_rwlock); } @@ -1039,6 +1040,7 @@ check_variable_length_userdata(struct dpif_backer *backer) ofpbuf_init(&packet, ETH_HEADER_LEN); eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN); eth->eth_type = htons(0x1234); + ofpbuf_set_frame(&packet, 
ofpbuf_data(&packet)); /* Execute the actions. On older datapaths this fails with ERANGE, on * newer datapaths it succeeds. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index d92bafd..fa69347 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2982,6 +2982,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) } else { /* Ensure that the L3 header is 32-bit aligned. */ payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2); + ofpbuf_set_frame(payload, ofpbuf_data(payload)); } /* Verify actions against packet, then send packet if successful. */ diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 4dcaf63..edd7c83 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -2609,15 +2609,15 @@ in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -odp_flow="in_port(1)" -br_flow="in_port=1" +odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)" +br_flow="in_port=1,dl_dst=00:00:00:00:00:00" # Test command: ofproto/trace odp_flow with in_port as a name. AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) -odp_flow="in_port(1)" +odp_flow="in_port(1),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)" # Test command: ofproto/trace odp_flow AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl diff --git a/tests/vlan-splinters.at b/tests/vlan-splinters.at index 3cc6187..6ac2eaf 100644 --- a/tests/vlan-splinters.at +++ b/tests/vlan-splinters.at @@ -27,7 +27,7 @@ for args in '9 p2' '11 p3' '15 p4'; do # Check that when a packet is received on $splinter_port, it is # treated as if it had been received on p1 in the correct VLAN. 
- AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port)"], + AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"], [0], [stdout]) AT_CHECK_UNQUOTED([sed -n '/^Flow/p; /^Datapath/p' stdout], [0], [dnl Flow: metadata=0,in_port=$p1,dl_vlan=$vlan,dl_vlan_pcp=0,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x05ff @@ -36,7 +36,7 @@ Datapath actions: $access_port # Check that when an OpenFlow action sends a packet to p1 on # splintered VLAN $vlan, it is actually output to $splinter_port. - AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port)"], + AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port),eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00)"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $splinter_port ]) -- 1.8.5.2 (Apple Git-48) _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev