This commit relaxes the assumption that all packets have an Ethernet header, and adds support for layer 3 flows. For each packet received on the Linux kernel datapath, the l2 and l3 members of struct ofpbuf are initialized appropriately, and some functions now expect this (notably flow_extract()), in order to differentiate between layer 2 and layer 3 packets. struct flow now has a new 'noeth' member, because we cannot assume that a flow has no Ethernet header when eth_src and eth_dst are 0. For layer 3 packets, the protocol type is still stored in the eth_type member.
Switching L2->L3 and L3->L2 are both implemented by adding the pop_eth and push_eth actions respectively when a transition is detected. The push_eth action puts 0s on both source and destination MACs. These addresses can be modified with mod_dl_dst and mod_dl_src actions. Signed-off-by: Lorand Jakab <loja...@cisco.com> --- lib/bfd.c | 1 + lib/dpif-linux.c | 7 +++++++ lib/dpif-netdev.c | 12 +++++------- lib/dpif.c | 6 ++++-- lib/flow.c | 36 ++++++++++++++++++++++++++++++++---- lib/flow.h | 3 ++- lib/match.c | 9 ++++++--- lib/odp-util.c | 15 ++++++++++----- lib/ofp-print.c | 14 ++++++++++---- lib/ofp-print.h | 3 ++- ofproto/ofproto-dpif-xlate.c | 17 +++++++++++++++-- ofproto/ofproto-dpif-xlate.h | 3 ++- ofproto/ofproto-dpif.c | 3 ++- ofproto/ofproto.c | 1 + 14 files changed, 99 insertions(+), 31 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index 740f4fc..21b70b1 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -540,6 +540,7 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. 
*/ + p->l2 = p->data; eth = ofpbuf_put_uninit(p, sizeof *eth); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 42958d3..777fdee 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1383,6 +1383,13 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, upcall->key = CONST_CAST(struct nlattr *, nl_attr_get(a[OVS_PACKET_ATTR_KEY])); upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]); + if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) { + upcall->packet->l2 = upcall->packet->data; + upcall->packet->l3 = NULL; + } else { + upcall->packet->l2 = NULL; + upcall->packet->l3 = upcall->packet->data; + } upcall->userdata = a[OVS_PACKET_ATTR_USERDATA]; *dp_ifindex = ovs_header->dp_ifindex; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1b0039c..9c07016 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1030,7 +1030,7 @@ static int dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute) { struct dp_netdev *dp = get_dp_netdev(dpif); - struct ofpbuf copy; + struct ofpbuf *copy; struct flow key; int error; @@ -1040,21 +1040,19 @@ dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute) } /* Make a deep copy of 'packet', because we might modify its data. 
*/ - ofpbuf_init(&copy, DP_NETDEV_HEADROOM + execute->packet->size); - ofpbuf_reserve(&copy, DP_NETDEV_HEADROOM); - ofpbuf_put(&copy, execute->packet->data, execute->packet->size); + copy = ofpbuf_clone_with_headroom(execute->packet, DP_NETDEV_HEADROOM); - flow_extract(&copy, 0, 0, NULL, NULL, &key); + flow_extract(copy, 0, 0, NULL, NULL, &key); error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len, &key); if (!error) { ovs_mutex_lock(&dp_netdev_mutex); - dp_netdev_execute_actions(dp, &copy, &key, + dp_netdev_execute_actions(dp, copy, &key, execute->actions, execute->actions_len); ovs_mutex_unlock(&dp_netdev_mutex); } - ofpbuf_uninit(&copy); + ofpbuf_uninit(copy); return error; } diff --git a/lib/dpif.c b/lib/dpif.c index 783a7cb..8a51b08 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1328,7 +1328,8 @@ dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) char *packet; packet = ofp_packet_to_string(upcall->packet->data, - upcall->packet->size); + upcall->packet->size, + upcall->packet->l3); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); @@ -1530,7 +1531,8 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute, char *packet; packet = ofp_packet_to_string(execute->packet->data, - execute->packet->size); + execute->packet->size, + execute->packet->l3); ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); format_odp_actions(&ds, execute->actions, execute->actions_len); if (error) { diff --git a/lib/flow.c b/lib/flow.c index 8c336f6..fa7faa1 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -41,6 +41,21 @@ COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); +static ovs_be16 +get_l3_eth_type(struct ofpbuf *packet) +{ + struct ip_header *ip = packet->l3; + int ip_ver = IP_VER(ip->ip_ihl_ver); + switch (ip_ver) { + case 4: + return htons(ETH_TYPE_IP); + case 6: + return htons(ETH_TYPE_IPV6); + default: + return 0; + } +} + static struct arp_eth_header * pull_arp(struct ofpbuf *packet) { @@ -383,6 +398,8 @@ 
flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, COVERAGE_INC(flow_extract); + ovs_assert(packet->l2 != NULL || packet->l3 != NULL); + memset(flow, 0, sizeof *flow); if (tnl) { @@ -395,11 +412,21 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, flow->skb_priority = skb_priority; flow->pkt_mark = pkt_mark; - packet->l2 = b.data; - packet->l2_5 = NULL; - packet->l3 = NULL; - packet->l4 = NULL; packet->l7 = NULL; + packet->l4 = NULL; + + if (packet->l3) { + packet->l2_5 = NULL; + packet->l2 = NULL; + flow->noeth = true; + /* We assume L3 packets are either IPv4 or IPv6 */ + flow->dl_type = get_l3_eth_type(packet); + goto layer3; + } + + packet->l3 = NULL; + packet->l2_5 = NULL; + packet->l2 = b.data; if (b.size < sizeof *eth) { return; @@ -425,6 +452,7 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, /* Network layer. */ packet->l3 = b.data; +layer3: if (flow->dl_type == htons(ETH_TYPE_IP)) { const struct ip_header *nh = pull_ip(&b); if (nh) { diff --git a/lib/flow.h b/lib/flow.h index 093d509..5d7c948 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -90,6 +90,7 @@ union flow_in_port { * a 32-bit datapath port number. */ struct flow { + bool noeth; /* Flow has no Ethernet header */ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ ovs_be64 metadata; /* OpenFlow Metadata. */ struct in6_addr ipv6_src; /* IPv6 source address. */ @@ -124,7 +125,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 1 - == sizeof(struct flow_tnl) + 154 + == sizeof(struct flow_tnl) + 162 && FLOW_WC_SEQ == 22); /* Represents the metadata fields of struct flow. 
*/ diff --git a/lib/match.c b/lib/match.c index 0f674b0..0e7cd6d 100644 --- a/lib/match.c +++ b/lib/match.c @@ -81,9 +81,12 @@ match_wc_init(struct match *match, const struct flow *flow) memset(&wc->masks.metadata, 0xff, sizeof wc->masks.metadata); memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port); - memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); - memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); - memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); + + if (!(flow->noeth)) { + memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); + memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); + memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); + } if (flow->dl_type == htons(ETH_TYPE_IPV6)) { memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src); diff --git a/lib/odp-util.c b/lib/odp-util.c index d584d3f..c3b5db4 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -3250,12 +3250,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN); memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN); - if (is_mask) { - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; - } - } - if (!is_mask) { + flow->noeth = false; expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; + } else { + flow->noeth = true; } /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. 
*/ @@ -3452,6 +3450,13 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base, return; } + /* If we have a L3 --> L2 flow, the push_eth action takes care of setting + * the appropriate MAC source and destination addresses, no need to add a + * set action + */ + if (base->noeth && !flow->noeth) + return; + memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 37e1f4f..9bcfa7c 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -55,13 +55,19 @@ static void ofp_print_error(struct ds *, enum ofperr); /* Returns a string that represents the contents of the Ethernet frame in the * 'len' bytes starting at 'data'. The caller must free the returned string.*/ char * -ofp_packet_to_string(const void *data, size_t len) +ofp_packet_to_string(const void *data, size_t len, bool is_layer3) { struct ds ds = DS_EMPTY_INITIALIZER; struct ofpbuf buf; struct flow flow; ofpbuf_use_const(&buf, data, len); + + if (is_layer3) + buf.l3 = buf.data; + else + buf.l2 = buf.data; + flow_extract(&buf, 0, 0, NULL, NULL, &flow); flow_format(&ds, &flow); @@ -157,7 +163,7 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_char(string, '\n'); if (verbosity > 0) { - char *packet = ofp_packet_to_string(pin.packet, pin.packet_len); + char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false); ds_put_cstr(string, packet); free(packet); } @@ -191,7 +197,7 @@ ofp_print_packet_out(struct ds *string, const struct ofp_header *oh, if (po.buffer_id == UINT32_MAX) { ds_put_format(string, " data_len=%zu", po.packet_len); if (verbosity > 0 && po.packet_len > 0) { - char *packet = ofp_packet_to_string(po.packet, po.packet_len); + char *packet = ofp_packet_to_string(po.packet, po.packet_len, false); ds_put_char(string, '\n'); ds_put_cstr(string, packet); free(packet); @@ -2811,5 +2817,5 @@ ofp_print(FILE *stream, const void *oh, size_t len, int 
verbosity) void ofp_print_packet(FILE *stream, const void *data, size_t len) { - print_and_free(stream, ofp_packet_to_string(data, len)); + print_and_free(stream, ofp_packet_to_string(data, len, false)); } diff --git a/lib/ofp-print.h b/lib/ofp-print.h index 825e139..15aa196 100644 --- a/lib/ofp-print.h +++ b/lib/ofp-print.h @@ -21,6 +21,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> struct ds; struct ofp10_match; @@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity); char *ofp_to_string(const void *, size_t, int verbosity); char *ofp10_match_to_string(const struct ofp10_match *, int verbosity); -char *ofp_packet_to_string(const void *data, size_t len); +char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3); void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *); void ofp_print_version(const struct ofp_header *, struct ds *); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 367dd88..b3c9c31 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -134,6 +134,7 @@ struct xport { bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* Is a tunnel port. */ + bool is_layer3; /* Is a layer 3 port. */ struct cfm *cfm; /* CFM handle or null. */ struct bfd *bfd; /* BFD handle or null. 
*/ @@ -400,7 +401,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, const struct ofproto_port_queue *qdscp_list, size_t n_qdscp, enum ofputil_port_config config, enum ofputil_port_state state, bool is_tunnel, - bool may_enable) + bool may_enable, bool is_layer3) { struct xport *xport = xport_lookup(ofport); size_t i; @@ -424,6 +425,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, xport->stp_port_no = stp_port_no; xport->is_tunnel = is_tunnel; xport->may_enable = may_enable; + xport->is_layer3 = is_layer3; xport->odp_port = odp_port; if (xport->netdev != netdev) { @@ -1410,7 +1412,7 @@ xlate_normal(struct xlate_ctx *ctx) } /* Learn source MAC. */ - if (ctx->xin->may_learn) { + if (ctx->xin->may_learn && !(in_port->is_layer3)) { update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle); } @@ -1655,6 +1657,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; + const struct xport *in_xport = get_ofp_port(ctx->xbridge, flow->in_port.ofp_port); ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; uint8_t flow_nw_tos; @@ -1681,6 +1684,16 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xport->xbundle); } + if ((in_xport) && !(in_xport->is_layer3) && xport->is_layer3) { + odp_put_pop_eth_action(&ctx->xout->odp_actions); + } + + if (flow->noeth && !(xport->is_layer3)) { + flow->noeth = false; + odp_put_push_eth_action(&ctx->xout->odp_actions, flow->dl_src, + flow->dl_dst, flow->dl_type); + } + if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 7dd3534..0f4ac70 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -140,7 +140,8 @@ void xlate_ofport_set(struct ofproto_dpif *, struct 
ofbundle *, int stp_port_no, const struct ofproto_port_queue *qdscp, size_t n_qdscp, enum ofputil_port_config, enum ofputil_port_state, bool is_tunnel, - bool may_enable) OVS_REQ_WRLOCK(xlate_rwlock); + bool may_enable, bool is_layer3) + OVS_REQ_WRLOCK(xlate_rwlock); void xlate_ofport_remove(struct ofport_dpif *) OVS_REQ_WRLOCK(xlate_rwlock); int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 4b029f9..56f000e 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -828,7 +828,8 @@ type_run(const char *type) ofport->bfd, ofport->peer, stp_port, ofport->qdscp, ofport->n_qdscp, ofport->up.pp.config, ofport->up.pp.state, - ofport->is_tunnel, ofport->may_enable); + ofport->is_tunnel, ofport->may_enable, + ofport->is_layer3); } ovs_rwlock_unlock(&xlate_rwlock); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 2ccbcee..f031c9d 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -2936,6 +2936,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) } else { /* Ensure that the L3 header is 32-bit aligned. */ payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2); + payload->l2 = payload->data; } /* Verify actions against packet, then send packet if successful. */ -- 1.8.3.4 (Apple Git-47) _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev