This commit relaxes the assumption that all packets have an Ethernet
header, and adds support for layer 3 flows.  For each packet received on
the Linux kernel datapath the l2 and l3 members of struct ofpbuf are
initialized appropriately, and some functions now expect this (notably
flow_extract()), in order to differentiate between layer 2 and layer 3
packets.  struct flow now has a new 'noeth' member, because we cannot
assume that a flow has no Ethernet header when dl_src and dl_dst are
0.  For layer 3 packets, the protocol type is still stored in the
dl_type member.

Switching L2->L3 and L3->L2 are both implemented by adding the pop_eth
and push_eth actions respectively when a transition is detected.  The
push_eth action puts 0s on both source and destination MACs.  These
addresses can be modified with mod_dl_dst and mod_dl_src actions.

Signed-off-by: Lorand Jakab <loja...@cisco.com>
---
 lib/bfd.c                    |  1 +
 lib/dpif-linux.c             |  7 +++++++
 lib/dpif-netdev.c            | 12 +++++-------
 lib/dpif.c                   |  6 ++++--
 lib/flow.c                   | 36 ++++++++++++++++++++++++++++++++----
 lib/flow.h                   |  3 ++-
 lib/match.c                  |  9 ++++++---
 lib/odp-util.c               | 15 ++++++++++-----
 lib/ofp-print.c              | 14 ++++++++++----
 lib/ofp-print.h              |  3 ++-
 ofproto/ofproto-dpif-xlate.c | 17 +++++++++++++++--
 ofproto/ofproto-dpif-xlate.h |  3 ++-
 ofproto/ofproto-dpif.c       |  3 ++-
 ofproto/ofproto.c            |  1 +
 14 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/lib/bfd.c b/lib/bfd.c
index 740f4fc..21b70b1 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -540,6 +540,7 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
     ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL));
 
     ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */
+    p->l2 = p->data;
     eth = ofpbuf_put_uninit(p, sizeof *eth);
     memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
     memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN);
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 42958d3..777fdee 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -1383,6 +1383,13 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall 
*upcall,
     upcall->key = CONST_CAST(struct nlattr *,
                              nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
     upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
+    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
+        upcall->packet->l2 = upcall->packet->data;
+        upcall->packet->l3 = NULL;
+    } else {
+        upcall->packet->l2 = NULL;
+        upcall->packet->l3 = upcall->packet->data;
+    }
     upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
     *dp_ifindex = ovs_header->dp_ifindex;
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1b0039c..9c07016 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1030,7 +1030,7 @@ static int
 dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
-    struct ofpbuf copy;
+    struct ofpbuf *copy;
     struct flow key;
     int error;
 
@@ -1040,21 +1040,19 @@ dpif_netdev_execute(struct dpif *dpif, const struct 
dpif_execute *execute)
     }
 
     /* Make a deep copy of 'packet', because we might modify its data. */
-    ofpbuf_init(&copy, DP_NETDEV_HEADROOM + execute->packet->size);
-    ofpbuf_reserve(&copy, DP_NETDEV_HEADROOM);
-    ofpbuf_put(&copy, execute->packet->data, execute->packet->size);
+    copy = ofpbuf_clone_with_headroom(execute->packet, DP_NETDEV_HEADROOM);
 
-    flow_extract(&copy, 0, 0, NULL, NULL, &key);
+    flow_extract(copy, 0, 0, NULL, NULL, &key);
     error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len,
                                           &key);
     if (!error) {
         ovs_mutex_lock(&dp_netdev_mutex);
-        dp_netdev_execute_actions(dp, &copy, &key,
+        dp_netdev_execute_actions(dp, copy, &key,
                                   execute->actions, execute->actions_len);
         ovs_mutex_unlock(&dp_netdev_mutex);
     }
 
-    ofpbuf_uninit(&copy);
+    ofpbuf_uninit(copy);
     return error;
 }
 
diff --git a/lib/dpif.c b/lib/dpif.c
index 783a7cb..8a51b08 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1328,7 +1328,8 @@ dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, 
struct ofpbuf *buf)
         char *packet;
 
         packet = ofp_packet_to_string(upcall->packet->data,
-                                      upcall->packet->size);
+                                      upcall->packet->size,
+                                      upcall->packet->l3);
 
         ds_init(&flow);
         odp_flow_key_format(upcall->key, upcall->key_len, &flow);
@@ -1530,7 +1531,8 @@ log_execute_message(struct dpif *dpif, const struct 
dpif_execute *execute,
         char *packet;
 
         packet = ofp_packet_to_string(execute->packet->data,
-                                      execute->packet->size);
+                                      execute->packet->size,
+                                      execute->packet->l3);
         ds_put_format(&ds, "%s: execute ", dpif_name(dpif));
         format_odp_actions(&ds, execute->actions, execute->actions_len);
         if (error) {
diff --git a/lib/flow.c b/lib/flow.c
index 8c336f6..fa7faa1 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -41,6 +41,21 @@
 COVERAGE_DEFINE(flow_extract);
 COVERAGE_DEFINE(miniflow_malloc);
 
+/* Returns the Ethertype for the IP version at 'packet->l3', or 0 if none. */
+static ovs_be16
+get_l3_eth_type(struct ofpbuf *packet)
+{
+    const struct ip_header *ip = packet->l3;
+    if (!ip || packet->size < sizeof ip->ip_ihl_ver) {
+        return 0;   /* Empty packet: the version byte is not there to read. */
+    }
+    switch (IP_VER(ip->ip_ihl_ver)) {
+    case 4: return htons(ETH_TYPE_IP);
+    case 6: return htons(ETH_TYPE_IPV6);
+    default: return 0;
+    }
+}
+
 static struct arp_eth_header *
 pull_arp(struct ofpbuf *packet)
 {
@@ -383,6 +398,8 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, 
uint32_t pkt_mark,
 
     COVERAGE_INC(flow_extract);
 
+    ovs_assert(packet->l2 != NULL || packet->l3 != NULL);
+
     memset(flow, 0, sizeof *flow);
 
     if (tnl) {
@@ -395,11 +412,21 @@ flow_extract(struct ofpbuf *packet, uint32_t 
skb_priority, uint32_t pkt_mark,
     flow->skb_priority = skb_priority;
     flow->pkt_mark = pkt_mark;
 
-    packet->l2   = b.data;
-    packet->l2_5 = NULL;
-    packet->l3   = NULL;
-    packet->l4   = NULL;
     packet->l7   = NULL;
+    packet->l4   = NULL;
+
+    if (packet->l3) {
+        packet->l2_5 = NULL;
+        packet->l2   = NULL;
+        flow->noeth = true;
+        /* We assume L3 packets are either IPv4 or IPv6 */
+        flow->dl_type = get_l3_eth_type(packet);
+        goto layer3;
+    }
+
+    packet->l3   = NULL;
+    packet->l2_5 = NULL;
+    packet->l2   = b.data;
 
     if (b.size < sizeof *eth) {
         return;
@@ -425,6 +452,7 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, 
uint32_t pkt_mark,
 
     /* Network layer. */
     packet->l3 = b.data;
+layer3:
     if (flow->dl_type == htons(ETH_TYPE_IP)) {
         const struct ip_header *nh = pull_ip(&b);
         if (nh) {
diff --git a/lib/flow.h b/lib/flow.h
index 093d509..5d7c948 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -90,6 +90,7 @@ union flow_in_port {
  * a 32-bit datapath port number.
  */
 struct flow {
+    bool noeth;                 /* Flow has no Ethernet header */
     struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
     ovs_be64 metadata;          /* OpenFlow Metadata. */
     struct in6_addr ipv6_src;   /* IPv6 source address. */
@@ -124,7 +125,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
 BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 1
-                  == sizeof(struct flow_tnl) + 154
+                  == sizeof(struct flow_tnl) + 162
                   && FLOW_WC_SEQ == 22);
 
 /* Represents the metadata fields of struct flow. */
diff --git a/lib/match.c b/lib/match.c
index 0f674b0..0e7cd6d 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -81,9 +81,12 @@ match_wc_init(struct match *match, const struct flow *flow)
 
     memset(&wc->masks.metadata, 0xff, sizeof wc->masks.metadata);
     memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port);
-    memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
-    memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
-    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+
+    if (!(flow->noeth)) {
+        memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
+        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
+        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    }
 
     if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
         memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
diff --git a/lib/odp-util.c b/lib/odp-util.c
index d584d3f..c3b5db4 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -3250,12 +3250,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t 
key_len,
         eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]);
         memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN);
         memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN);
-        if (is_mask) {
-            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
-        }
-    }
-    if (!is_mask) {
+        flow->noeth = false;
         expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
+    } else {
+        flow->noeth = true;
     }
 
     /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */
@@ -3452,6 +3450,13 @@ commit_set_ether_addr_action(const struct flow *flow, 
struct flow *base,
         return;
     }
 
+    /* If we have a L3 --> L2 flow, the push_eth action takes care of setting
+     * the appropriate MAC source and destination addresses, no need to add a
+     * set action
+     */
+    if (base->noeth && !flow->noeth)
+        return;
+
     memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
 
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 37e1f4f..9bcfa7c 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -55,13 +55,19 @@ static void ofp_print_error(struct ds *, enum ofperr);
 /* Returns a string that represents the contents of the Ethernet frame in the
  * 'len' bytes starting at 'data'.  The caller must free the returned string.*/
 char *
-ofp_packet_to_string(const void *data, size_t len)
+ofp_packet_to_string(const void *data, size_t len, bool is_layer3)
 {
     struct ds ds = DS_EMPTY_INITIALIZER;
     struct ofpbuf buf;
     struct flow flow;
 
     ofpbuf_use_const(&buf, data, len);
+
+    if (is_layer3)
+        buf.l3 = buf.data;
+    else
+        buf.l2 = buf.data;
+
     flow_extract(&buf, 0, 0, NULL, NULL, &flow);
     flow_format(&ds, &flow);
 
@@ -157,7 +163,7 @@ ofp_print_packet_in(struct ds *string, const struct 
ofp_header *oh,
     ds_put_char(string, '\n');
 
     if (verbosity > 0) {
-        char *packet = ofp_packet_to_string(pin.packet, pin.packet_len);
+        char *packet = ofp_packet_to_string(pin.packet, pin.packet_len, false);
         ds_put_cstr(string, packet);
         free(packet);
     }
@@ -191,7 +197,7 @@ ofp_print_packet_out(struct ds *string, const struct 
ofp_header *oh,
     if (po.buffer_id == UINT32_MAX) {
         ds_put_format(string, " data_len=%zu", po.packet_len);
         if (verbosity > 0 && po.packet_len > 0) {
-            char *packet = ofp_packet_to_string(po.packet, po.packet_len);
+            char *packet = ofp_packet_to_string(po.packet, po.packet_len, 
false);
             ds_put_char(string, '\n');
             ds_put_cstr(string, packet);
             free(packet);
@@ -2811,5 +2817,5 @@ ofp_print(FILE *stream, const void *oh, size_t len, int 
verbosity)
 void
 ofp_print_packet(FILE *stream, const void *data, size_t len)
 {
-    print_and_free(stream, ofp_packet_to_string(data, len));
+    print_and_free(stream, ofp_packet_to_string(data, len, false));
 }
diff --git a/lib/ofp-print.h b/lib/ofp-print.h
index 825e139..15aa196 100644
--- a/lib/ofp-print.h
+++ b/lib/ofp-print.h
@@ -21,6 +21,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 struct ds;
 struct ofp10_match;
@@ -39,7 +40,7 @@ void ofp10_match_print(struct ds *, const struct ofp10_match 
*, int verbosity);
 
 char *ofp_to_string(const void *, size_t, int verbosity);
 char *ofp10_match_to_string(const struct ofp10_match *, int verbosity);
-char *ofp_packet_to_string(const void *data, size_t len);
+char *ofp_packet_to_string(const void *data, size_t len, bool is_layer3);
 
 void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *);
 void ofp_print_version(const struct ofp_header *, struct ds *);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 367dd88..b3c9c31 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -134,6 +134,7 @@ struct xport {
 
     bool may_enable;                 /* May be enabled in bonds. */
     bool is_tunnel;                  /* Is a tunnel port. */
+    bool is_layer3;                  /* Is a layer 3 port. */
 
     struct cfm *cfm;                 /* CFM handle or null. */
     struct bfd *bfd;                 /* BFD handle or null. */
@@ -400,7 +401,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct 
ofbundle *ofbundle,
                  const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
                  enum ofputil_port_config config,
                  enum ofputil_port_state state, bool is_tunnel,
-                 bool may_enable)
+                 bool may_enable, bool is_layer3)
 {
     struct xport *xport = xport_lookup(ofport);
     size_t i;
@@ -424,6 +425,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct 
ofbundle *ofbundle,
     xport->stp_port_no = stp_port_no;
     xport->is_tunnel = is_tunnel;
     xport->may_enable = may_enable;
+    xport->is_layer3 = is_layer3;
     xport->odp_port = odp_port;
 
     if (xport->netdev != netdev) {
@@ -1410,7 +1412,7 @@ xlate_normal(struct xlate_ctx *ctx)
     }
 
     /* Learn source MAC. */
-    if (ctx->xin->may_learn) {
+    if (ctx->xin->may_learn && !(in_port->is_layer3)) {
         update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
     }
 
@@ -1655,6 +1657,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t 
ofp_port,
     const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
     struct flow_wildcards *wc = &ctx->xout->wc;
     struct flow *flow = &ctx->xin->flow;
+    const struct xport *in_xport = get_ofp_port(ctx->xbridge, 
flow->in_port.ofp_port);
     ovs_be16 flow_vlan_tci;
     uint32_t flow_pkt_mark;
     uint8_t flow_nw_tos;
@@ -1681,6 +1684,16 @@ compose_output_action__(struct xlate_ctx *ctx, 
ofp_port_t ofp_port,
                                                  xport->xbundle);
     }
 
+    if ((in_xport) && !(in_xport->is_layer3) && xport->is_layer3) {
+        odp_put_pop_eth_action(&ctx->xout->odp_actions);
+    }
+
+    if (flow->noeth && !(xport->is_layer3)) {
+        flow->noeth = false;
+        odp_put_push_eth_action(&ctx->xout->odp_actions, flow->dl_src,
+                                flow->dl_dst, flow->dl_type);
+    }
+
     if (xport->peer) {
         const struct xport *peer = xport->peer;
         struct flow old_flow = ctx->xin->flow;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 7dd3534..0f4ac70 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -140,7 +140,8 @@ void xlate_ofport_set(struct ofproto_dpif *, struct 
ofbundle *,
                       int stp_port_no, const struct ofproto_port_queue *qdscp,
                       size_t n_qdscp, enum ofputil_port_config,
                       enum ofputil_port_state, bool is_tunnel,
-                      bool may_enable) OVS_REQ_WRLOCK(xlate_rwlock);
+                      bool may_enable, bool is_layer3)
+    OVS_REQ_WRLOCK(xlate_rwlock);
 void xlate_ofport_remove(struct ofport_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);
 
 int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 4b029f9..56f000e 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -828,7 +828,8 @@ type_run(const char *type)
                                  ofport->bfd, ofport->peer, stp_port,
                                  ofport->qdscp, ofport->n_qdscp,
                                  ofport->up.pp.config, ofport->up.pp.state,
-                                 ofport->is_tunnel, ofport->may_enable);
+                                 ofport->is_tunnel, ofport->may_enable,
+                                 ofport->is_layer3);
             }
             ovs_rwlock_unlock(&xlate_rwlock);
 
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 2ccbcee..f031c9d 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2936,6 +2936,7 @@ handle_packet_out(struct ofconn *ofconn, const struct 
ofp_header *oh)
     } else {
         /* Ensure that the L3 header is 32-bit aligned. */
         payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2);
+        payload->l2 = payload->data;
     }
 
     /* Verify actions against packet, then send packet if successful. */
-- 
1.8.3.4 (Apple Git-47)

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to