In some usecases such as VM migration or when VMs reuse IP addresses,
VMs become unreachable externally because
external switches/routers on localnet
have stale port-mac or arp caches. The problem resolves
after some time when the caches ageout which could be
minutes for port-mac bindings or hours for arp caches.

To fix this, send some gratuitous ARPs when a logical
port on a localnet datapath gets added. Such gratuitous arps
help on a best-effort basis to update the mac-port bindings and arp-caches
of external switches and routers on the localnet.

Reported-at: https://bugs.launchpad.net/networking-ovn/+bug/1545897
Reported-by: Kyle Mestery <mest...@mestery.com>
Signed-off-by: Ramu Ramamurthy <ramu.ramamur...@us.ibm.com>
---
 ovn/controller/ovn-controller.c |   2 +-
 ovn/controller/pinctrl.c        | 244 +++++++++++++++++++++++++++++++++++++++-
 ovn/controller/pinctrl.h        |   5 +-
 tests/ovn.at                    |  63 +++++++++++
 4 files changed, 311 insertions(+), 3 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index f68f842..ae7d44b 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -373,7 +373,7 @@ main(int argc, char *argv[])
 
             enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int);
 
-            pinctrl_run(&ctx, &lports, br_int);
+            pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths);
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
             lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index cbac50e..aec2aa3 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -22,6 +22,8 @@
 #include "dp-packet.h"
 #include "flow.h"
 #include "lport.h"
+#include "ovn-controller.h"
+#include "lib/sset.h"
 #include "openvswitch/ofp-actions.h"
 #include "openvswitch/ofp-msgs.h"
 #include "openvswitch/ofp-print.h"
@@ -30,6 +32,7 @@
 #include "ovn-controller.h"
 #include "ovn/lib/actions.h"
 #include "ovn/lib/logical-fields.h"
+#include "ovn/lib/ovn-util.h"
 #include "poll-loop.h"
 #include "rconn.h"
 #include "socket-util.h"
@@ -54,6 +57,14 @@ static void run_put_arps(struct controller_ctx *,
 static void wait_put_arps(struct controller_ctx *);
 static void flush_put_arps(void);
 
+static void init_send_garps(void);
+static void destroy_send_garps(void);
+static void send_garp_wait(void);
+static void send_garp_run(const struct ovsrec_bridge *,
+                          const char *chassis_id,
+                          const struct lport_index *lports,
+                          struct hmap *local_datapaths);
+
 COVERAGE_DEFINE(pinctrl_drop_put_arp);
 
 void
@@ -62,6 +73,7 @@ pinctrl_init(void)
     swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
     conn_seq_no = 0;
     init_put_arps();
+    init_send_garps();
 }
 
 static ovs_be32
@@ -266,7 +278,9 @@ pinctrl_recv(const struct ofp_header *oh, enum ofptype type)
 
 void
 pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports,
-            const struct ovsrec_bridge *br_int)
+            const struct ovsrec_bridge *br_int,
+            const char *chassis_id,
+            struct hmap *local_datapaths)
 {
     if (br_int) {
         char *target;
@@ -307,6 +321,7 @@ pinctrl_run(struct controller_ctx *ctx, const struct 
lport_index *lports,
     }
 
     run_put_arps(ctx, lports);
+    send_garp_run(br_int, chassis_id, lports, local_datapaths);
 }
 
 void
@@ -315,6 +330,7 @@ pinctrl_wait(struct controller_ctx *ctx)
     wait_put_arps(ctx);
     rconn_run_wait(swconn);
     rconn_recv_wait(swconn);
+    send_garp_wait();
 }
 
 void
@@ -322,6 +338,7 @@ pinctrl_destroy(void)
 {
     rconn_destroy(swconn);
     destroy_put_arps();
+    destroy_send_garps();
 }
 
 /* Implementation of the "put_arp" OVN action.  This action sends a packet to
@@ -484,3 +501,228 @@ flush_put_arps(void)
         free(pa);
     }
 }
+
+/*
+ * Send gratuitous arp for vif on localnet.
+ *
+ * When a new vif on localnet is added, gratuitous arps are sent announcing
+ * the port's mac,ip mapping.  On localnet, such announcements are needed for
+ * switches and routers on the broadcast segment to update their port-mac
+ * and arp tables.
+ */
+struct garp_data {
+    struct eth_addr ea;          /* Ethernet address of port. */
+    ovs_be32 ipv4;               /* Ipv4 address of port. */
+    long long int announce_time; /* Next announcement in ms. */
+    int backoff;                 /* Backoff for the next announcement. */
+    int ofport;                  /* Ofport used to output this garp. */
+};
+
+/* Contains garps to be sent. */
+static struct shash send_garp_data;
+
+/* Next garp announcement in ms. */
+static long long int send_garp_time;
+
+static void
+init_send_garps(void)
+{
+    shash_init(&send_garp_data);
+    send_garp_time = LLONG_MAX;
+}
+
+static void
+destroy_send_garps(void)
+{
+    shash_destroy(&send_garp_data);
+}
+
+/* Add or update a vif for which garps need to be announced. */
+static void
+send_garp_update(const struct sbrec_port_binding *binding_rec,
+                 struct simap *localnet_ofports, struct hmap *local_datapaths)
+{
+    /* Find the localnet ofport to send this garp. */
+    struct local_datapath *ld
+        = get_local_datapath(local_datapaths,
+                             binding_rec->datapath->tunnel_key);
+    if (!ld || !ld->localnet_port) {
+        return;
+    }
+    int ofport = simap_get(localnet_ofports, ld->localnet_port->logical_port);
+    /* Update garp if it exists. */
+    struct garp_data *garp = shash_find_data(&send_garp_data,
+                                             binding_rec->logical_port);
+    if (garp) {
+        garp->ofport = ofport;
+        return;
+    }
+    /* Add garp for new vif. */
+    int i;
+    for (i = 0; i < binding_rec->n_mac; i++) {
+        struct lport_addresses laddrs;
+        if (!extract_lport_addresses(binding_rec->mac[i], &laddrs, false)
+            || !laddrs.n_ipv4_addrs) {
+            continue;
+        }
+        struct garp_data *garp = xmalloc(sizeof *garp);
+        garp->ofport = ofport;
+        garp->ea = laddrs.ea;
+        garp->ipv4 = 0;
+        garp->ipv4 = laddrs.ipv4_addrs[0].addr;
+        free(laddrs.ipv4_addrs);
+        garp->backoff = 1;
+        garp->announce_time = time_msec() + 1000;
+        shash_add(&send_garp_data, binding_rec->logical_port, garp);
+        break;
+    }
+}
+
+/* Remove a vif from garp announcements. */
+static void
+send_garp_delete(const char *lport)
+{
+    struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport);
+    free(garp);
+}
+
+static long long int
+send_garp(struct garp_data *garp, long long int current_time)
+{
+    if (current_time < garp->announce_time) {
+        return garp->announce_time;
+    }
+    /* Compose a GARP request packet. */
+    uint64_t packet_stub[128 / 8];
+    struct dp_packet packet;
+    dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
+    compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero,
+                true, garp->ipv4, garp->ipv4);
+
+    /* Compose actions.  The garp request is output on localnet ofport. */
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(&packet),
+        .packet_len = dp_packet_size(&packet),
+        .buffer_id = UINT32_MAX,
+        .in_port = OFPP_CONTROLLER,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(ofputil_encode_packet_out(&po, proto));
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&ofpacts);
+
+    /* Set the next announcement.  Atmost 5 announcements are sent for a vif */
+    if (garp->backoff < 16) {
+        garp->backoff *= 2;
+        garp->announce_time = current_time + garp->backoff * 1000;
+    } else {
+        garp->announce_time = LLONG_MAX;
+    }
+    return garp->announce_time;
+}
+
+/* Get localnet vifs, and ofport for localnet patch ports. */
+static void
+get_localnet_vifs(const struct ovsrec_bridge *br_int,
+                  const char *this_chassis_id,
+                  const struct lport_index *lports,
+                  struct hmap *local_datapaths,
+                  struct sset *localnet_vifs,
+                  struct simap *localnet_ofports)
+{
+    for (int i = 0; i < br_int->n_ports; i++) {
+        const struct ovsrec_port *port_rec = br_int->ports[i];
+        if (!strcmp(port_rec->name, br_int->name)) {
+            continue;
+        }
+        const char *chassis_id = smap_get(&port_rec->external_ids,
+                                          "ovn-chassis-id");
+        if (chassis_id && !strcmp(chassis_id, this_chassis_id)) {
+            continue;
+        }
+        const char *localnet = smap_get(&port_rec->external_ids,
+                                        "ovn-localnet-port");
+        for (int j = 0; j < port_rec->n_interfaces; j++) {
+            const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
+            if (!iface_rec->n_ofport) {
+                continue;
+            }
+            if (localnet) {
+                int64_t ofport = iface_rec->ofport[0];
+                if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
+                    continue;
+                }
+                simap_put(localnet_ofports, localnet, ofport);
+                continue;
+            }
+            const char *iface_id = smap_get(&iface_rec->external_ids,
+                                            "iface-id");
+            if (!iface_id) {
+                continue;
+            }
+            const struct sbrec_port_binding *pb
+                = lport_lookup_by_name(lports, iface_id);
+            if (!pb) {
+                continue;
+            }
+            struct local_datapath *ld
+                = get_local_datapath(local_datapaths,
+                                     pb->datapath->tunnel_key);
+            if (ld && ld->localnet_port) {
+                sset_add(localnet_vifs, iface_id);
+            }
+        }
+    }
+}
+
+static void
+send_garp_wait(void) {
+    poll_timer_wait_until(send_garp_time);
+}
+
+static void
+send_garp_run(const struct ovsrec_bridge *br_int, const char *chassis_id,
+              const struct lport_index *lports,
+              struct hmap *local_datapaths)
+{
+    struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
+    struct simap localnet_ofports = SIMAP_INITIALIZER(&localnet_ofports);
+
+    get_localnet_vifs(br_int, chassis_id, lports, local_datapaths,
+                      &localnet_vifs, &localnet_ofports);
+
+    /* For deleted ports, remove from send_garp_data. */
+    struct shash_node *iter, *next;
+    SHASH_FOR_EACH_SAFE(iter, next, &send_garp_data) {
+        if (!sset_contains(&localnet_vifs, iter->name)) {
+            send_garp_delete(iter->name);
+        }
+    }
+    /* Update send_garp_data. */
+    const char *iface_id;
+    SSET_FOR_EACH(iface_id, &localnet_vifs) {
+        const struct sbrec_port_binding *pb = lport_lookup_by_name(lports,
+                                                                   iface_id);
+        if (pb) {
+            send_garp_update(pb, &localnet_ofports, local_datapaths);
+        }
+    }
+    /* Send garps, and update the next announcement. */
+    long long int current_time = time_msec();
+    send_garp_time = LLONG_MAX;
+    SHASH_FOR_EACH(iter, &send_garp_data) {
+        long long int next_announce = send_garp(iter->data, current_time);
+        if (send_garp_time > next_announce) {
+            send_garp_time = next_announce;
+        }
+    }
+    sset_destroy(&localnet_vifs);
+    simap_destroy(&localnet_ofports);
+}
diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h
index 2ad5ec2..553eace 100644
--- a/ovn/controller/pinctrl.h
+++ b/ovn/controller/pinctrl.h
@@ -21,13 +21,16 @@
 
 #include "openvswitch/meta-flow.h"
 
+struct hmap;
 struct lport_index;
 struct ovsrec_bridge;
 struct controller_ctx;
 
 void pinctrl_init(void);
 void pinctrl_run(struct controller_ctx *, const struct lport_index *,
-                 const struct ovsrec_bridge *br_int);
+                 const struct ovsrec_bridge *,
+                 const char *chassis_id,
+                 struct hmap *local_datapaths);
 void pinctrl_wait(struct controller_ctx *);
 void pinctrl_destroy(void);
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 8c1f1be..8dd33c2 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -2341,4 +2341,67 @@ OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 
 AT_CLEANUP
 
+AT_SETUP([ovn -- send gratuitous arp on localnet])
+AT_KEYWORDS([ovn])
 
+ovn_start
+ovn-nbctl lswitch-add lsw0
+net_add n1
+sim_add hv
+as hv
+ovs-vsctl \
+    -- add-br br-phys \
+    -- add-br br-eth0
+
+ovn_attach n1 br-phys 192.168.0.1
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=physnet1:br-eth0])
+AT_CHECK([ovs-vsctl add-port br-eth0 snoopvif -- set Interface snoopvif 
options:tx_pcap=hv/snoopvif-tx.pcap options:rxq_pcap=hv/snoopvif-rx.pcap])
+
+# Create a vif.
+AT_CHECK([ovn-nbctl lport-add lsw0 localvif1])
+AT_CHECK([ovn-nbctl lport-set-addresses localvif1 "f0:00:00:00:00:01 
192.168.1.2"])
+AT_CHECK([ovn-nbctl lport-set-port-security localvif1 "f0:00:00:00:00:01"])
+
+# Create a localnet port.
+AT_CHECK([ovn-nbctl lport-add lsw0 ln_port])
+AT_CHECK([ovn-nbctl lport-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lport-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lport-set-options ln_port network_name=physnet1])
+
+AT_CHECK([ovs-vsctl add-port br-int localvif1 -- set Interface localvif1 
external_ids:iface-id=localvif1])
+
+# Wait for packet to be received.
+OVS_WAIT_UNTIL([test `wc -c < "hv/snoopvif-tx.pcap"` -ge 50])
+trim_zeros() {
+    sed 's/\(00\)\{1,\}$//'
+}
+$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv/snoopvif-tx.pcap | trim_zeros > 
packets
+expected="fffffffffffff0000000000108060001080006040001f00000000001c0a80102000000000000c0a80102"
+echo $expected > expout
+AT_CHECK([sort packets], [0], [expout])
+cat packets
+
+# Delete the localnet ports.
+AT_CHECK([ovs-vsctl del-port localvif1])
+AT_CHECK([ovn-nbctl lport-del ln_port])
+
+as hv
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as main
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+AT_CLEANUP
-- 
2.3.9

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to