In some usecases such as VM migration or when VMs reuse IP addresses, VMs become unreachable externally because external switches/routers on localnet have stale port-mac or arp caches. The problem resolves after some time when the caches ageout which could be minutes for port-mac bindings or hours for arp caches.
To fix this, send some gratuitous ARPs when a logical port on a localnet datapath gets added. Such gratuitous arps help on a best-effort basis to update the mac-port bindings and arp-caches of external switches and routers on the localnet. Reported-at: https://bugs.launchpad.net/networking-ovn/+bug/1545897 Reported-by: Kyle Mestery <mest...@mestery.com> Signed-off-by: Ramu Ramamurthy <ramu.ramamur...@us.ibm.com> --- ovn/controller/ovn-controller.c | 2 +- ovn/controller/pinctrl.c | 244 +++++++++++++++++++++++++++++++++++++++- ovn/controller/pinctrl.h | 5 +- tests/ovn.at | 63 +++++++++++ 4 files changed, 311 insertions(+), 3 deletions(-) diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index f68f842..ae7d44b 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -373,7 +373,7 @@ main(int argc, char *argv[]) enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int); - pinctrl_run(&ctx, &lports, br_int); + pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths); struct hmap flow_table = HMAP_INITIALIZER(&flow_table); lflow_run(&ctx, &lports, &mcgroups, &local_datapaths, diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index cbac50e..aec2aa3 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -22,6 +22,8 @@ #include "dp-packet.h" #include "flow.h" #include "lport.h" +#include "ovn-controller.h" +#include "lib/sset.h" #include "openvswitch/ofp-actions.h" #include "openvswitch/ofp-msgs.h" #include "openvswitch/ofp-print.h" @@ -30,6 +32,7 @@ #include "ovn-controller.h" #include "ovn/lib/actions.h" #include "ovn/lib/logical-fields.h" +#include "ovn/lib/ovn-util.h" #include "poll-loop.h" #include "rconn.h" #include "socket-util.h" @@ -54,6 +57,14 @@ static void run_put_arps(struct controller_ctx *, static void wait_put_arps(struct controller_ctx *); static void flush_put_arps(void); +static void init_send_garps(void); +static void destroy_send_garps(void); +static void send_garp_wait(void); +static void send_garp_run(const struct ovsrec_bridge *, + const char *chassis_id, + const struct lport_index *lports, + struct hmap *local_datapaths); + COVERAGE_DEFINE(pinctrl_drop_put_arp); void @@ -62,6 +73,7 @@ pinctrl_init(void) swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION); conn_seq_no = 0; init_put_arps(); + init_send_garps(); } static ovs_be32 @@ -266,7 +278,9 @@ pinctrl_recv(const struct ofp_header *oh, enum ofptype type) void pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports, - const struct ovsrec_bridge *br_int) + const struct ovsrec_bridge *br_int, + const char *chassis_id, + struct hmap *local_datapaths) { if (br_int) { char *target; @@ -307,6 +321,7 @@ pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports, } run_put_arps(ctx, lports); + send_garp_run(br_int, chassis_id, lports, local_datapaths); } void @@ -315,6 +330,7 @@ pinctrl_wait(struct controller_ctx *ctx) wait_put_arps(ctx); rconn_run_wait(swconn); rconn_recv_wait(swconn); + send_garp_wait(); } void @@ -322,6 +338,7 @@ pinctrl_destroy(void) { rconn_destroy(swconn); destroy_put_arps(); + destroy_send_garps(); } /* Implementation of the "put_arp" OVN action. This action sends a packet to @@ -484,3 +501,228 @@ flush_put_arps(void) free(pa); } } + +/* + * Send gratuitous arp for vif on localnet. + * + * When a new vif on localnet is added, gratuitous arps are sent announcing + * the port's mac,ip mapping. On localnet, such announcements are needed for + * switches and routers on the broadcast segment to update their port-mac + * and arp tables. + */ +struct garp_data { + struct eth_addr ea; /* Ethernet address of port. */ + ovs_be32 ipv4; /* Ipv4 address of port. */ + long long int announce_time; /* Next announcement in ms. */ + int backoff; /* Backoff for the next announcement. */ + int ofport; /* Ofport used to output this garp. */ +}; + +/* Contains garps to be sent. */ +static struct shash send_garp_data; + +/* Next garp announcement in ms. */ +static long long int send_garp_time; + +static void +init_send_garps(void) +{ + shash_init(&send_garp_data); + send_garp_time = LLONG_MAX; +} + +static void +destroy_send_garps(void) +{ + shash_destroy(&send_garp_data); +} + +/* Add or update a vif for which garps need to be announced. */ +static void +send_garp_update(const struct sbrec_port_binding *binding_rec, + struct simap *localnet_ofports, struct hmap *local_datapaths) +{ + /* Find the localnet ofport to send this garp. */ + struct local_datapath *ld + = get_local_datapath(local_datapaths, + binding_rec->datapath->tunnel_key); + if (!ld || !ld->localnet_port) { + return; + } + int ofport = simap_get(localnet_ofports, ld->localnet_port->logical_port); + /* Update garp if it exists. */ + struct garp_data *garp = shash_find_data(&send_garp_data, + binding_rec->logical_port); + if (garp) { + garp->ofport = ofport; + return; + } + /* Add garp for new vif. */ + int i; + for (i = 0; i < binding_rec->n_mac; i++) { + struct lport_addresses laddrs; + if (!extract_lport_addresses(binding_rec->mac[i], &laddrs, false) + || !laddrs.n_ipv4_addrs) { + continue; + } + struct garp_data *garp = xmalloc(sizeof *garp); + garp->ofport = ofport; + garp->ea = laddrs.ea; + garp->ipv4 = 0; + garp->ipv4 = laddrs.ipv4_addrs[0].addr; + free(laddrs.ipv4_addrs); + garp->backoff = 1; + garp->announce_time = time_msec() + 1000; + shash_add(&send_garp_data, binding_rec->logical_port, garp); + break; + } +} + +/* Remove a vif from garp announcements. */ +static void +send_garp_delete(const char *lport) +{ + struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport); + free(garp); +} + +static long long int +send_garp(struct garp_data *garp, long long int current_time) +{ + if (current_time < garp->announce_time) { + return garp->announce_time; + } + /* Compose a GARP request packet. */ + uint64_t packet_stub[128 / 8]; + struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); + compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero, + true, garp->ipv4, garp->ipv4); + + /* Compose actions. The garp request is output on localnet ofport. */ + uint64_t ofpacts_stub[4096 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); + enum ofp_version version = rconn_get_version(swconn); + ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(&packet), + .packet_len = dp_packet_size(&packet), + .buffer_id = UINT32_MAX, + .in_port = OFPP_CONTROLLER, + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + queue_msg(ofputil_encode_packet_out(&po, proto)); + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); + + /* Set the next announcement. Atmost 5 announcements are sent for a vif */ + if (garp->backoff < 16) { + garp->backoff *= 2; + garp->announce_time = current_time + garp->backoff * 1000; + } else { + garp->announce_time = LLONG_MAX; + } + return garp->announce_time; +} + +/* Get localnet vifs, and ofport for localnet patch ports. */ +static void +get_localnet_vifs(const struct ovsrec_bridge *br_int, + const char *this_chassis_id, + const struct lport_index *lports, + struct hmap *local_datapaths, + struct sset *localnet_vifs, + struct simap *localnet_ofports) +{ + for (int i = 0; i < br_int->n_ports; i++) { + const struct ovsrec_port *port_rec = br_int->ports[i]; + if (!strcmp(port_rec->name, br_int->name)) { + continue; + } + const char *chassis_id = smap_get(&port_rec->external_ids, + "ovn-chassis-id"); + if (chassis_id && !strcmp(chassis_id, this_chassis_id)) { + continue; + } + const char *localnet = smap_get(&port_rec->external_ids, + "ovn-localnet-port"); + for (int j = 0; j < port_rec->n_interfaces; j++) { + const struct ovsrec_interface *iface_rec = port_rec->interfaces[j]; + if (!iface_rec->n_ofport) { + continue; + } + if (localnet) { + int64_t ofport = iface_rec->ofport[0]; + if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) { + continue; + } + simap_put(localnet_ofports, localnet, ofport); + continue; + } + const char *iface_id = smap_get(&iface_rec->external_ids, + "iface-id"); + if (!iface_id) { + continue; + } + const struct sbrec_port_binding *pb + = lport_lookup_by_name(lports, iface_id); + if (!pb) { + continue; + } + struct local_datapath *ld + = get_local_datapath(local_datapaths, + pb->datapath->tunnel_key); + if (ld && ld->localnet_port) { + sset_add(localnet_vifs, iface_id); + } + } + } +} + +static void +send_garp_wait(void) { + poll_timer_wait_until(send_garp_time); +} + +static void +send_garp_run(const struct ovsrec_bridge *br_int, const char *chassis_id, + const struct lport_index *lports, + struct hmap *local_datapaths) +{ + struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs); + struct simap localnet_ofports = SIMAP_INITIALIZER(&localnet_ofports); + + get_localnet_vifs(br_int, chassis_id, lports, local_datapaths, + &localnet_vifs, &localnet_ofports); + + /* For deleted ports, remove from send_garp_data. */ + struct shash_node *iter, *next; + SHASH_FOR_EACH_SAFE(iter, next, &send_garp_data) { + if (!sset_contains(&localnet_vifs, iter->name)) { + send_garp_delete(iter->name); + } + } + /* Update send_garp_data. */ + const char *iface_id; + SSET_FOR_EACH(iface_id, &localnet_vifs) { + const struct sbrec_port_binding *pb = lport_lookup_by_name(lports, + iface_id); + if (pb) { + send_garp_update(pb, &localnet_ofports, local_datapaths); + } + } + /* Send garps, and update the next announcement. */ + long long int current_time = time_msec(); + send_garp_time = LLONG_MAX; + SHASH_FOR_EACH(iter, &send_garp_data) { + long long int next_announce = send_garp(iter->data, current_time); + if (send_garp_time > next_announce) { + send_garp_time = next_announce; + } + } + sset_destroy(&localnet_vifs); + simap_destroy(&localnet_ofports); +} diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h index 2ad5ec2..553eace 100644 --- a/ovn/controller/pinctrl.h +++ b/ovn/controller/pinctrl.h @@ -21,13 +21,16 @@ #include "openvswitch/meta-flow.h" +struct hmap; struct lport_index; struct ovsrec_bridge; struct controller_ctx; void pinctrl_init(void); void pinctrl_run(struct controller_ctx *, const struct lport_index *, - const struct ovsrec_bridge *br_int); + const struct ovsrec_bridge *, + const char *chassis_id, + struct hmap *local_datapaths); void pinctrl_wait(struct controller_ctx *); void pinctrl_destroy(void); diff --git a/tests/ovn.at b/tests/ovn.at index 8c1f1be..8dd33c2 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2341,4 +2341,67 @@ OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP +AT_SETUP([ovn -- send gratuitous arp on localnet]) +AT_KEYWORDS([ovn]) +ovn_start +ovn-nbctl lswitch-add lsw0 +net_add n1 +sim_add hv +as hv +ovs-vsctl \ + -- add-br br-phys \ + -- add-br br-eth0 + +ovn_attach n1 br-phys 192.168.0.1 + +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-eth0]) +AT_CHECK([ovs-vsctl add-port br-eth0 snoopvif -- set Interface snoopvif options:tx_pcap=hv/snoopvif-tx.pcap options:rxq_pcap=hv/snoopvif-rx.pcap]) + +# Create a vif. +AT_CHECK([ovn-nbctl lport-add lsw0 localvif1]) +AT_CHECK([ovn-nbctl lport-set-addresses localvif1 "f0:00:00:00:00:01 192.168.1.2"]) +AT_CHECK([ovn-nbctl lport-set-port-security localvif1 "f0:00:00:00:00:01"]) + +# Create a localnet port. +AT_CHECK([ovn-nbctl lport-add lsw0 ln_port]) +AT_CHECK([ovn-nbctl lport-set-addresses ln_port unknown]) +AT_CHECK([ovn-nbctl lport-set-type ln_port localnet]) +AT_CHECK([ovn-nbctl lport-set-options ln_port network_name=physnet1]) + +AT_CHECK([ovs-vsctl add-port br-int localvif1 -- set Interface localvif1 external_ids:iface-id=localvif1]) + +# Wait for packet to be received. +OVS_WAIT_UNTIL([test `wc -c < "hv/snoopvif-tx.pcap"` -ge 50]) +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv/snoopvif-tx.pcap | trim_zeros > packets +expected="fffffffffffff0000000000108060001080006040001f00000000001c0a80102000000000000c0a80102" +echo $expected > expout +AT_CHECK([sort packets], [0], [expout]) +cat packets + +# Delete the localnet ports. +AT_CHECK([ovs-vsctl del-port localvif1]) +AT_CHECK([ovn-nbctl lport-del ln_port]) + +as hv +OVS_APP_EXIT_AND_WAIT([ovn-controller]) +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as main +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +AT_CLEANUP -- 2.3.9 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev