This patch adds L2 SW gateway support, which depends on the physical endpoint patch changes. The gateway changes in this patch series are for SW gateways only; HW gateways will be updated later.
The SW gateway runs in the context of ovn-controller, just as other HVs do. The gateway node uses a single bridge (call it br-int) that is actively controlled by OVN. This bridge also houses the tunnels connecting to other HVs. Additional physical bridges are created for each physical port supported by the gateway. These bridges enforce normal action only by default. A pair of patch ports is created to connect each LS to br-int. A new logical port type is added for SW gateways called "gw". This is needed to differentiate logic from HW gateway support. Changes to HW gateway support are coming in a subsequent series. patch.c: Physical bridge and patch port creation. physical.c: Add SW gateway flow generation support, including physical endpoint support. Support gateway br-int patch ports as "physical" ports. binding.c: Support the use of physical endpoint for gateway (gw) logical ports. ovn-nb.xml: Document the new "gw" logical port type; remove the tag field for localnet under container support. ovn-sb.xml: Document the new "gw" logical port type. ovn-controller.c, patch.h: Add a chassis name parameter needed for gateways. Particular logical port types are not presently specified/enforced in the NB and SB schemas themselves. This may be to allow flexibility and ease of adding new types. Test case updates: ovn.at: A new test is added to exercise the SW gateway for L2 switching and also using physical endpoints. 
Signed-off-by: Darrell Ball <db...@vmware.com> --- ovn/controller/binding.c | 13 ++-- ovn/controller/ovn-controller.c | 2 +- ovn/controller/patch.c | 130 ++++++++++++++++++++++++++++++++++++++- ovn/controller/patch.h | 2 +- ovn/controller/physical.c | 35 ++++++++++- ovn/ovn-nb.xml | 4 ++ ovn/ovn-sb.xml | 4 ++ tests/ovn.at | 132 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 312 insertions(+), 10 deletions(-) diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c index d3ca9c9..ee7f6ea 100644 --- a/ovn/controller/binding.c +++ b/ovn/controller/binding.c @@ -181,7 +181,12 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, = shash_find_and_delete(&lports, binding_rec->logical_port); if (iface_rec || (binding_rec->parent_port && binding_rec->parent_port[0] && - sset_contains(&all_lports, binding_rec->parent_port))) { + sset_contains(&all_lports, binding_rec->parent_port)) + || ( !strcmp(binding_rec->type, "gw") && + binding_rec->phys_endpts && + binding_rec->phys_endpts[0] && + binding_rec->phys_endpts[0]->chassis && + binding_rec->phys_endpts[0]->chassis == chassis_rec )) { if (binding_rec->parent_port && binding_rec->parent_port[0]) { /* Add child logical port to the set of all local ports. 
*/ sset_add(&all_lports, binding_rec->logical_port); @@ -207,9 +212,9 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, } } else if (binding_rec->chassis == chassis_rec) { if (ctx->ovnsb_idl_txn) { - VLOG_INFO("Releasing lport %s from this chassis.", - binding_rec->logical_port); - sbrec_port_binding_set_chassis(binding_rec, NULL); + sbrec_port_binding_set_chassis(binding_rec, NULL); + VLOG_INFO("Releasing lport %s from this chassis.", + binding_rec->logical_port); } } else if (!binding_rec->chassis && !strcmp(binding_rec->type, "localnet")) { diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index e52b731..fd57e21 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -293,7 +293,7 @@ main(int argc, char *argv[]) } if (br_int) { - patch_run(&ctx, br_int, &local_datapaths); + patch_run(&ctx, br_int, &local_datapaths, chassis_id); struct lport_index lports; struct mcgroup_index mcgroups; diff --git a/ovn/controller/patch.c b/ovn/controller/patch.c index 753ce3e..874fb94 100644 --- a/ovn/controller/patch.c +++ b/ovn/controller/patch.c @@ -276,9 +276,132 @@ add_logical_patch_ports(struct controller_ctx *ctx, } } +static const struct ovsrec_bridge * +create_br_physical_for_gateway(struct controller_ctx *ctx, + const struct ovsrec_open_vswitch *cfg, + const char *bridge_name) +{ + if (!ctx->ovs_idl_txn) { + return NULL; + } + + ovsdb_idl_txn_add_comment(ctx->ovs_idl_txn, + "ovn-controller: creating gateway physical bridge '%s'", + bridge_name); + + struct ovsrec_interface *iface; + iface = ovsrec_interface_insert(ctx->ovs_idl_txn); + ovsrec_interface_set_name(iface, bridge_name); + ovsrec_interface_set_type(iface, "internal"); + + struct ovsrec_port *port; + port = ovsrec_port_insert(ctx->ovs_idl_txn); + ovsrec_port_set_name(port, bridge_name); + ovsrec_port_set_interfaces(port, &iface, 1); + + struct ovsrec_bridge *bridge; + bridge = ovsrec_bridge_insert(ctx->ovs_idl_txn); + 
ovsrec_bridge_set_name(bridge, bridge_name); + ovsrec_bridge_set_fail_mode(bridge, "standalone"); + ovsrec_bridge_set_ports(bridge, &port, 1); + + struct ovsrec_bridge **bridges; + size_t bytes = sizeof *bridges * cfg->n_bridges; + bridges = xmalloc(bytes + sizeof *bridges); + memcpy(bridges, cfg->bridges, bytes); + bridges[cfg->n_bridges] = bridge; + ovsrec_open_vswitch_verify_bridges(cfg); + ovsrec_open_vswitch_set_bridges(cfg, bridges, cfg->n_bridges + 1); + + return bridge; +} + +static const struct ovsrec_bridge * +get_and_create_br_as_needed(struct controller_ctx *ctx, const char *br_name) +{ + const struct ovsrec_open_vswitch *cfg; + + cfg = ovsrec_open_vswitch_first(ctx->ovs_idl); + if (!cfg) { + return NULL; + } + + const struct ovsrec_bridge *br; + br = get_bridge(ctx->ovs_idl, br_name); + if (!br) { + return create_br_physical_for_gateway(ctx, cfg, br_name); + } + + return br; +} + +/* Note that create_patch_port checks/avoids redundant creates */ +static void +add_gateway_ls_br_and_patch_ports(struct controller_ctx *ctx, + const struct ovsrec_bridge *br_int, + struct shash *existing_ports, + const char *chassis_id) +{ + const struct ovsrec_bridge *br_gateway_physical = NULL; + + if (!br_int || !chassis_id) { + return; + } + + const struct sbrec_port_binding *binding; + const struct sbrec_physical_endpoint * phys_endpt_rec; + const struct sbrec_chassis *chassis_rec; + + SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) { + + if (!binding || !binding->phys_endpts || + strcmp(binding->type, "gw")) { + /* Not a binding for a gw port. 
*/ + continue; + } + + if(!binding->phys_endpts[0]) { + VLOG_ERR("No physical endpt configured for gw port '%s' ", + binding->logical_port); + return; + } + + phys_endpt_rec = binding->phys_endpts[0]; + chassis_rec = phys_endpt_rec->chassis; + + if(!chassis_rec || !chassis_rec->name) { + VLOG_ERR("No chassis configured " + "for gw port '%s' in phys endpt ", + binding->logical_port); + return; + } + + /* The logical port is not on this chassis */ + if(strcmp(chassis_rec->name, chassis_id)) { + continue; + } + + br_gateway_physical = + get_and_create_br_as_needed(ctx, phys_endpt_rec->chassis_port); + + char *name1 = patch_port_name(br_int->name, binding->logical_port); + char *name2 = patch_port_name(binding->logical_port, br_int->name); + + create_patch_port(ctx, "ovn-gateway-patch-port", binding->logical_port, + br_int, name1, br_gateway_physical, name2, existing_ports); + create_patch_port(ctx, "ovn-gateway-patch-port", binding->logical_port, + br_gateway_physical, name2, br_int, name1, existing_ports); + + free(name1); + free(name2); + + } + +} + void patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, - struct hmap *local_datapaths) + struct hmap *local_datapaths, const char *chassis_id) { if (!ctx->ovs_idl_txn) { return; @@ -289,7 +412,8 @@ patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, const struct ovsrec_port *port; OVSREC_PORT_FOR_EACH (port, ctx->ovs_idl) { if (smap_get(&port->external_ids, "ovn-localnet-port") || - smap_get(&port->external_ids, "ovn-logical-patch-port")) { + smap_get(&port->external_ids, "ovn-logical-patch-port") || + smap_get(&port->external_ids, "ovn-gateway-patch-port")) { shash_add(&existing_ports, port->name, port); } } @@ -298,6 +422,8 @@ patch_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, * 'existing_ports' any patch ports that do exist in the database and * should be there. 
*/ add_bridge_mappings(ctx, br_int, &existing_ports, local_datapaths); + add_gateway_ls_br_and_patch_ports(ctx, br_int, + &existing_ports, chassis_id); add_logical_patch_ports(ctx, br_int, &existing_ports); /* Now 'existing_ports' only still contains patch ports that exist in the diff --git a/ovn/controller/patch.h b/ovn/controller/patch.h index 38ee7a8..f040b25 100644 --- a/ovn/controller/patch.h +++ b/ovn/controller/patch.h @@ -27,6 +27,6 @@ struct hmap; struct ovsrec_bridge; void patch_run(struct controller_ctx *, const struct ovsrec_bridge *br_int, - struct hmap *local_datapaths); + struct hmap *local_datapaths, const char *chassis_id); #endif /* ovn/patch.h */ diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c index 657c3e2..9b73535 100644 --- a/ovn/controller/physical.c +++ b/ovn/controller/physical.c @@ -169,6 +169,8 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, "ovn-localnet-port"); const char *logpatch = smap_get(&port_rec->external_ids, "ovn-logical-patch-port"); + const char *gateway_patch = smap_get(&port_rec->external_ids, + "ovn-gateway-patch-port"); for (int j = 0; j < port_rec->n_interfaces; j++) { const struct ovsrec_interface *iface_rec = port_rec->interfaces[j]; @@ -189,6 +191,9 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, /* localnet patch ports can be handled just like VIFs. */ simap_put(&localvif_to_ofport, localnet, ofport); break; + } else if (is_patch && gateway_patch) { + /* gateway patch ports can be handled just like VIFs. */ + simap_put(&localvif_to_ofport, gateway_patch, ofport); } else if (is_patch && logpatch) { /* Logical patch ports can be handled just like VIFs. */ simap_put(&localvif_to_ofport, logpatch, ofport); @@ -231,6 +236,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, /* Set up flows in table 0 for physical-to-logical translation and in table * 64 for logical-to-physical translation. 
*/ const struct sbrec_port_binding *binding; + const struct sbrec_physical_endpoint * phys_endpt_rec; SBREC_PORT_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) { /* Find the OpenFlow port for the logical port, as 'ofport'. This is * one of: @@ -270,6 +276,24 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, if (!strcmp(binding->type, "localnet") && ofport && binding->tag) { tag = *binding->tag; } + + /* For gw logical ports without phys_endpt + * binding, a tag of 0 is the default */ + if (!strcmp(binding->type, "gw") && ofport && + binding->phys_endpts) { + + /* Use any phys_endpt for localnet if shared port + * name; if localnet port is unique name, then there is a + * single phys_endpt. + * gw logical ports have a single phys_endpt */ + phys_endpt_rec = binding->phys_endpts[0]; + + /* only single vlan encap is supported initially */ + if (phys_endpt_rec && (!strcmp(phys_endpt_rec->type, "vlan"))) { + /* valid values verified on configuration */ + (void) str_to_int(phys_endpt_rec->ingress_encap, 10, &tag); + } + } } const struct chassis_tunnel *tun = NULL; @@ -326,7 +350,11 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, /* Match a VLAN tag and strip it, including stripping priority tags * (e.g. VLAN ID 0). In the latter case we'll add a second flow * for frames that lack any 802.1Q header later. */ - if (tag || !strcmp(binding->type, "localnet")) { + + if (tag || + (!strcmp(binding->type, "localnet")) || + (!strcmp(binding->type, "gw"))) { + match_set_dl_vlan(&match, htons(tag)); ofpact_put_STRIP_VLAN(&ofpacts); } @@ -350,7 +378,10 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, tag ? 150 : 100, &match, &ofpacts); - if (!tag && !strcmp(binding->type, "localnet")) { + if (!tag && + ((!strcmp(binding->type, "localnet")) || + (!strcmp(binding->type, "gw")))) { + /* Add a second flow for frames that lack any 802.1Q * header. 
For these, drop the OFPACT_STRIP_VLAN * action. */ diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index e65bc3a..1e85b9a 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -138,6 +138,10 @@ <dd> A port to a logical switch on a VTEP gateway. </dd> + <dt><code>gw</code></dt> + <dd> + A port to a logical switch on a software gateway. + </dd> </dl> </column> </group> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index af80d0e..08239c8 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -1370,6 +1370,10 @@ tcp.flags = RST; table="Port_Binding"/>:<code>vtep-logical-switch</code> must also be defined. </dd> + <dt><code>gw</code></dt> + <dd> + A port to a logical switch on a software gateway chassis. + </dd> </dl> </column> </group> diff --git a/tests/ovn.at b/tests/ovn.at index a5757d5..bf008a6 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -1223,6 +1223,138 @@ for sim in hv1 hv2 hv3 vtep main; do done AT_CLEANUP +# Similar test to "hardware GW" +AT_SETUP([ovn -- 3 HVs, 1 VIFs/HV, 1 software GW, 1 LS]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +# Configure the Northbound database +ovn-nbctl lswitch-add lsw0 + +ovn-nbctl lport-add lsw0 lp1 +ovn-nbctl lport-set-addresses lp1 f0:00:00:00:00:01 + +ovn-nbctl lport-add lsw0 lp2 +ovn-nbctl lport-set-addresses lp2 f0:00:00:00:00:02 + +ovn-nbctl lport-add lsw0 lp-gw +ovn-nbctl lport-set-type lp-gw gw +ovn-nbctl lport-set-addresses lp-gw unknown f0:00:00:00:00:03 + +net_add n1 # Network to connect hv1, hv2, and gw +net_add n2 # Network to connect gw and hv3 + +# Create hypervisor hv1 connected to n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl add-port br-int vif1 -- set Interface vif1 external-ids:iface-id=lp1 options:tx_pcap=hv1/vif1-tx.pcap options:rxq_pcap=hv1/vif1-rx.pcap ofport-request=1 + +# Create hypervisor hv2 connected to n1 +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl add-port br-int vif2 -- set Interface vif2 
external-ids:iface-id=lp2 options:tx_pcap=hv2/vif2-tx.pcap options:rxq_pcap=hv2/vif2-rx.pcap ofport-request=1 + +# Create hypervisor hv_gw connected to n1 and n2 +# connect br-phys bridge to n1; connect hv-gw bridge to n2 +sim_add hv_gw +as hv_gw +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.3 + +ovn-sbctl phys-endpt-add pe1 hv_gw port_1 vlan 0 0 +ovn-sbctl lport-bind-phys-endpt lp-gw pe1 +net_attach n2 port_1 + +as hv_gw ovs-vsctl show + +# Add hv3 on the other side of the GW +sim_add hv3 +as hv3 +ovs-vsctl add-br br-phys +net_attach n2 br-phys +ovs-vsctl add-port br-phys vif3 -- set Interface vif3 options:tx_pcap=hv3/vif3-tx.pcap options:rxq_pcap=hv3/vif3-rx.pcap ofport-request=1 + + +# Pre-populate the hypervisors' ARP tables so that we don't lose any +# packets for ARP resolution (native tunneling doesn't queue packets +# for ARP resolution). +ovn_populate_arp + +# Allow some time for ovn-northd and ovn-controller to catch up. +# XXX This should be more systematic. +sleep 1 + +# test_packet INPORT DST SRC ETHTYPE OUTPORT... +# +# This shell function causes a packet to be received on INPORT. The packet's +# content has Ethernet destination DST and source SRC (each exactly 12 hex +# digits) and Ethernet type ETHTYPE (4 hex digits). The OUTPORTs (zero or +# more) list the VIFs on which the packet should be received. INPORT and the +# OUTPORTs are specified as lport numbers, e.g. 1 for vif1. +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} +for i in 1 2 3; do + : > $i.expected +done +test_packet() { + local inport=$1 packet=$2$3$4; shift; shift; shift; shift + #hv=hv`echo $inport | sed 's/^\(.\).*/\1/'` + hv=hv$inport + vif=vif$inport + as $hv ovs-appctl netdev-dummy/receive $vif $packet + for outport; do + echo $packet | trim_zeros >> $outport.expected + done +} + +# Send packets between all pairs of source and destination ports: +# +# 1. Unicast packets are delivered to exactly one lport (except that packets +# destined to their input ports are dropped). 
+# +# 2. Broadcast and multicast are delivered to all lports except the input port. +# +# 3. The lswitch delivers packets with an unknown destination to lports with +# "unknown" among their MAC addresses (and port security disabled). +for s in 1 2 3; do + bcast= + unknown= + for d in 1 2 3; do + if test $d != $s; then unicast=$d; else unicast=; fi + test_packet $s f0000000000$d f0000000000$s 00$s$d $unicast #1 + + # The vtep (vif3) is the only one configured for "unknown" + if test $d != $s && test $d = 3; then + unknown="$unknown $d" + fi + bcast="$bcast $unicast" + done + + test_packet $s ffffffffffff f0000000000$s 0${s}ff $bcast #2 + test_packet $s 010000000000 f0000000000$s 0${s}ff $bcast #3 + test_packet $s f0000000ffff f0000000000$s 0${s}66 $unknown #4 +done + +# Allow some time for packet forwarding. +# XXX This can be improved. +sleep 3 + +# Now check the packets actually received against the ones expected. +for i in 1 2 3; do + file=hv$i/vif$i-tx.pcap + echo $file + $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $file | trim_zeros > $i.packets + sort $i.expected > expout + AT_CHECK([sort $i.packets], [0], [expout]) + echo +done +AT_CLEANUP + # 3 hypervisors, 3 logical switches with 3 logical ports each, 1 logical router AT_SETUP([ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR]) AT_SKIP_IF([test $HAVE_PYTHON = no]) -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev