Got an offline ACK from Ethan, Applied to patch to branch-2.3
On Tue, Dec 23, 2014 at 10:42 AM, Alex Wang <al...@nicira.com> wrote: > After commit 0c7812e5e (recirculation: Do not drop packet when > there is no match from internal table.), if flow keys are modified > before the recirculation action (e.g. set vlan ID), the miss > handling of recirc'ed packets may not reach the intended > 'ofproto_dpif' which has rules looking up the 'recirc_id's, > causing drops. > > This commit adds an unittest that captures this bug. Moreover, > to solve this bug, this commit checks mapping between 'recirc_id' > and the corresponding 'ofproto_dpif', and makes sure that the > miss handling of recirc'ed packets are done with the correct > 'ofproto_dpif'. > > Signed-off-by: Alex Wang <al...@nicira.com> > Acked-by: Andy Zhou <az...@nicira.com> > Acked-by: Jarno Rajahalme <jrajaha...@nicira.com> > > --- > PATCH->V2: > - ovsrcu postpone the free of dpif_backer_recirc_node. > - fix unittest interminttent failure. > > Diff between backport & master: > - move + adjust the fix logic in xlate_lookup() (on master) to > xlate_receive(). > - change the cmap (for mapping between 'recirc_id' and 'ofproto') to hmap, > and protect the lookup with mutex. > - unittest adjust to work with branch-2.3. > --- > ofproto/ofproto-dpif-xlate.c | 45 ++++++++++++++++++++-- > ofproto/ofproto-dpif.c | 85 > ++++++++++++++++++++++++++++++++++++++++-- > ofproto/ofproto-dpif.h | 12 ++++++ > tests/ofproto-dpif.at | 68 +++++++++++++++++++++++++++++++++ > 4 files changed, 203 insertions(+), 7 deletions(-) > > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index 0fc5443..ee353e3 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -634,6 +634,8 @@ xlate_receive(const struct dpif_backer *backer, struct > ofpbuf *packet, > struct dpif_sflow **sflow, struct netflow **netflow, > odp_port_t *odp_in_port) > { > + struct ofproto_dpif *recv_ofproto = NULL; > + struct ofproto_dpif *recirc_ofproto = NULL; > const struct xport *xport; > int error = ENODEV; > > @@ -655,6 +657,7 @@ xlate_receive(const struct dpif_backer *backer, struct > ofpbuf *packet, > if (!xport) { > goto exit; > } > + recv_ofproto = xport->xbridge->ofproto; > > if (vsp_adjust_flow(xport->xbridge->ofproto, flow)) { > if (packet) { > @@ -667,20 +670,54 @@ xlate_receive(const struct dpif_backer *backer, > struct ofpbuf *packet, > } > error = 0; > > + /* When recirc_id is set in 'flow', checks whether the ofproto_dpif > that > + * corresponds to the recirc_id is same as the receiving bridge. If > they > + * are the same, uses the 'recv_ofproto' and keeps the 'ofp_in_port' > as > + * assigned. Otherwise, uses the 'recirc_ofproto' that owns > recirc_id and > + * assigns OFPP_NONE to 'ofp_in_port'. Doing this is in that, the > + * recirculated flow must be processced by the ofproto which > originates > + * the recirculation, and as bridges can only see their own ports, the > + * in_port of the 'recv_ofproto' should not be passed to the > + * 'recirc_ofproto'. > + * > + * Admittedly, setting the 'ofp_in_port' to OFPP_NONE limits the > + * 'recirc_ofproto' from meaningfully matching on in_port of > recirculated > + * flow, and should be fixed in the near future. > + * > + * TODO: Restore the original patch port. > + */ > + if (flow->recirc_id) { > + recirc_ofproto = ofproto_dpif_recirc_get_ofproto(backer, > + flow->recirc_id); > + /* Returns error if could not find recirculation bridge */ > + if (!recirc_ofproto) { > + error = ENOENT; > + goto exit; > + } > + > + if (recv_ofproto != recirc_ofproto) { > + xport = NULL; > + flow->in_port.ofp_port = OFPP_NONE; > + if (odp_in_port) { > + *odp_in_port = ODPP_NONE; > + } > + } > + } > + > if (ofproto) { > - *ofproto = xport->xbridge->ofproto; > + *ofproto = xport ? recv_ofproto : recirc_ofproto; > } > > if (ipfix) { > - *ipfix = dpif_ipfix_ref(xport->xbridge->ipfix); > + *ipfix = xport ? dpif_ipfix_ref(xport->xbridge->ipfix) : NULL; > } > > if (sflow) { > - *sflow = dpif_sflow_ref(xport->xbridge->sflow); > + *sflow = xport ? dpif_sflow_ref(xport->xbridge->sflow) : NULL; > } > > if (netflow) { > - *netflow = netflow_ref(xport->xbridge->netflow); > + *netflow = xport ? netflow_ref(xport->xbridge->netflow) : NULL; > } > > exit: > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 7204ccf..6df6b83 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -57,6 +57,7 @@ > #include "ofproto-dpif-sflow.h" > #include "ofproto-dpif-upcall.h" > #include "ofproto-dpif-xlate.h" > +#include "ovs-rcu.h" > #include "poll-loop.h" > #include "seq.h" > #include "simap.h" > @@ -238,6 +239,13 @@ COVERAGE_DEFINE(rev_port_toggled); > COVERAGE_DEFINE(rev_flow_table); > COVERAGE_DEFINE(rev_mac_learning); > > +/* Stores mapping between 'recirc_id' and 'ofproto-dpif'. */ > +struct dpif_backer_recirc_node { > + struct hmap_node hmap_node; > + struct ofproto_dpif *ofproto; > + uint32_t recirc_id; > +}; > + > /* All datapaths of a given type share a single dpif backer instance. */ > struct dpif_backer { > char *type; > @@ -256,6 +264,8 @@ struct dpif_backer { > > /* Recirculation. */ > struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ > + struct hmap recirc_map; /* Map of 'recirc_id's to 'ofproto's. > */ > + struct ovs_mutex recirc_mutex; /* Protects 'recirc_map'. */ > bool enable_recirc; /* True if the datapath supports recirculation > */ > > /* True if the datapath supports variable-length > @@ -794,6 +804,30 @@ dealloc(struct ofproto *ofproto_) > free(ofproto); > } > > +/* Called when 'ofproto' is destructed. Checks for and clears any > + * recirc_id leak. */ > +static void > +dpif_backer_recirc_clear_ofproto(struct dpif_backer *backer, > + struct ofproto_dpif *ofproto) > +{ > + struct dpif_backer_recirc_node *node; > + > + ovs_mutex_lock(&backer->recirc_mutex); > + HMAP_FOR_EACH (node, hmap_node, &backer->recirc_map) { > + if (node->ofproto == ofproto) { > + VLOG_ERR("recirc_id %"PRIu32", not freed when ofproto (%s) " > + "is destructed", node->recirc_id, ofproto->up.name); > + hmap_remove(&backer->recirc_map, &node->hmap_node); > + /* Does not matter whether directly free or use > ovsrcu_postpone, > + * since all datapath flows are already purged before calling > this > + * function, and no 'recirc_id' could be associated to > 'ofproto'. > + */ > + ovsrcu_postpone(free, node); > + } > + } > + ovs_mutex_unlock(&backer->recirc_mutex); > +} > + > static void > close_dpif_backer(struct dpif_backer *backer) > { > @@ -810,6 +844,8 @@ close_dpif_backer(struct dpif_backer *backer) > hmap_destroy(&backer->odp_to_ofport_map); > shash_find_and_delete(&all_dpif_backers, backer->type); > recirc_id_pool_destroy(backer->rid_pool); > + hmap_destroy(&backer->recirc_map); > + ovs_mutex_destroy(&backer->recirc_mutex); > free(backer->type); > dpif_close(backer->dpif); > free(backer); > @@ -921,6 +957,8 @@ open_dpif_backer(const char *type, struct dpif_backer > **backerp) > backer->variable_length_userdata = > check_variable_length_userdata(backer); > backer->max_mpls_depth = check_max_mpls_depth(backer); > backer->rid_pool = recirc_id_pool_create(); > + ovs_mutex_init(&backer->recirc_mutex); > + hmap_init(&backer->recirc_map); > > error = dpif_recv_set(backer->dpif, backer->recv_set_enable); > if (error) { > @@ -1306,6 +1344,8 @@ destruct(struct ofproto *ofproto_) > } > guarded_list_destroy(&ofproto->pins); > > + dpif_backer_recirc_clear_ofproto(ofproto->backer, ofproto); > + > mbridge_unref(ofproto->mbridge); > > netflow_unref(ofproto->netflow); > @@ -4758,20 +4798,59 @@ odp_port_to_ofp_port(const struct ofproto_dpif > *ofproto, odp_port_t odp_port) > } > } > > +struct ofproto_dpif * > +ofproto_dpif_recirc_get_ofproto(const struct dpif_backer *backer, > + uint32_t recirc_id) > +{ > + struct dpif_backer_recirc_node *node; > + > + ovs_mutex_lock(&backer->recirc_mutex); > + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map, > recirc_id), > + struct dpif_backer_recirc_node, hmap_node); > + ovs_mutex_unlock(&backer->recirc_mutex); > + > + return node ? node->ofproto : NULL; > +} > + > uint32_t > ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto) > { > struct dpif_backer *backer = ofproto->backer; > + uint32_t recirc_id = recirc_id_alloc(backer->rid_pool); > + > + if (recirc_id) { > + struct dpif_backer_recirc_node *node = xmalloc(sizeof *node); > + > + node->recirc_id = recirc_id; > + node->ofproto = ofproto; > > - return recirc_id_alloc(backer->rid_pool); > + ovs_mutex_lock(&backer->recirc_mutex); > + hmap_insert(&backer->recirc_map, &node->hmap_node, > node->recirc_id); > + ovs_mutex_unlock(&backer->recirc_mutex); > + } > + > + return recirc_id; > } > > void > ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t > recirc_id) > { > struct dpif_backer *backer = ofproto->backer; > - > - recirc_id_free(backer->rid_pool, recirc_id); > + struct dpif_backer_recirc_node *node; > + > + ovs_mutex_lock(&backer->recirc_mutex); > + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map, > recirc_id), > + struct dpif_backer_recirc_node, hmap_node); > + if (node) { > + hmap_remove(&backer->recirc_map, &node->hmap_node); > + ovs_mutex_unlock(&backer->recirc_mutex); > + recirc_id_free(backer->rid_pool, node->recirc_id); > + /* RCU postpone the free, since other threads may be referring > + * to 'node' at same time. */ > + ovsrcu_postpone(free, node); > + } else { > + ovs_mutex_unlock(&backer->recirc_mutex); > + } > } > > int > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > index 6f77b1a..cee4723 100644 > --- a/ofproto/ofproto-dpif.h > +++ b/ofproto/ofproto-dpif.h > @@ -226,8 +226,20 @@ struct ofport_dpif *odp_port_to_ofport(const struct > dpif_backer *, odp_port_t); > * Post recirculation data path flows are managed like other data path > flows. > * They are created on demand. Miss handling, stats collection and > revalidation > * work the same way as regular flows. > + * > + * If the bridge which originates the recirculation is different from the > bridge > + * that receives the post recirculation packet (e.g. when patch port is > used), > + * the packet will be processed directly by the recirculation bridge with > + * in_port set to OFPP_NONE. Admittedly, doing this limits the > recirculation > + * bridge from matching on in_port of post recirculation packets, and > will be > + * fixed in the near future. > + * > + * TODO: Always restore the correct in_port. > + * > */ > > +struct ofproto_dpif *ofproto_dpif_recirc_get_ofproto(const struct > dpif_backer *ofproto, > + uint32_t recirc_id); > uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto); > void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t > recirc_id); > int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index 1fcd937..6fb0dce 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -195,6 +195,74 @@ AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > > br1_flows.txt]) > AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 24]) > AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 24]) > AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 24]) > + > +OVS_VSWITCHD_STOP() > +AT_CLEANUP > + > +# Makes sure recirculation does not change the way packet is handled. > +AT_SETUP([ofproto-dpif, balance-tcp bonding, different recirc flow ]) > +OVS_VSWITCHD_START( > + [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active \ > + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 > -- \ > + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock > ofport_request=1 -- \ > + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock > ofport_request=2 -- \ > + add-br br1 -- \ > + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ > + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ > + fail-mode=standalone -- \ > + add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active \ > + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 > -- \ > + set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock > ofport_request=3 -- \ > + set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock > ofport_request=4 -- \ > + add-port br1 br1- -- set interface br1- type=patch options:peer=br1+ > ofport_request=100 -- \ > + add-br br-int -- \ > + set bridge br-int other-config:hwaddr=aa:77:aa:77:00:00 -- \ > + set bridge br-int datapath-type=dummy other-config:datapath-id=1235 \ > + fail-mode=secure -- \ > + add-port br-int br1+ -- set interface br1+ type=patch > options:peer=br1- ofport_request=101 -- \ > + add-port br-int p5 -- set interface p5 ofport_request=5 type=dummy > +]) > +AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK > +]) > + > +# Waits for all ifaces enabled. > +OVS_WAIT_UNTIL([test `ovs-appctl bond/show | grep -- "may_enable: true" > | wc -l` -ge 4]) > + > +# The dl_vlan flow should not be ever matched, > +# since recirculation should not change the flow handling. > +AT_DATA([flows.txt], [dnl > +table=0 priority=1 in_port=5 actions=mod_vlan_vid:1,output(101) > +table=0 priority=2 in_port=5 dl_vlan=1 actions=drop > +]) > +AT_CHECK([ovs-ofctl add-flows br-int flows.txt]) > + > +# Sends a packet to trigger recirculation. > +# Should generate recirc_id(0x12d),dp_hash(0xcf/0xff). > +AT_CHECK([ovs-appctl netdev-dummy/receive p5 > "in_port(5),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no)"]) > + > +ovs-appctl time/warp 5000 100 > + > +# Forces revalidators to update all stats. > +AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [dnl > +megaflows disabled > +]) > +AT_CHECK([ovs-appctl upcall/enable-megaflows], [0], [dnl > +megaflows enabled > +]) > + > +# Checks the flow stats in br1, should only be one flow with non-zero > +# 'n_packets' from internal table. > +AT_CHECK([ovs-appctl bridge/dump-flows br1 | ofctl_strip | grep -- > "n_packets" | grep -- "table_id" | sed -e 's/output:[[0-9]]\+/output/'], > [0], [dnl > +table_id=254, n_packets=1, n_bytes=64, > priority=20,recirc_id=0x12d,dp_hash=0xcf/0xff,actions=output > +]) > + > +# Checks the flow stats in br-int, should be only one match. > +AT_CHECK([ovs-ofctl dump-flows br-int | ofctl_strip | sort], [0], [dnl > + n_packets=1, n_bytes=60, priority=1,in_port=5 > actions=mod_vlan_vid:1,output:101 > + priority=2,in_port=5,dl_vlan=1 actions=drop > +NXST_FLOW reply: > +]) > + > OVS_VSWITCHD_STOP() > AT_CLEANUP > > -- > 1.7.9.5 > > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev