The userspace datapath

1. receives a batch of packets.
2. finds a 'netdev_flow' (megaflow) for each packet.
3. groups the packets in output batches based on the 'netdev_flow'.
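(Aside, purely for illustration: a self-contained toy model of this loop,
with invented names and types rather than the real OVS code.  It uses the
O(N^2) grouping discussed next.)

    /* Toy model of the per-batch loop above.  All names and types are
     * invented stand-ins, not OVS code. */
    #include <stddef.h>
    #include <stdio.h>

    #define MAX_PKTS 4

    struct flow { int id; };               /* Stand-in for a megaflow. */
    struct packet { struct flow *flow; };  /* Packet + matched megaflow. */

    struct batch {
        struct flow *flow;
        struct packet *pkts[MAX_PKTS];
        size_t n;
    };

    int main(void)
    {
        struct flow f1 = { 1 }, f2 = { 2 };
        /* Steps 1 and 2: a received batch, already classified. */
        struct packet pkts[MAX_PKTS] = { {&f1}, {&f2}, {&f1}, {&f2} };
        struct batch batches[MAX_PKTS];
        size_t n_batches = 0;
        size_t i, j;

        /* Step 3: group packets into output batches by megaflow.  Each
         * packet scans the existing batches, which is O(N^2) in the
         * number of distinct megaflows. */
        for (i = 0; i < MAX_PKTS; i++) {
            struct batch *b = NULL;

            for (j = 0; j < n_batches; j++) {
                if (batches[j].flow == pkts[i].flow) {
                    b = &batches[j];
                    break;
                }
            }
            if (!b) {
                b = &batches[n_batches++];
                b->flow = pkts[i].flow;
                b->n = 0;
            }
            b->pkts[b->n++] = &pkts[i];
        }

        for (j = 0; j < n_batches; j++) {
            printf("flow %d: %zu packets\n",
                   batches[j].flow->id, batches[j].n);
        }
        return 0;
    }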
Until now the grouping (3) was done using a simple algorithm with an
O(N^2) runtime, where N is the number of distinct megaflows matched by the
packets in the incoming batch.  This could quickly become a bottleneck,
even with a small number of megaflows.

With this commit the datapath simply stores in the 'netdev_flow' (the
megaflow) a pointer to the output batch, if one has been created for the
current input batch.  The pointer is cleared when the output batch is sent.
In a simple phy2phy test with 128 megaflows the throughput is more than
doubled.

What previously prevented this change was that the 'netdev_flow' memory
was shared between multiple threads: this is no longer the case with the
per-thread classifier.

Also, this commit reorders struct dp_netdev_flow to group together the
members used in the fast path.

Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
---
 lib/dpif-netdev.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7d55997..333f5a4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -290,13 +290,11 @@ struct dp_netdev_flow_stats {
  * requires synchronization, as noted in more detail below.
  */
 struct dp_netdev_flow {
-    bool dead;
-
+    const struct flow flow;      /* Unmasked flow that created this entry. */
     /* Hash table index by unmasked flow. */
     const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
                                  /* 'flow_table'. */
     const ovs_u128 ufid;         /* Unique flow identifier. */
-    const struct flow flow;      /* Unmasked flow that created this entry. */
     const int pmd_id;            /* The 'core_id' of pmd thread owning this */
                                  /* flow. */
 
@@ -306,12 +304,20 @@ struct dp_netdev_flow {
      * reference. */
     struct ovs_refcount ref_cnt;
 
+    bool dead;
+
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
 
     /* Actions. */
     OVSRCU_TYPE(struct dp_netdev_actions *) actions;
 
+    /* While processing a group of input packets, the datapath uses the next
+     * member to store a pointer to the output batch for the flow.  It is
+     * reset after the batch has been sent out (See dp_netdev_queue_batches(),
+     * packet_batch_init() and packet_batch_execute()). */
+    struct packet_batch *batch;
+
     /* Packet classification. */
     struct dpcls_rule cr;        /* In owning dp_netdev's 'cls'. */
     /* 'cr' must be the last member. */
@@ -1974,6 +1980,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
     memset(&flow->stats, 0, sizeof flow->stats);
     flow->dead = false;
+    flow->batch = NULL;
     *CONST_CAST(int *, &flow->pmd_id) = pmd->core_id;
     *CONST_CAST(struct flow *, &flow->flow) = match->flow;
     *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -3045,8 +3052,9 @@ packet_batch_update(struct packet_batch *batch, struct dp_packet *packet,
 static inline void
 packet_batch_init(struct packet_batch *batch, struct dp_netdev_flow *flow)
 {
-    batch->flow = flow;
+    flow->batch = batch;
 
+    batch->flow = flow;
     batch->packet_count = 0;
     batch->byte_count = 0;
     batch->tcp_flags = 0;
@@ -3061,7 +3069,8 @@ packet_batch_execute(struct packet_batch *batch,
     struct dp_netdev_actions *actions;
     struct dp_netdev_flow *flow = batch->flow;
 
-    dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count,
+    flow->batch = NULL;
+    dp_netdev_flow_used(flow, batch->packet_count, batch->byte_count,
                         batch->tcp_flags, now);
 
     actions = dp_netdev_flow_get_actions(flow);
@@ -3078,25 +3087,19 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
                         struct packet_batch *batches, size_t *n_batches,
                         size_t max_batches)
 {
-    struct packet_batch *batch = NULL;
-    int j;
+    struct packet_batch *batch;
 
     if (OVS_UNLIKELY(!flow)) {
         return false;
     }
-    /* XXX: This O(n^2) algortihm makes sense if we're operating under the
-     * assumption that the number of distinct flows (and therefore the
-     * number of distinct batches) is quite small.  If this turns out not
-     * to be the case, it may make sense to pre sort based on the
-     * netdev_flow pointer.  That done we can get the appropriate batching
-     * in O(n * log(n)) instead. */
-    for (j = *n_batches - 1; j >= 0; j--) {
-        if (batches[j].flow == flow) {
-            batch = &batches[j];
-            packet_batch_update(batch, pkt, mf);
-            return true;
-        }
+
+    batch = flow->batch;
+
+    if (OVS_LIKELY(batch)) {
+        packet_batch_update(batch, pkt, mf);
+        return true;
     }
+
     if (OVS_UNLIKELY(*n_batches >= max_batches)) {
         return false;
     }
-- 
2.1.4
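P.S. To make the new scheme concrete outside of the diff context, here is a
minimal standalone model of the batch pointer's lifecycle (invented names
and types, not the actual OVS code): set when a batch is created, used for
an O(1) lookup while queueing, and cleared when the batch is executed.

    /* Toy model of the batch-pointer scheme: a flow caches a pointer to
     * its output batch for the duration of one input batch, so finding a
     * packet's batch is one pointer dereference instead of a scan. */
    #include <stddef.h>
    #include <stdio.h>

    #define MAX_PKTS 4

    struct batch;

    struct flow {
        int id;
        struct batch *batch;   /* Like the new dp_netdev_flow 'batch'. */
    };

    struct packet { struct flow *flow; };

    struct batch {
        struct flow *flow;
        size_t n_packets;
    };

    /* Plays the role of dp_netdev_queue_batches(): O(1) lookup through
     * flow->batch. */
    static void
    queue_packet(struct packet *pkt, struct batch *batches,
                 size_t *n_batches)
    {
        struct batch *b = pkt->flow->batch;

        if (!b) {                     /* First packet for this flow: */
            b = &batches[(*n_batches)++];
            b->flow = pkt->flow;
            b->n_packets = 0;
            pkt->flow->batch = b;     /* ...cache the batch in the flow. */
        }
        b->n_packets++;
    }

    /* Plays the role of packet_batch_execute(): send, then clear the
     * cached pointer so the next input batch starts fresh. */
    static void
    execute_batch(struct batch *b)
    {
        b->flow->batch = NULL;
        printf("flow %d: sent %zu packets\n", b->flow->id, b->n_packets);
    }

    int
    main(void)
    {
        struct flow f1 = { 1, NULL }, f2 = { 2, NULL };
        struct packet pkts[MAX_PKTS] = { {&f1}, {&f2}, {&f1}, {&f2} };
        struct batch batches[MAX_PKTS];
        size_t n_batches = 0;
        size_t i;

        for (i = 0; i < MAX_PKTS; i++) {
            queue_packet(&pkts[i], batches, &n_batches);
        }
        for (i = 0; i < n_batches; i++) {
            execute_batch(&batches[i]);
        }
        return 0;
    }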