Now that dpif_execute has a 'flow' member, it's pretty easy to access the flow (or the matching megaflow) in dp_execute_cb().
This means that it's not necessary anymore for the connection tracker to reextract 'dl_type' from the packet, it can be passed as a parameter. This change means that we have to complicate slightly test-conntrack to group the packets by dl_type before passing them to the connection tracker. Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> Acked-by: Joe Stringer <j...@ovn.org> --- lib/conntrack.c | 47 ++++++++++++++++++--------------------- lib/conntrack.h | 3 ++- lib/dpif-netdev.c | 21 ++++++++++---------- tests/test-conntrack.c | 52 +++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 77 insertions(+), 46 deletions(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index 8e6c826..6ef9114 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -53,7 +53,8 @@ struct conn_lookup_ctx { }; static bool conn_key_extract(struct conntrack *, struct dp_packet *, - struct conn_lookup_ctx *, uint16_t zone); + ovs_be16 dl_type, struct conn_lookup_ctx *, + uint16_t zone); static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis); static void conn_key_reverse(struct conn_key *); static void conn_key_lookup(struct conntrack_bucket *ctb, @@ -265,7 +266,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, * 'setlabel' behaves similarly for the connection label.*/ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, - bool commit, uint16_t zone, const uint32_t *setmark, + ovs_be16 dl_type, bool commit, uint16_t zone, + const uint32_t *setmark, const struct ovs_key_ct_labels *setlabel, const char *helper) { @@ -299,7 +301,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, for (i = 0; i < cnt; i++) { unsigned bucket; - if (!conn_key_extract(ct, pkts[i], &ctxs[i], zone)) { + if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) { write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO); continue; } @@ -917,7 +919,7 @@ extract_l4(struct conn_key *key, const void *data, size_t 
size, bool *related, } static bool -conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, +conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, struct conn_lookup_ctx *ctx, uint16_t zone) { const struct eth_header *l2 = dp_packet_l2(pkt); @@ -941,43 +943,32 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, * We already have the l3 and l4 headers' pointers. Extracting * the l3 addresses and the l4 ports is really cheap, since they * can be found at fixed locations. - * 2) To extract the l3 and l4 types. - * Extracting the l3 and l4 types (especially the l3[1]) on the - * other hand is quite expensive, because they're not at a - * fixed location. + * 2) To extract the l4 type. + * Extracting the l4 types, for IPv6 can be quite expensive, because + * it's not at a fixed location. * * Here's a way to avoid (2) with the help of the datapath. - * The datapath doesn't keep the packet's extracted flow[2], so + * The datapath doesn't keep the packet's extracted flow[1], so * using that is not an option. We could use the packet's matching - * megaflow for l3 type (it's always unwildcarded), and for l4 type - * (we have to unwildcard it first). This means either: + * megaflow, but we have to make sure that the l4 type (nw_proto) + * is unwildcarded. This means either: * - * a) dpif-netdev passes the matching megaflow to dp_execute_cb(), which - * is used to extract the l3 type. Unfortunately, dp_execute_cb() is - * used also in dpif_netdev_execute(), which doesn't have a matching - * megaflow. + * a) dpif-netdev unwildcards the l4 type when a new flow is installed + * if the actions contains ct(). * - * b) We define an alternative OVS_ACTION_ATTR_CT, used only by the - * userspace datapath, which includes l3 (and l4) type. The - * alternative action could be generated by ofproto-dpif specifically - * for the userspace datapath. Having a different interface for - * userspace and kernel doesn't seem very clean, though. 
+ * b) ofproto-dpif-xlate unwildcards the l4 type when translating a ct() + * action. This is already done in different actions, but it's + * unnecessary for the kernel. * * --- - * [1] A simple benchmark (running only the connection tracker - * over and over on the same packets) shows that if the - * l3 type is already provided we are 15% faster (running the - * connection tracker over a couple of DPDK devices with a - * stream of UDP 64-bytes packets shows that we are 4% faster). - * - * [2] The reasons for this are that keeping the flow increases + * [1] The reasons for this are that keeping the flow increases * (slightly) the cache footprint and increases computation * time as we move the packet around. Most importantly, the flow * should be updated by the actions and this can be slow, as * we use a sparse representation (miniflow). * */ - ctx->key.dl_type = parse_dl_type(l2, (char *) l3 - (char *) l2); + ctx->key.dl_type = dl_type; if (ctx->key.dl_type == htons(ETH_TYPE_IP)) { ok = extract_l3_ipv4(&ctx->key, l3, tail - (char *) l3, NULL, true); } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) { diff --git a/lib/conntrack.h b/lib/conntrack.h index d188105..254f61c 100644 --- a/lib/conntrack.h +++ b/lib/conntrack.h @@ -65,7 +65,8 @@ void conntrack_init(struct conntrack *); void conntrack_destroy(struct conntrack *); int conntrack_execute(struct conntrack *, struct dp_packet_batch *, - bool commit, uint16_t zone, const uint32_t *setmark, + ovs_be16 dl_type, bool commit, + uint16_t zone, const uint32_t *setmark, const struct ovs_key_ct_labels *setlabel, const char *helper); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 5793995..aef0dcf 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -508,7 +508,7 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *, - bool may_steal, + bool may_steal, const 
struct flow *flow, const struct nlattr *actions, size_t actions_len); static void dp_netdev_input(struct dp_netdev_pmd_thread *, @@ -2489,8 +2489,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) } packet_batch_init_packet(&pp, execute->packet); - dp_netdev_execute_actions(pmd, &pp, false, execute->actions, - execute->actions_len); + dp_netdev_execute_actions(pmd, &pp, false, execute->flow, + execute->actions, execute->actions_len); if (pmd->core_id == NON_PMD_CORE_ID) { ovs_mutex_unlock(&dp->non_pmd_mutex); @@ -3661,7 +3661,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, actions = dp_netdev_flow_get_actions(flow); - dp_netdev_execute_actions(pmd, &batch->array, true, + dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, actions->actions, actions->size); } @@ -3788,7 +3788,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet, * the actions. Otherwise, if there are any slow path actions, * we'll send the packet up twice. */ packet_batch_init_packet(&b, packet); - dp_netdev_execute_actions(pmd, &b, true, + dp_netdev_execute_actions(pmd, &b, true, &match.flow, actions->data, actions->size); add_actions = put_actions->size ? 
put_actions : actions; @@ -3958,6 +3958,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_execute_aux { struct dp_netdev_pmd_thread *pmd; + const struct flow *flow; }; static void @@ -4027,7 +4028,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, NULL); if (!error || error == ENOSPC) { packet_batch_init_packet(&b, packet); - dp_netdev_execute_actions(pmd, &b, may_steal, + dp_netdev_execute_actions(pmd, &b, may_steal, flow, actions->data, actions->size); } else if (may_steal) { dp_packet_delete(packet); @@ -4222,8 +4223,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, } } - conntrack_execute(&dp->conntrack, packets_, commit, zone, - setmark, setlabel, helper); + conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, commit, + zone, setmark, setlabel, helper); break; } @@ -4247,10 +4248,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, - bool may_steal, + bool may_steal, const struct flow *flow, const struct nlattr *actions, size_t actions_len) { - struct dp_netdev_execute_aux aux = { pmd }; + struct dp_netdev_execute_aux aux = {pmd, flow}; odp_execute_actions(&aux, packets, may_steal, actions, actions_len, dp_execute_cb); diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c index 0ff70d1..6c1b373 100644 --- a/tests/test-conntrack.c +++ b/tests/test-conntrack.c @@ -30,7 +30,7 @@ static const char payload[] = "50540000000a50540000000908004500001c0000000000" "11a4cd0a0101010a0101020001000200080000"; static struct dp_packet_batch * -prepare_packets(size_t n, bool change, unsigned tid) +prepare_packets(size_t n, bool change, unsigned tid, ovs_be16 *dl_type) { struct dp_packet_batch *pkt_batch = xzalloc(sizeof *pkt_batch); struct flow flow; @@ -56,8 +56,10 @@ prepare_packets(size_t n, bool change, unsigned tid) } pkt_batch->packets[i] = pkt; + *dl_type = flow.dl_type; } + 
return pkt_batch; } @@ -83,12 +85,13 @@ ct_thread_main(void *aux_) { struct thread_aux *aux = aux_; struct dp_packet_batch *pkt_batch; + ovs_be16 dl_type; size_t i; - pkt_batch = prepare_packets(batch_size, change_conn, aux->tid); + pkt_batch = prepare_packets(batch_size, change_conn, aux->tid, &dl_type); ovs_barrier_block(&barrier); for (i = 0; i < n_pkts; i += batch_size) { - conntrack_execute(&ct, pkt_batch, true, 0, NULL, NULL, NULL); + conntrack_execute(&ct, pkt_batch, dl_type, true, 0, NULL, NULL, NULL); } ovs_barrier_block(&barrier); destroy_packets(pkt_batch); @@ -148,6 +151,44 @@ test_benchmark(struct ovs_cmdl_context *ctx) } static void +pcap_batch_execute_conntrack(struct conntrack *ct, + struct dp_packet_batch *pkt_batch) +{ + size_t i; + struct dp_packet_batch new_batch; + ovs_be16 dl_type = htons(0); + + dp_packet_batch_init(&new_batch); + + /* pkt_batch contains packets with different 'dl_type'. We have to + * call conntrack_execute() on packets with the same 'dl_type'. */ + + for (i = 0; i < pkt_batch->count; i++) { + struct dp_packet *pkt = pkt_batch->packets[i]; + struct flow flow; + + /* This also initializes the l3 and l4 pointers. 
*/ + flow_extract(pkt, &flow); + + if (!new_batch.count) { + dl_type = flow.dl_type; + } + + if (flow.dl_type != dl_type) { + conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, + NULL); + dp_packet_batch_init(&new_batch); + } + new_batch.packets[new_batch.count++] = pkt; + } + + if (new_batch.count) { + conntrack_execute(ct, &new_batch, dl_type, true, 0, NULL, NULL, NULL); + } + +} + +static void test_pcap(struct ovs_cmdl_context *ctx) { size_t total_count, i, batch_size; @@ -179,13 +220,10 @@ test_pcap(struct ovs_cmdl_context *ctx) dp_packet_batch_init(&pkt_batch); for (i = 0; i < batch_size; i++) { - struct flow dummy_flow; - err = ovs_pcap_read(pcap, &pkt_batch.packets[i], NULL); if (err) { break; } - flow_extract(pkt_batch.packets[i], &dummy_flow); } pkt_batch.count = i; @@ -193,7 +231,7 @@ test_pcap(struct ovs_cmdl_context *ctx) break; } - conntrack_execute(&ct, &pkt_batch, true, 0, NULL, NULL, NULL); + pcap_batch_execute_conntrack(&ct, &pkt_batch); for (i = 0; i < pkt_batch.count; i++) { struct ds ds = DS_EMPTY_INITIALIZER; -- 2.8.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev