Tunnel metadata has grown large since the addition of Geneve options. Copying tunnel metadata for performing a lookup is not necessary. Change recirc_metadata to use a pointer to struct flow_tnl, and only copy the tunnel metadata when needed, and only copy as little of it as possible.
Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> --- lib/flow.h | 22 +--------------- lib/packets.h | 65 ++++++++++++++++++++++++++++++++++++++++++++-- ofproto/ofproto-dpif-rid.c | 30 ++++++++++++++++----- ofproto/ofproto-dpif-rid.h | 64 +++++++++++++++++++++++++-------------------- 4 files changed, 124 insertions(+), 57 deletions(-) diff --git a/lib/flow.h b/lib/flow.h index 958e7cb..f9da6b3 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -65,28 +65,8 @@ BUILD_ASSERT_DECL(FLOW_N_REGS % 2 == 0); /* Even. */ BUILD_ASSERT_DECL(FLOW_NW_FRAG_ANY == NX_IP_FRAG_ANY); BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER); -/* Some flags are exposed through OpenFlow while others are used only - * internally. */ - -/* Public flags */ -#define FLOW_TNL_F_OAM (1 << 0) - -#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1) BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM); -/* Private flags */ -#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1) -#define FLOW_TNL_F_CSUM (1 << 2) -#define FLOW_TNL_F_KEY (1 << 3) - -#define FLOW_TNL_F_MASK ((1 << 4) - 1) - -/* Purely internal to OVS userspace. These flags should never be exposed to - * the outside world and so aren't included in the flags mask. */ - -/* Tunnel information is in userspace datapath format. */ -#define FLOW_TNL_F_UDPIF (1 << 4) - const char *flow_tun_flag_to_string(uint32_t flags); /* Maximum number of supported MPLS labels. */ @@ -987,7 +967,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow) { md->recirc_id = flow->recirc_id; md->dp_hash = flow->dp_hash; - md->tunnel = flow->tunnel; + flow_tnl_copy__(&md->tunnel, &flow->tunnel); md->skb_priority = flow->skb_priority; md->pkt_mark = flow->pkt_mark; md->in_port = flow->in_port; diff --git a/lib/packets.h b/lib/packets.h index 38af37b..7140c83 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -35,9 +35,9 @@ struct ds; /* Tunnel information used in flow key and metadata. */ struct flow_tnl { - ovs_be64 tun_id; ovs_be32 ip_dst; ovs_be32 ip_src; + ovs_be64 tun_id; uint16_t flags; uint8_t ip_tos; uint8_t ip_ttl; @@ -49,6 +49,65 @@ struct flow_tnl { struct tun_metadata metadata; }; +/* Some flags are exposed through OpenFlow while others are used only + * internally. */ + +/* Public flags */ +#define FLOW_TNL_F_OAM (1 << 0) + +#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1) + +/* Private flags */ +#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1) +#define FLOW_TNL_F_CSUM (1 << 2) +#define FLOW_TNL_F_KEY (1 << 3) + +#define FLOW_TNL_F_MASK ((1 << 4) - 1) + +/* Purely internal to OVS userspace. These flags should never be exposed to + * the outside world and so aren't included in the flags mask. */ + +/* Tunnel information is in userspace datapath format. */ +#define FLOW_TNL_F_UDPIF (1 << 4) + +/* Returns an offset to 'src' covering all the meaningful fields in 'src'. */ +static inline size_t +flow_tnl_size(const struct flow_tnl *src) +{ + if (!src->ip_dst) { + /* Covers ip_dst only. */ + return offsetof(struct flow_tnl, ip_src); + } + if (src->flags & FLOW_TNL_F_UDPIF) { + /* Datapath format, cover all options we have. */ + return offsetof(struct flow_tnl, metadata.opts) + + src->metadata.present.len; + } + if (!src->metadata.present.map) { + /* No TLVs, opts is irrelevant. */ + return offsetof(struct flow_tnl, metadata.opts); + } + /* Have decoded TLVs, opts is relevant. */ + return sizeof *src; +} + +/* Copy flow_tnl, but avoid copying unused portions of tun_metadata. Unused + * data in 'dst' is NOT cleared, so this must not be used in cases where the + * uninitialized portion may be hashed over. */ +static inline void +flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src) +{ + memcpy(dst, src, flow_tnl_size(src)); +} + +static inline bool +flow_tnl_equal(const struct flow_tnl *a, const struct flow_tnl *b) +{ + size_t a_size = flow_tnl_size(a); + + return a_size == flow_tnl_size(b) && !memcmp(a, b, a_size); +} + /* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port * numbers and other times datapath (dpif) port numbers. This union allows * access to both. */ @@ -67,7 +126,9 @@ struct pkt_metadata { uint32_t skb_priority; /* Packet priority for QoS. */ uint32_t pkt_mark; /* Packet mark. */ union flow_in_port in_port; /* Input port. */ - struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ + struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note that + * if 'ip_dst' == 0, the rest of the fields may + * be uninitialized. */ }; static inline void diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c index b8207c4..5a02bb0 100644 --- a/ofproto/ofproto-dpif-rid.c +++ b/ofproto/ofproto-dpif-rid.c @@ -130,8 +130,16 @@ recirc_metadata_hash(const struct recirc_state *state) hash = hash_pointer(state->ofproto, 0); hash = hash_int(state->table_id, hash); - hash = hash_words64((const uint64_t *) &state->metadata, - sizeof state->metadata / sizeof(uint64_t), + if (state->metadata.tunnel->ip_dst) { + /* We may leave remainder bytes unhashed, but that is unlikely as + * the tunnel is not in the datapath format. */ + hash = hash_words64((const uint64_t *) state->metadata.tunnel, + flow_tnl_size(state->metadata.tunnel) + / sizeof(uint64_t), hash); + } + hash = hash_words64((const uint64_t *) &state->metadata.metadata, + (sizeof state->metadata - sizeof state->metadata.tunnel) + / sizeof(uint64_t), hash); if (state->stack && state->stack->size != 0) { hash = hash_words64((const uint64_t *) state->stack->data, @@ -153,7 +161,9 @@ recirc_metadata_equal(const struct recirc_state *a, { return (a->table_id == b->table_id && a->ofproto == b->ofproto - && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata) + && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel) + && !memcmp(&a->metadata.metadata, &b->metadata.metadata, + sizeof a->metadata - sizeof a->metadata.tunnel) && (((!a->stack || !a->stack->size) && (!b->stack || !b->stack->size)) || (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack))) @@ -193,9 +203,13 @@ recirc_ref_equal(const struct recirc_state *target, uint32_t hash) } static void -recirc_state_clone(struct recirc_state *new, const struct recirc_state *old) +recirc_state_clone(struct recirc_state *new, const struct recirc_state *old, + struct flow_tnl *tunnel) { *new = *old; + flow_tnl_copy__(tunnel, old->metadata.tunnel); + new->metadata.tunnel = tunnel; + if (new->stack) { new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL; } @@ -216,9 +230,11 @@ recirc_alloc_id__(const struct recirc_state *state, uint32_t hash) ovs_assert(state->action_set_len <= state->ofpacts_len); struct recirc_id_node *node = xzalloc(sizeof *node); + node->hash = hash; ovs_refcount_init(&node->refcount); - recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state); + recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state, + &node->state_metadata_tunnel); ovs_mutex_lock(&mutex); for (;;) { @@ -269,10 +285,12 @@ recirc_alloc_id_ctx(const struct recirc_state *state) uint32_t recirc_alloc_id(struct ofproto_dpif *ofproto) { + struct flow_tnl tunnel; + tunnel.ip_dst = htonl(0); struct recirc_state state = { .table_id = TBL_INTERNAL, .ofproto = ofproto, - .metadata = { .in_port = OFPP_NONE }, + .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE }, }; return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id; } diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index 11ae486..e7d95bf 100644 --- a/ofproto/ofproto-dpif-rid.h +++ b/ofproto/ofproto-dpif-rid.h @@ -34,40 +34,40 @@ struct rule; * ============= * * Recirculation is a technique to allow a frame to re-enter the datapath - * packet processing path for one or multiple times to achieve more flexible - * packet processing, such modifying header fields after MPLS POP action and - * selecting bond a slave port for bond ports. + * packet processing path to achieve more flexible packet processing, such as + * modifying header fields after MPLS POP action and selecting a slave port for + * bond ports. * * Data path and user space interface * ----------------------------------- * * Recirculation uses two uint32_t fields, recirc_id and dp_hash, and a RECIRC - * action. The value recirc_id is used to select the next packet processing - * steps among multiple instances of recirculation. When a packet initially - * enters the data path it is assigned with recirc_id 0, which indicates no - * recirculation. Recirc_ids are managed by the user space, opaque to the - * data path. + * action. recirc_id is used to select the next packet processing steps among + * multiple instances of recirculation. When a packet initially enters the + * datapath it is assigned with recirc_id 0, which indicates no recirculation. + * Recirc_ids are managed by the user space, opaque to the datapath. * - * On the other hand, dp_hash can only be computed by the data path, opaque to - * the user space. In fact, user space may not able to recompute the hash - * value. The dp_hash value should be wildcarded for a newly received - * packet. HASH action specifies whether the hash is computed, and if - * computed, how many fields are to be included in the hash computation. The - * computed hash value is stored into the dp_hash field prior to recirculation. + * On the other hand, dp_hash can only be computed by the datapath, opaque to + * the user space, as the datapath is free to choose the hashing algorithm + * without informing user space about it. The dp_hash value should be + * wildcarded for newly received packets. HASH action specifies whether the + * hash is computed, and if computed, how many fields are to be included in the + * hash computation. The computed hash value is stored into the dp_hash field + * prior to recirculation. * * The RECIRC action sets the recirc_id field and then reprocesses the packet - * as if it was received on the same input port. RECIRC action works like a - * function call; actions listed behind the RECIRC action will be executed - * after its execution. RECIRC action can be nested, data path implementation - * limits the number of recirculation executed to prevent unreasonable nesting - * depth or infinite loop. + * as if it was received again on the same input port. RECIRC action works + * like a function call; actions listed after the RECIRC action will be + * executed after recirculation. RECIRC action can be nested, but datapath + * implementation limits the number of nested recirculations to prevent + * unreasonable nesting depth or infinite loop. * * User space recirculation context * --------------------------------- * - * Recirculation is hidden from the OpenFlow controllers. Action translation - * code deduces when recirculation is necessary and issues a data path - * recirculation action. All OpenFlow actions to be performed after + * Recirculation is usually hidden from the OpenFlow controllers. Action + * translation code deduces when recirculation is necessary and issues a + * datapath recirculation action. All OpenFlow actions to be performed after * recirculation are derived from the OpenFlow pipeline and are stored with the * recirculation ID. When the OpenFlow tables are changed in a way affecting * the recirculation flows, new recirculation ID with new metadata and actions @@ -76,9 +76,10 @@ struct rule; * Recirculation ID pool * ---------------------- * - * Recirculation ID needs to be unique for all data paths. Recirculation ID - * pool keeps track recirculation ids and stores OpenFlow pipeline translation - * context so that flow processing may continue after recirculation. + * Recirculation ID needs to be unique for all datapaths. Recirculation ID + * pool keeps track of recirculation ids and stores OpenFlow pipeline + * translation context so that flow processing may continue after + * recirculation. * * A Recirculation ID can be any uint32_t value, except for that the value 0 is * reserved for 'no recirculation' case. @@ -96,7 +97,7 @@ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33); struct recirc_metadata { /* Metadata in struct flow. */ - struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ + const struct flow_tnl *tunnel; /* Encapsulating tunnel parameters. */ ovs_be64 metadata; /* OpenFlow Metadata. */ uint64_t regs[FLOW_N_XREGS]; /* Registers. */ ofp_port_t in_port; /* Incoming port. */ @@ -108,7 +109,7 @@ recirc_metadata_from_flow(struct recirc_metadata *md, const struct flow *flow) { memset(md, 0, sizeof *md); - md->tunnel = flow->tunnel; + md->tunnel = &flow->tunnel; md->metadata = flow->metadata; memcpy(md->regs, flow->regs, sizeof md->regs); md->in_port = flow->in_port.ofp_port; @@ -119,7 +120,11 @@ static inline void recirc_metadata_to_flow(const struct recirc_metadata *md, struct flow *flow) { - flow->tunnel = md->tunnel; + if (md->tunnel && md->tunnel->ip_dst) { + flow->tunnel = *md->tunnel; + } else { + memset(&flow->tunnel, 0, sizeof flow->tunnel); + } flow->metadata = md->metadata; memcpy(flow->regs, md->regs, sizeof flow->regs); flow->in_port.ofp_port = md->in_port; @@ -161,6 +166,9 @@ struct recirc_id_node { * This state should not be modified after inserting a node in the pool, * hence the 'const' to emphasize that. */ const struct recirc_state state; + + /* Storage for tunnel metadata. */ + struct flow_tnl state_metadata_tunnel; }; void recirc_init(void); -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev