On Feb 8, 2014, at 7:01 PM, Jesse Gross <je...@nicira.com> wrote: > On Thu, Feb 6, 2014 at 3:13 PM, Jarno Rajahalme <jrajaha...@nicira.com> wrote: >> diff --git a/datapath/flow.c b/datapath/flow.c >> index abe6789..e86034e 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -80,96 +76,126 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct >> sk_buff *skb) >> tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); >> } >> >> - spin_lock(&stats->lock); >> + /* Check if already have node-specific stats. */ >> + if (likely(stats)) >> + spin_lock(&stats->lock); >> + else { >> + stats = flow->stats[0]; /* Pre-allocated. */ >> + spin_lock(&stats->lock); >> + >> + /* If the current NUMA-node is the only writer on the >> + * pre-allocated stats keep using them. >> + * A previous locker may have already allocated the stats, >> + * so we need to check again. If the node-specific stats >> + * were already allocated, we update the pre-allocated stats >> + * as we have already locked them. */ >> + if (likely(stats->last_writer != node && stats->last_writer >> >= 0 >> + && !flow->stats[node])) { > > I don't know if this likely() annotation is right - it seems that it > would be more common that we are using node 0's stats than allocating.
I changed this to: if (unlikely(stats->last_writer != node) && likely(stats->last_writer >= 0) && likely(!flow->stats[node])) { > >> static int check_header(struct sk_buff *skb, int len) >> diff --git a/datapath/flow.h b/datapath/flow.h >> index eafcfd8..f6cce35 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -155,14 +155,9 @@ struct flow_stats { >> unsigned long used; /* Last used time (in jiffies). */ >> spinlock_t lock; /* Lock for atomic stats update. */ >> __be16 tcp_flags; /* Union of seen TCP flags. */ >> -}; >> - >> -struct sw_flow_stats { >> - bool is_percpu; >> - union { >> - struct flow_stats *stat; >> - struct flow_stats __percpu *cpu_stats; >> - }; >> + int last_writer; /* NUMA-node id of the last writer or >> + * -1. Meaningful for 'stats[0]' >> only. >> + */ > > Should we put last_writer directly in struct sw_flow stats? It would > reduce the size a little bit and might even be a little clearer. > This patch actually removes the struct sw_flow_stats, and the ‘last_writer’ is part of the struct flow_stats. The alternative to put it to struct sw_flow would use more space (flow_stats are allocated cache line aligned, so there is some slack), and make the struct sw_flow a bit more volatile than it needs to be. To be clear, the definitions now look like this: struct flow_stats { u64 packet_count; /* Number of packets matched. */ u64 byte_count; /* Number of bytes matched. */ unsigned long used; /* Last used time (in jiffies). */ spinlock_t lock; /* Lock for atomic stats update. */ __be16 tcp_flags; /* Union of seen TCP flags. */ int last_writer; /* NUMA-node id of the last writer or * -1. Meaningful for 'stats[0]' only. */ }; struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats *stats[]; /* One for each NUMA node. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. */ }; >> diff --git a/datapath/flow_table.c b/datapath/flow_table.c >> index 4e6b1c0..3f0829c 100644 >> --- a/datapath/flow_table.c >> +++ b/datapath/flow_table.c >> @@ -608,16 +603,28 @@ int ovs_flow_init(void) >> BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); >> BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); >> >> - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, >> - 0, NULL); >> + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) >> + + (num_possible_nodes() >> + * sizeof(struct flow_stats *)), >> + 0, SLAB_HWCACHE_ALIGN, NULL); > > Why do we need to cache line align the flows? We shouldn't be writing > to them very often. I’ll revert this. > > One other thing, it might be good to do a revert of the 5-tuple patch > first and then layer this on top as it would probably make it clearer > what is going on. OK, v6 coming up! Jarno _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev