On Thu, Feb 6, 2014 at 3:13 PM, Jarno Rajahalme <jrajaha...@nicira.com> wrote:
> diff --git a/datapath/flow.c b/datapath/flow.c
> index abe6789..e86034e 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -80,96 +76,126 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct 
> sk_buff *skb)
>                 tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
>         }
>
> -       spin_lock(&stats->lock);
> +       /* Check if already have node-specific stats. */
> +       if (likely(stats))
> +               spin_lock(&stats->lock);
> +       else {
> +               stats = flow->stats[0]; /* Pre-allocated. */
> +               spin_lock(&stats->lock);
> +
> +               /* If the current NUMA-node is the only writer on the
> +                * pre-allocated stats keep using them.
> +                * A previous locker may have already allocated the stats,
> +                * so we need to check again.  If the node-specific stats
> +                * were already allocated, we update the pre-allocated stats
> +                * as we have already locked them. */
> +               if (likely(stats->last_writer != node && stats->last_writer 
> >= 0
> +                          && !flow->stats[node])) {

I'm not sure this likely() annotation is right — it seems it would be more
common to keep using node 0's stats than to allocate new node-specific ones.

>  static int check_header(struct sk_buff *skb, int len)
> diff --git a/datapath/flow.h b/datapath/flow.h
> index eafcfd8..f6cce35 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> @@ -155,14 +155,9 @@ struct flow_stats {
>         unsigned long used;             /* Last used time (in jiffies). */
>         spinlock_t lock;                /* Lock for atomic stats update. */
>         __be16 tcp_flags;               /* Union of seen TCP flags. */
> -};
> -
> -struct sw_flow_stats {
> -       bool is_percpu;
> -       union {
> -               struct flow_stats *stat;
> -               struct flow_stats __percpu *cpu_stats;
> -       };
> +       int last_writer;                /* NUMA-node id of the last writer or
> +                                        * -1. Meaningful for 'stats[0]' only.
> +                                        */

Should we put last_writer directly in struct sw_flow instead? It would
reduce the size of struct flow_stats a little and might even be a little clearer.

> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 4e6b1c0..3f0829c 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -608,16 +603,28 @@ int ovs_flow_init(void)
>         BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
>         BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
>
> -       flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
> -                                       0, NULL);
> +       flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
> +                                      + (num_possible_nodes()
> +                                         * sizeof(struct flow_stats *)),
> +                                      0, SLAB_HWCACHE_ALIGN, NULL);

Why do we need to cache-line-align the flows (SLAB_HWCACHE_ALIGN)? We
shouldn't be writing to them very often.

One other thing: it might be good to revert the 5-tuple patch first and
then layer this change on top, as that would probably make it clearer
what is going on.
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to