On Thu, Feb 6, 2014 at 3:13 PM, Jarno Rajahalme <jrajaha...@nicira.com> wrote:
>     Keep kernel flow stats for each NUMA node rather than each (logical)
>     CPU.  This avoids using the per-CPU allocator and removes most of the
>
> diff --git a/datapath/flow.c b/datapath/flow.c
> index abe6789..e86034e 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> @@ -64,14 +64,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
>
>  void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
>  {
> -       struct flow_stats *stats;
> +       int node = numa_node_id();
> +       struct flow_stats *stats = flow->stats[node];
>         __be16 tcp_flags = 0;
>
> -       if (!flow->stats.is_percpu)
> -               stats = flow->stats.stat;
> -       else
> -               stats = this_cpu_ptr(flow->stats.cpu_stats);
> -
>         if ((flow->key.eth.type == htons(ETH_P_IP) ||
>              flow->key.eth.type == htons(ETH_P_IPV6)) &&
>             flow->key.ip.frag != OVS_FRAG_TYPE_LATER &&
> @@ -80,96 +76,126 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct 
> sk_buff *skb)
>                 tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
>         }
>
> -       spin_lock(&stats->lock);
> +       /* Check if already have node-specific stats. */
> +       if (likely(stats))
> +               spin_lock(&stats->lock);
> +       else {
> +               stats = flow->stats[0]; /* Pre-allocated. */
> +               spin_lock(&stats->lock);
> +
> +               /* If the current NUMA-node is the only writer on the
> +                * pre-allocated stats keep using them.
> +                * A previous locker may have already allocated the stats,
> +                * so we need to check again.  If the node-specific stats
> +                * were already allocated, we update the pre-allocated stats
> +                * as we have already locked them. */
> +               if (likely(stats->last_writer != node && stats->last_writer 
> >= 0
> +                          && !flow->stats[node])) {
> +                       /* Try to allocate node-specific stats. */
> +                       struct flow_stats *new_stats;
> +
> +                       new_stats = kmem_cache_alloc_node(flow_stats_cache,
> +                                                         GFP_THISNODE |
> +                                                         __GFP_NOMEMALLOC,
> +                                                         node);
> +                       if (likely(new_stats)) {
> +                               new_stats->used = jiffies;
> +                               new_stats->packet_count = 1;
> +                               new_stats->byte_count = skb->len;
> +                               new_stats->tcp_flags = tcp_flags;
> +                               new_stats->last_writer = node;
> +                               spin_lock_init(&new_stats->lock);
> +
> +                               flow->stats[node] = new_stats;
> +                               goto unlock; /* Unlock the pre-allocated 
> stats. */

One more thing I noticed is that we need a memory barrier for the numa-stats
array, since another process could be reading the stats from a different CPU.

> +                       }
> +               }
> +       }
> +
>         stats->used = jiffies;
>         stats->packet_count++;
>         stats->byte_count += skb->len;
>         stats->tcp_flags |= tcp_flags;
> +       stats->last_writer = node;
> +unlock:
>         spin_unlock(&stats->lock);
>  }
>
> -static void stats_read(struct flow_stats *stats, bool lock_bh,
> -                      struct ovs_flow_stats *ovs_stats,
> -                      unsigned long *used, __be16 *tcp_flags)
> -{
> -       if (lock_bh)
> -               spin_lock_bh(&stats->lock);
> -       else
> -               spin_lock(&stats->lock);
> -
> -       if (time_after(stats->used, *used))
> -               *used = stats->used;
> -       *tcp_flags |= stats->tcp_flags;
> -       ovs_stats->n_packets += stats->packet_count;
> -       ovs_stats->n_bytes += stats->byte_count;
> -
> -       if (lock_bh)
> -               spin_unlock_bh(&stats->lock);
> -       else
> -               spin_unlock(&stats->lock);
> -}
> -
>  void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats 
> *ovs_stats,
>                         unsigned long *used, __be16 *tcp_flags)
>  {
> -       int cpu, cur_cpu;
> +       int node, local_node;
> +       struct flow_stats *stats;
>
> -       *used = 0;
> -       *tcp_flags = 0;
> -       memset(ovs_stats, 0, sizeof(*ovs_stats));
> +       preempt_disable();
> +       local_node = numa_node_id();
>
> -       if (!flow->stats.is_percpu) {
> -               stats_read(flow->stats.stat, true, ovs_stats, used, 
> tcp_flags);
> +       /* Start from the local node. */
> +       stats = flow->stats[local_node];
> +       if (stats && likely(stats->packet_count)) {
> +               spin_lock_bh(&stats->lock);
> +               *used = stats->used;
> +               *tcp_flags = stats->tcp_flags;
> +               ovs_stats->n_packets = stats->packet_count;
> +               ovs_stats->n_bytes = stats->byte_count;
> +               spin_unlock_bh(&stats->lock);
>         } else {
> -               cur_cpu = get_cpu();
> -
> -               for_each_possible_cpu(cpu) {
> -                       struct flow_stats *stats;
> -                       bool lock_bh;
> +               *used = 0;
> +               *tcp_flags = 0;
> +               ovs_stats->n_packets = 0;
> +               ovs_stats->n_bytes = 0;
> +       }
>
> -                       stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
> -                       lock_bh = (cpu == cur_cpu);
> -                       stats_read(stats, lock_bh, ovs_stats, used, 
> tcp_flags);
> +       /* Collect stats from other nodes. */
> +       for_each_node(node) {
> +               if (node == local_node)
> +                       continue; /* Done already. */
> +               stats = flow->stats[node];
> +               if (stats && likely(stats->packet_count)) {
> +                       spin_lock(&stats->lock);
> +                       if (time_after(stats->used, *used))
> +                               *used = stats->used;
> +                       *tcp_flags |= stats->tcp_flags;
> +                       ovs_stats->n_packets += stats->packet_count;
> +                       ovs_stats->n_bytes += stats->byte_count;
> +                       spin_unlock(&stats->lock);
>                 }
> -               put_cpu();
>         }
> -}
> -
> -static void stats_reset(struct flow_stats *stats, bool lock_bh)
> -{
> -       if (lock_bh)
> -               spin_lock_bh(&stats->lock);
> -       else
> -               spin_lock(&stats->lock);
> -
> -       stats->used = 0;
> -       stats->packet_count = 0;
> -       stats->byte_count = 0;
> -       stats->tcp_flags = 0;
> -
> -       if (lock_bh)
> -               spin_unlock_bh(&stats->lock);
> -       else
> -               spin_unlock(&stats->lock);
> +       preempt_enable();
>  }
>
>  void ovs_flow_stats_clear(struct sw_flow *flow)
>  {
> -       int cpu, cur_cpu;
> -
> -       if (!flow->stats.is_percpu) {
> -               stats_reset(flow->stats.stat, true);
> -       } else {
> -               cur_cpu = get_cpu();
> +       int node, local_node;
> +       struct flow_stats *stats;
>
> -               for_each_possible_cpu(cpu) {
> -                       bool lock_bh;
> +       preempt_disable();
> +       local_node = numa_node_id();
>
> -                       lock_bh = (cpu == cur_cpu);
> -                       stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu), 
> lock_bh);
> +       /* Start from the current node. */
> +       stats = flow->stats[local_node];
> +       if (stats) {
> +               spin_lock_bh(&stats->lock);
> +               stats->used = 0;
> +               stats->packet_count = 0;
> +               stats->byte_count = 0;
> +               stats->tcp_flags = 0;
> +               spin_unlock_bh(&stats->lock);
> +       }
> +       for_each_node(node) {
> +               if (node == local_node)
> +                       continue; /* Done already. */
> +               stats = flow->stats[node];
> +               if (stats) {
> +                       spin_lock(&stats->lock);
> +                       stats->used = 0;
> +                       stats->packet_count = 0;
> +                       stats->byte_count = 0;
> +                       stats->tcp_flags = 0;
> +                       spin_unlock(&stats->lock);
>                 }
> -               put_cpu();
>         }
> +       preempt_enable();
>  }
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to