On Fri, Oct 18, 2013 at 8:25 AM, Andy Zhou <az...@nicira.com> wrote: > Do we still need a spinlock when we have per-cpu counters? > Yes, the cmd OVS_FLOW_ATTR_CLEAR needs to clear stats. So a lock is required for an atomic reset.
> > On Fri, Oct 18, 2013 at 8:11 AM, Pravin B Shelar <pshe...@nicira.com> wrote: >> >> With mega flow implementation ovs flow can be shared between >> multiple CPUs which makes stats updates highly contended >> operation. Following patch allocates separate stats for each >> CPU to make stats update scalable. >> >> Signed-off-by: Pravin B Shelar <pshe...@nicira.com> >> --- >> datapath/datapath.c | 54 >> ++++++++++++++++++------------------------------ >> datapath/flow.c | 50 >> +++++++++++++++++++++++++++++++++++++++------ >> datapath/flow.h | 20 +++++++++++------ >> datapath/flow_table.c | 14 ++++++++++-- >> 4 files changed, 87 insertions(+), 51 deletions(-) >> >> diff --git a/datapath/datapath.c b/datapath/datapath.c >> index 9e6df12..25f25f2 100644 >> --- a/datapath/datapath.c >> +++ b/datapath/datapath.c >> @@ -252,9 +252,9 @@ void ovs_dp_process_received_packet(struct vport *p, >> struct sk_buff *skb) >> OVS_CB(skb)->flow = flow; >> OVS_CB(skb)->pkt_key = &key; >> >> - stats_counter = &stats->n_hit; >> - ovs_flow_used(OVS_CB(skb)->flow, skb); >> + ovs_flow_stats_update(OVS_CB(skb)->flow, skb); >> ovs_execute_actions(dp, skb); >> + stats_counter = &stats->n_hit; >> >> out: >> /* Update datapath statistics. 
*/ >> @@ -454,14 +454,6 @@ out: >> return err; >> } >> >> -static void clear_stats(struct sw_flow *flow) >> -{ >> - flow->used = 0; >> - flow->tcp_flags = 0; >> - flow->packet_count = 0; >> - flow->byte_count = 0; >> -} >> - >> static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info >> *info) >> { >> struct ovs_header *ovs_header = info->userhdr; >> @@ -629,11 +621,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow >> *flow, struct datapath *dp, >> { >> const int skb_orig_len = skb->len; >> struct nlattr *start; >> - struct ovs_flow_stats stats; >> + struct sw_flow_stats flow_stats; >> struct ovs_header *ovs_header; >> struct nlattr *nla; >> - unsigned long used; >> - u8 tcp_flags; >> int err; >> >> ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, >> flags, cmd); >> @@ -662,24 +652,24 @@ static int ovs_flow_cmd_fill_info(struct sw_flow >> *flow, struct datapath *dp, >> >> nla_nest_end(skb, nla); >> >> - spin_lock_bh(&flow->lock); >> - used = flow->used; >> - stats.n_packets = flow->packet_count; >> - stats.n_bytes = flow->byte_count; >> - tcp_flags = flow->tcp_flags; >> - spin_unlock_bh(&flow->lock); >> - >> - if (used && >> - nla_put_u64(skb, OVS_FLOW_ATTR_USED, >> ovs_flow_used_time(used))) >> + ovs_flow_stats_get(flow, &flow_stats); >> + if (flow_stats.used && >> + nla_put_u64(skb, OVS_FLOW_ATTR_USED, >> ovs_flow_used_time(flow_stats.used))) >> goto nla_put_failure; >> >> - if (stats.n_packets && >> - nla_put(skb, OVS_FLOW_ATTR_STATS, >> - sizeof(struct ovs_flow_stats), &stats)) >> - goto nla_put_failure; >> + if (flow_stats.packet_count) { >> + struct ovs_flow_stats stats = { >> + .n_packets = flow_stats.packet_count, >> + .n_bytes = flow_stats.byte_count, >> + }; >> >> - if (tcp_flags && >> - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) >> + if (nla_put(skb, OVS_FLOW_ATTR_STATS, >> + sizeof(struct ovs_flow_stats), &stats)) >> + goto nla_put_failure; >> + } >> + >> + if (flow_stats.tcp_flags && >> + nla_put_u8(skb, 
OVS_FLOW_ATTR_TCP_FLAGS, >> flow_stats.tcp_flags)) >> goto nla_put_failure; >> >> /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions >> if >> @@ -809,7 +799,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff >> *skb, struct genl_info *info) >> error = PTR_ERR(flow); >> goto err_unlock_ovs; >> } >> - clear_stats(flow); >> >> flow->key = masked_key; >> flow->unmasked_key = key; >> @@ -855,11 +844,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff >> *skb, struct genl_info *info) >> info->snd_seq, >> OVS_FLOW_CMD_NEW); >> >> /* Clear stats. */ >> - if (a[OVS_FLOW_ATTR_CLEAR]) { >> - spin_lock_bh(&flow->lock); >> - clear_stats(flow); >> - spin_unlock_bh(&flow->lock); >> - } >> + if (a[OVS_FLOW_ATTR_CLEAR]) >> + ovs_sw_flow_stats_clear(flow); >> } >> ovs_unlock(); >> >> diff --git a/datapath/flow.c b/datapath/flow.c >> index faa4e15..1ee01ec 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -62,8 +62,9 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) >> #define TCP_FLAGS_OFFSET 13 >> #define TCP_FLAG_MASK 0x3f >> >> -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) >> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) >> { >> + struct sw_flow_stats *stats = &flow->stats[smp_processor_id()]; >> u8 tcp_flags = 0; >> >> if ((flow->key.eth.type == htons(ETH_P_IP) || >> @@ -74,12 +75,47 @@ void ovs_flow_used(struct sw_flow *flow, struct >> sk_buff *skb) >> tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; >> } >> >> - spin_lock(&flow->lock); >> - flow->used = jiffies; >> - flow->packet_count++; >> - flow->byte_count += skb->len; >> - flow->tcp_flags |= tcp_flags; >> - spin_unlock(&flow->lock); >> + spin_lock(&stats->lock); >> + stats->used = jiffies; >> + stats->packet_count++; >> + stats->byte_count += skb->len; >> + stats->tcp_flags |= tcp_flags; >> + spin_unlock(&stats->lock); >> +} >> + >> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res) >> +{ >> + int cpu; >> + >> + 
memset(res, 0, sizeof(*res)); >> + >> + for_each_possible_cpu(cpu) { >> + struct sw_flow_stats *stats = &flow->stats[cpu]; >> + >> + spin_lock(&stats->lock); >> + if (time_after(stats->used, res->used)) >> + res->used = stats->used; >> + res->packet_count += stats->packet_count; >> + res->byte_count += stats->byte_count; >> + res->tcp_flags |= stats->tcp_flags; >> + spin_unlock(&stats->lock); >> + } >> +} >> + >> +void ovs_sw_flow_stats_clear(struct sw_flow *flow) >> +{ >> + int cpu; >> + >> + for_each_possible_cpu(cpu) { >> + struct sw_flow_stats *stats = &flow->stats[cpu]; >> + >> + spin_lock(&stats->lock); >> + stats->used = 0; >> + stats->packet_count = 0; >> + stats->byte_count = 0; >> + stats->tcp_flags = 0; >> + spin_unlock(&stats->lock); >> + } >> } >> >> static int check_header(struct sk_buff *skb, int len) >> diff --git a/datapath/flow.h b/datapath/flow.h >> index 91a3022..987e9e5 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -19,6 +19,7 @@ >> #ifndef FLOW_H >> #define FLOW_H 1 >> >> +#include <linux/cache.h> >> #include <linux/kernel.h> >> #include <linux/netlink.h> >> #include <linux/openvswitch.h> >> @@ -146,6 +147,14 @@ struct sw_flow_actions { >> struct nlattr actions[]; >> }; >> >> +struct sw_flow_stats { >> + u64 packet_count; /* Number of packets matched. */ >> + u64 byte_count; /* Number of bytes matched. */ >> + unsigned long used; /* Last used time (in jiffies). */ >> + spinlock_t lock; /* Lock for values below. */ >> + u8 tcp_flags; /* Union of seen TCP flags. */ >> +} ____cacheline_aligned; >> + >> struct sw_flow { >> struct rcu_head rcu; >> struct hlist_node hash_node[2]; >> @@ -155,12 +164,7 @@ struct sw_flow { >> struct sw_flow_key unmasked_key; >> struct sw_flow_mask *mask; >> struct sw_flow_actions __rcu *sf_acts; >> - >> - spinlock_t lock; /* Lock for values below. */ >> - unsigned long used; /* Last used time (in jiffies). */ >> - u64 packet_count; /* Number of packets matched. 
*/ >> - u64 byte_count; /* Number of bytes matched. */ >> - u8 tcp_flags; /* Union of seen TCP flags. */ >> + struct sw_flow_stats stats[]; >> }; >> >> struct arp_eth_header { >> @@ -177,7 +181,9 @@ struct arp_eth_header { >> unsigned char ar_tip[4]; /* target IP address >> */ >> } __packed; >> >> -void ovs_flow_used(struct sw_flow *, struct sk_buff *); >> +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); >> +void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res); >> +void ovs_sw_flow_stats_clear(struct sw_flow *flow); >> u64 ovs_flow_used_time(unsigned long flow_jiffies); >> >> int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key >> *); >> diff --git a/datapath/flow_table.c b/datapath/flow_table.c >> index 98eb809..23ed9ee 100644 >> --- a/datapath/flow_table.c >> +++ b/datapath/flow_table.c >> @@ -76,15 +76,19 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const >> struct sw_flow_key *src, >> struct sw_flow *ovs_flow_alloc(void) >> { >> struct sw_flow *flow; >> + int cpu; >> >> flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); >> if (!flow) >> return ERR_PTR(-ENOMEM); >> >> - spin_lock_init(&flow->lock); >> flow->sf_acts = NULL; >> flow->mask = NULL; >> >> + memset(flow->stats, 0, num_possible_cpus() * sizeof(struct >> sw_flow_stats)); >> + for_each_possible_cpu(cpu) >> + spin_lock_init(&flow->stats[cpu].lock); >> + >> return flow; >> } >> >> @@ -561,11 +565,15 @@ int ovs_flow_tbl_insert(struct flow_table *table, >> struct sw_flow *flow, >> * Returns zero if successful or a negative error code. 
*/ >> int ovs_flow_init(void) >> { >> + int flow_size; >> + >> BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); >> BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); >> >> - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), >> 0, >> - 0, NULL); >> + flow_size = sizeof(struct sw_flow) + >> + (num_possible_cpus() * sizeof(struct sw_flow_stats)); >> + >> + flow_cache = kmem_cache_create("sw_flow", flow_size, 0, 0, NULL); >> if (flow_cache == NULL) >> return -ENOMEM; >> >> -- >> 1.7.1 >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev > > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev