Hey Ethan,

Could you review this?
Thanks,
Alex Wang

On Tue, May 6, 2014 at 6:40 AM, Ryan Wilson <wr...@nicira.com> wrote:
> This patch also adds a ref/unref structure for ofgroup, so handler and
> revalidator threads can update group / bucket stats via the xlate cache.
>
> See the following thread for more information:
> http://openvswitch.org/pipermail/dev/2014-January/036107.html
>
> Signed-off-by: Ryan Wilson <wr...@nicira.com>
> ---
>  lib/ofp-util.h               |   12 ++++---
>  ofproto/ofproto-dpif-xlate.c |   50 ++++++++++++++++++++-------
>  ofproto/ofproto-dpif.c       |   58 +++++++++++++++++--------------
>  ofproto/ofproto-dpif.h       |   22 ++++++++++++
>  ofproto/ofproto-provider.h   |   12 ++++++-
>  ofproto/ofproto.c            |   78 +++++++++++++++++++++++++-----------------
>  tests/ofproto-dpif.at        |   17 +++++++++
>  7 files changed, 174 insertions(+), 75 deletions(-)
>
> diff --git a/lib/ofp-util.h b/lib/ofp-util.h
> index 782e512..77967a5 100644
> --- a/lib/ofp-util.h
> +++ b/lib/ofp-util.h
> @@ -1042,6 +1042,11 @@ int ofputil_decode_queue_stats(struct ofputil_queue_stats *qs, struct ofpbuf *ms
>  void ofputil_append_queue_stat(struct list *replies,
>                                 const struct ofputil_queue_stats *oqs);
>
> +struct bucket_counter {
> +    uint64_t packet_count;   /* Number of packets processed by bucket. */
> +    uint64_t byte_count;     /* Number of bytes processed by bucket. */
> +};
> +
>  /* Bucket for use in groups. */
>  struct ofputil_bucket {
>      struct list list_node;
> @@ -1054,6 +1059,8 @@ struct ofputil_bucket {
>                                   * failover groups. */
>      struct ofpact *ofpacts;     /* Series of "struct ofpact"s. */
>      size_t ofpacts_len;         /* Length of ofpacts, in bytes. */
> +
> +    struct bucket_counter stats;
>  };
>
>  /* Protocol-independent group_mod. */
> @@ -1064,11 +1071,6 @@ struct ofputil_group_mod {
>      struct list buckets;        /* Contains "struct ofputil_bucket"s. */
>  };
>
> -struct bucket_counter {
> -    uint64_t packet_count;   /* Number of packets processed by bucket. */
> -    uint64_t byte_count;     /* Number of bytes processed by bucket. */
> -};
> -
>  /* Group stats reply, independent of protocol. */
>  struct ofputil_group_stats {
>      uint32_t group_id;          /* Group identifier. */
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index da538b7..9e99b9a 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -235,6 +235,7 @@ enum xc_type {
>      XC_LEARN,
>      XC_NORMAL,
>      XC_FIN_TIMEOUT,
> +    XC_GROUP,
>  };
>
>  /* xlate_cache entries hold enough information to perform the side effects of
> @@ -279,6 +280,10 @@ struct xc_entry {
>              uint16_t idle;
>              uint16_t hard;
>          } fin;
> +        struct {
> +            struct group_dpif *group;
> +            struct ofputil_bucket *bucket;
> +        } group;
>      } u;
>  };
>
> @@ -837,7 +842,7 @@ odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
>      return true;
>  }
>
> -static const struct ofputil_bucket *
> +static struct ofputil_bucket *
>  group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
>                          int depth);
>
> @@ -862,7 +867,7 @@ group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
>
>  static bool
>  bucket_is_alive(const struct xlate_ctx *ctx,
> -                const struct ofputil_bucket *bucket, int depth)
> +                struct ofputil_bucket *bucket, int depth)
>  {
>      if (depth >= MAX_LIVENESS_RECURSION) {
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> @@ -879,7 +884,7 @@ bucket_is_alive(const struct xlate_ctx *ctx,
>              group_is_alive(ctx, bucket->watch_group, depth + 1));
>  }
>
> -static const struct ofputil_bucket *
> +static struct ofputil_bucket *
>  group_first_live_bucket(const struct xlate_ctx *ctx,
>                          const struct group_dpif *group, int depth)
>  {
> @@ -896,16 +901,16 @@ group_first_live_bucket(const struct xlate_ctx *ctx,
>      return NULL;
>  }
>
> -static const struct ofputil_bucket *
> +static struct ofputil_bucket *
>  group_best_live_bucket(const struct xlate_ctx *ctx,
>                         const struct group_dpif *group,
>                         uint32_t basis)
>  {
> -    const struct ofputil_bucket *best_bucket = NULL;
> +    struct ofputil_bucket *best_bucket = NULL;
>      uint32_t best_score = 0;
>      int i = 0;
>
> -    const struct ofputil_bucket *bucket;
> +    struct ofputil_bucket *bucket;
>      const struct list *buckets;
>
>      group_dpif_get_buckets(group, &buckets);
> @@ -2112,7 +2117,8 @@ match:
>  }
>
>  static void
> -xlate_group_bucket(struct xlate_ctx *ctx, const struct ofputil_bucket *bucket)
> +xlate_group_bucket(struct xlate_ctx *ctx, struct group_dpif *group,
> +                   struct ofputil_bucket *bucket)
>  {
>      uint64_t action_list_stub[1024 / 8];
>      struct ofpbuf action_list, action_set;
> @@ -2127,19 +2133,30 @@ xlate_group_bucket(struct xlate_ctx *ctx, const struct ofputil_bucket *bucket)
>
>      ofpbuf_uninit(&action_set);
>      ofpbuf_uninit(&action_list);
> +
> +    if (ctx->xin->resubmit_stats) {
> +        group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
> +    }
> +    if (ctx->xin->xcache) {
> +        struct xc_entry *entry;
> +
> +        entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
> +        entry->u.group.group = group_dpif_ref(group);
> +        entry->u.group.bucket = bucket;
> +    }
>  }
>
>  static void
>  xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
>  {
> -    const struct ofputil_bucket *bucket;
> +    struct ofputil_bucket *bucket;
>      const struct list *buckets;
>      struct flow old_flow = ctx->xin->flow;
>
>      group_dpif_get_buckets(group, &buckets);
>
>      LIST_FOR_EACH (bucket, list_node, buckets) {
> -        xlate_group_bucket(ctx, bucket);
> +        xlate_group_bucket(ctx, group, bucket);
>          /* Roll back flow to previous state.
>           * This is equivalent to cloning the packet for each bucket.
>           *
> @@ -2153,11 +2170,11 @@ xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
>  static void
>  xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
>  {
> -    const struct ofputil_bucket *bucket;
> +    struct ofputil_bucket *bucket;
>
>      bucket = group_first_live_bucket(ctx, group, 0);
>      if (bucket) {
> -        xlate_group_bucket(ctx, bucket);
> +        xlate_group_bucket(ctx, group, bucket);
>      }
>  }
>
> @@ -2165,14 +2182,14 @@ static void
>  xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
>  {
>      struct flow_wildcards *wc = &ctx->xout->wc;
> -    const struct ofputil_bucket *bucket;
> +    struct ofputil_bucket *bucket;
>      uint32_t basis;
>
>      basis = hash_mac(ctx->xin->flow.dl_dst, 0, 0);
>      bucket = group_best_live_bucket(ctx, group, basis);
>      if (bucket) {
>          memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
> -        xlate_group_bucket(ctx, bucket);
> +        xlate_group_bucket(ctx, group, bucket);
>      }
>  }
>
> @@ -3598,6 +3615,10 @@ xlate_push_stats(struct xlate_cache *xcache, bool may_learn,
>              xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
>                                  entry->u.fin.idle, entry->u.fin.hard);
>              break;
> +        case XC_GROUP:
> +            group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
> +                                    stats);
> +            break;
>          default:
>              OVS_NOT_REACHED();
>          }
> @@ -3666,6 +3687,9 @@ xlate_cache_clear(struct xlate_cache *xcache)
>              /* 'u.fin.rule' is always already held as a XC_RULE, which
>               * has already released it's reference above. */
>              break;
> +        case XC_GROUP:
> +            group_dpif_unref(entry->u.group.group);
> +            break;
>          default:
>              OVS_NOT_REACHED();
>          }
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index e50b4fe..ee32fb7 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -107,7 +107,6 @@ struct group_dpif {
>      struct ovs_mutex stats_mutex;
>      uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
>      uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
> -    struct bucket_counter *bucket_stats OVS_GUARDED;  /* Bucket statistics. */
>  };
>
>  struct ofbundle {
> @@ -3541,17 +3540,32 @@ static void
>  group_construct_stats(struct group_dpif *group)
>      OVS_REQUIRES(group->stats_mutex)
>  {
> +    struct ofputil_bucket *bucket;
> +    const struct list *buckets;
> +
>      group->packet_count = 0;
>      group->byte_count = 0;
> -    if (!group->bucket_stats) {
> -        group->bucket_stats = xcalloc(group->up.n_buckets,
> -                                      sizeof *group->bucket_stats);
> -    } else {
> -        memset(group->bucket_stats, 0, group->up.n_buckets *
> -               sizeof *group->bucket_stats);
> +
> +    group_dpif_get_buckets(group, &buckets);
> +    LIST_FOR_EACH (bucket, list_node, buckets) {
> +        bucket->stats.packet_count = 0;
> +        bucket->stats.byte_count = 0;
>      }
>  }
>
> +void
> +group_dpif_credit_stats(struct group_dpif *group,
> +                        struct ofputil_bucket *bucket,
> +                        const struct dpif_flow_stats *stats)
> +{
> +    ovs_mutex_lock(&group->stats_mutex);
> +    bucket->stats.packet_count += stats->n_packets;
> +    bucket->stats.byte_count += stats->n_bytes;
> +    group->packet_count += stats->n_packets;
> +    group->byte_count += stats->n_bytes;
> +    ovs_mutex_unlock(&group->stats_mutex);
> +}
> +
>  static enum ofperr
>  group_construct(struct ofgroup *group_)
>  {
> @@ -3578,34 +3592,19 @@ group_construct(struct ofgroup *group_)
>  }
>
>  static void
> -group_destruct__(struct group_dpif *group)
> -    OVS_REQUIRES(group->stats_mutex)
> -{
> -    free(group->bucket_stats);
> -    group->bucket_stats = NULL;
> -}
> -
> -static void
>  group_destruct(struct ofgroup *group_)
>  {
>      struct group_dpif *group = group_dpif_cast(group_);
> -    ovs_mutex_lock(&group->stats_mutex);
> -    group_destruct__(group);
> -    ovs_mutex_unlock(&group->stats_mutex);
>      ovs_mutex_destroy(&group->stats_mutex);
>  }
>
>  static enum ofperr
> -group_modify(struct ofgroup *group_, struct ofgroup *victim_)
> +group_modify(struct ofgroup *group_)
>  {
>      struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
>      struct group_dpif *group = group_dpif_cast(group_);
> -    struct group_dpif *victim = group_dpif_cast(victim_);
>
>      ovs_mutex_lock(&group->stats_mutex);
> -    if (victim->up.n_buckets < group->up.n_buckets) {
> -        group_destruct__(group);
> -    }
>      group_construct_stats(group);
>      ovs_mutex_unlock(&group->stats_mutex);
>
> @@ -3618,12 +3617,21 @@ static enum ofperr
>  group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
>  {
>      struct group_dpif *group = group_dpif_cast(group_);
> +    struct ofputil_bucket *bucket;
> +    const struct list *buckets;
> +    struct bucket_counter *bucket_stats;
>
>      ovs_mutex_lock(&group->stats_mutex);
>      ogs->packet_count = group->packet_count;
>      ogs->byte_count = group->byte_count;
> -    memcpy(ogs->bucket_stats, group->bucket_stats,
> -           group->up.n_buckets * sizeof *group->bucket_stats);
> +
> +    group_dpif_get_buckets(group, &buckets);
> +    bucket_stats = ogs->bucket_stats;
> +    LIST_FOR_EACH (bucket, list_node, buckets) {
> +        bucket_stats->packet_count = bucket->stats.packet_count;
> +        bucket_stats->byte_count = bucket->stats.byte_count;
> +        bucket_stats++;
> +    }
>      ovs_mutex_unlock(&group->stats_mutex);
>
>      return 0;
> diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
> index d4ad624..8fc6edc 100644
> --- a/ofproto/ofproto-dpif.h
> +++ b/ofproto/ofproto-dpif.h
> @@ -125,6 +125,9 @@ void choose_miss_rule(enum ofputil_port_config,
>                        struct rule_dpif *no_packet_in_rule,
>                        struct rule_dpif **rule, bool take_ref);
>
> +void group_dpif_credit_stats(struct group_dpif *,
> +                             struct ofputil_bucket *,
> +                             const struct dpif_flow_stats *);
>  bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
>                         struct group_dpif **group);
>
> @@ -232,6 +235,25 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
>
>  /* struct rule_dpif has struct rule as it's first member. */
>  #define RULE_CAST(RULE) ((struct rule *)RULE)
> +#define GROUP_CAST(GROUP) ((struct ofgroup *) GROUP)
> +
> +static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
> +{
> +    if (group) {
> +        ofproto_group_ref(GROUP_CAST(group));
> +    }
> +    return group;
> +}
> +
> +static inline void group_dpif_unref(struct group_dpif *group)
> +{
> +    if (group) {
> +        struct ofgroup *ofgroup = GROUP_CAST(group);
> +        struct ofproto *ofproto = ofgroup->ofproto;
> +        ovs_rwlock_wrlock(&ofproto->groups_rwlock);
> +        ofproto_group_unref(ofproto, ofgroup);
> +    }
> +}
>
>  static inline void rule_dpif_ref(struct rule_dpif *rule)
>  {
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 141adec..8d527f8 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -488,6 +488,12 @@ struct ofgroup {
>      uint32_t group_id;
>      enum ofp11_group_type type; /* One of OFPGT_*. */
>
> +    /* Number of references.
> +     * The classifier owns one reference.
> +     * Any thread trying to keep a rule from being freed should hold its own
> +     * reference, typically the xlate cache. */
> +    struct ovs_refcount ref_count;
> +
>      long long int created;      /* Creation time. */
>      long long int modified;     /* Time of last modification. */
>
> @@ -502,6 +508,10 @@ bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
>  void ofproto_group_release(struct ofgroup *group)
>      OVS_RELEASES(group->rwlock);
>
> +void ofproto_group_ref(struct ofgroup *);
> +void ofproto_group_unref(struct ofproto *, struct ofgroup *)
> +    OVS_RELEASES(ofproto->groups_rwlock);
> +
>  /* ofproto class structure, to be defined by each ofproto implementation.
>   *
>   *
> @@ -1684,7 +1694,7 @@ struct ofproto_class {
>      void (*group_destruct)(struct ofgroup *);
>      void (*group_dealloc)(struct ofgroup *);
>
> -    enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
> +    enum ofperr (*group_modify)(struct ofgroup *);
>
>      enum ofperr (*group_get_stats)(const struct ofgroup *,
>                                     struct ofputil_group_stats *);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index d92bafd..47bb1fc 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -2642,6 +2642,49 @@ ofproto_rule_unref(struct rule *rule)
>      }
>  }
>
> +static void
> +group_destroy_cb(struct ofgroup *group)
> +{
> +    group->ofproto->ofproto_class->group_destruct(group);
> +    ofputil_bucket_list_destroy(&group->buckets);
> +    ovs_rwlock_destroy(&group->rwlock);
> +    group->ofproto->ofproto_class->group_dealloc(group);
> +}
> +
> +void
> +ofproto_group_ref(struct ofgroup *group)
> +{
> +    if (group) {
> +        ovs_refcount_ref(&group->ref_count);
> +    }
> +}
> +
> +void
> +ofproto_group_unref(struct ofproto* ofproto, struct ofgroup *group)
> +    OVS_RELEASES(ofproto->groups_rwlock)
> +{
> +    if (group && ovs_refcount_unref(&group->ref_count) == 1) {
> +        struct match match;
> +        struct ofputil_flow_mod fm;
> +
> +        /* Delete all flow entries containing this group in a group action */
> +        match_init_catchall(&match);
> +        flow_mod_init(&fm, &match, 0, NULL, 0, OFPFC_DELETE);
> +        fm.out_group = group->group_id;
> +        handle_flow_mod__(ofproto, NULL, &fm, NULL);
> +
> +        ovs_rwlock_wrlock(&group->rwlock);
> +        hmap_remove(&ofproto->groups, &group->hmap_node);
> +        /* No-one can find this group any more. */
> +        ofproto->n_groups[group->type]--;
> +        ovs_rwlock_unlock(&ofproto->groups_rwlock);
> +        ovs_rwlock_unlock(&group->rwlock);
> +        ovsrcu_postpone(group_destroy_cb, group);
> +    } else {
> +        ovs_rwlock_unlock(&ofproto->groups_rwlock);
> +    }
> +}
> +
>  static uint32_t get_provider_meter_id(const struct ofproto *,
>                                        uint32_t of_meter_id);
>
> @@ -5608,6 +5651,7 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
>      ofgroup->group_id = gm->group_id;
>      ofgroup->type = gm->type;
>      ofgroup->created = ofgroup->modified = time_msec();
> +    ovs_refcount_init(&ofgroup->ref_count);
>
>      list_move(&ofgroup->buckets, &gm->buckets);
>      ofgroup->n_buckets = list_size(&ofgroup->buckets);
> @@ -5697,7 +5741,7 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
>      list_move(&ofgroup->buckets, &gm->buckets);
>      ofgroup->n_buckets = list_size(&ofgroup->buckets);
>
> -    error = ofproto->ofproto_class->group_modify(ofgroup, victim);
> +    error = ofproto->ofproto_class->group_modify(ofgroup);
>      if (!error) {
>          ofputil_bucket_list_destroy(&victim->buckets);
>          ofproto->n_groups[victim->type]--;
> @@ -5718,34 +5762,6 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
>      return error;
>  }
>
> -static void
> -delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
> -    OVS_RELEASES(ofproto->groups_rwlock)
> -{
> -    struct match match;
> -    struct ofputil_flow_mod fm;
> -
> -    /* Delete all flow entries containing this group in a group action */
> -    match_init_catchall(&match);
> -    flow_mod_init(&fm, &match, 0, NULL, 0, OFPFC_DELETE);
> -    fm.out_group = ofgroup->group_id;
> -    handle_flow_mod__(ofproto, NULL, &fm, NULL);
> -
> -    /* Must wait until existing readers are done,
> -     * while holding the container's write lock at the same time. */
> -    ovs_rwlock_wrlock(&ofgroup->rwlock);
> -    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
> -    /* No-one can find this group any more. */
> -    ofproto->n_groups[ofgroup->type]--;
> -    ovs_rwlock_unlock(&ofproto->groups_rwlock);
> -
> -    ofproto->ofproto_class->group_destruct(ofgroup);
> -    ofputil_bucket_list_destroy(&ofgroup->buckets);
> -    ovs_rwlock_unlock(&ofgroup->rwlock);
> -    ovs_rwlock_destroy(&ofgroup->rwlock);
> -    ofproto->ofproto_class->group_dealloc(ofgroup);
> -}
> -
>  /* Implements OFPGC_DELETE. */
>  static void
>  delete_group(struct ofproto *ofproto, uint32_t group_id)
> @@ -5760,7 +5776,7 @@ delete_group(struct ofproto *ofproto, uint32_t group_id)
>              break;
>          }
>          ofgroup = CONTAINER_OF(node, struct ofgroup, hmap_node);
> -        delete_group__(ofproto, ofgroup);
> +        ofproto_group_unref(ofproto, ofgroup);
>          /* Lock for each node separately, so that we will not jam the
>           * other threads for too long time. */
>          ovs_rwlock_wrlock(&ofproto->groups_rwlock);
> @@ -5769,7 +5785,7 @@ delete_group(struct ofproto *ofproto, uint32_t group_id)
>      HMAP_FOR_EACH_IN_BUCKET (ofgroup, hmap_node,
>                               hash_int(group_id, 0), &ofproto->groups) {
>          if (ofgroup->group_id == group_id) {
> -            delete_group__(ofproto, ofgroup);
> +            ofproto_group_unref(ofproto, ofgroup);
>              return;
>          }
>      }
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index c46e997..dc7e174 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -381,6 +381,23 @@ AT_CHECK([tail -1 stdout], [0],
>  OVS_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([ofproto-dpif - group and bucket stats])
> +OVS_VSWITCHD_START
> +ADD_OF_PORTS([br0], [1], [10])
> +AT_CHECK([ovs-ofctl -O OpenFlow12 add-group br0 'group_id=1234,type=select,bucket=output:10'])
> +AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 'ip actions=write_actions(group:1234)'])
> +for d in 0 1 2; do
> +  ovs-appctl netdev-dummy/receive br0 "in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:0$d),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)"
> +done
> +AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
> +AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn dump-group-stats br0], [0], [stdout])
> +AT_CHECK([STRIP_XIDS stdout], [0], [dnl
> +OFPST_GROUP reply (OF1.2):
> + group_id=1234,ref_count=0,packet_count=3,byte_count=180,bucket0:packet_count=3,byte_count=180
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_SETUP([ofproto-dpif - registers])
>  OVS_VSWITCHD_START
>  ADD_OF_PORTS([br0], [20], [21], [22], [33], [90])
> --
> 1.7.9.5
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/pipermail/listinfo/dev
>