This patch also adds ref/unref infrastructure for ofgroup, so that handler and revalidator threads can update group and bucket stats via the xlate cache.
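In outline, the lifecycle this enables is: translation takes a group reference when it caches an XC_GROUP entry, a revalidator later credits group and bucket stats through that entry, and xlate_cache_clear() drops the reference again, so the final unref (e.g. from an OFPGC_DELETE) destroys the group only once no cached entry still points at it. The fragment below is a minimal, self-contained sketch of that refcount pattern only; 'struct group' and its helpers are simplified stand-ins, not the struct ofgroup / group_dpif code in the diff, which additionally uses ovs_refcount, stats_mutex, and RCU-deferred destruction (ovsrcu_postpone()):

    /* Compile-checked sketch of the ref/unref lifecycle; stand-in types. */
    #include <stdint.h>
    #include <stdio.h>

    struct group {
        unsigned int ref_count;
        uint64_t packet_count;
        uint64_t byte_count;
    };

    static struct group *group_ref(struct group *g)
    {
        if (g) {
            g->ref_count++;
        }
        return g;
    }

    static void group_unref(struct group *g)
    {
        if (g && --g->ref_count == 0) {
            printf("last reference dropped: destroy group\n");
        }
    }

    int main(void)
    {
        struct group g = { .ref_count = 1 };  /* The group table's own ref. */

        /* Translation caches an XC_GROUP entry and takes a reference, so
         * the group outlives a concurrent delete until the cache is
         * cleared. */
        struct group *cached = group_ref(&g);

        /* A revalidator later pushes datapath stats through the cached
         * entry (group_dpif_credit_stats() in this patch). */
        cached->packet_count += 3;
        cached->byte_count += 180;

        group_unref(cached);  /* xlate_cache_clear() drops the cache's ref. */
        group_unref(&g);      /* Delete drops the table's ref: destroyed. */
        return 0;
    }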
See the following thread for more information:
http://openvswitch.org/pipermail/dev/2014-January/036107.html

Signed-off-by: Ryan Wilson <wr...@nicira.com>
---
 lib/ofp-util.h               |   12 ++++---
 ofproto/ofproto-dpif-xlate.c |   50 ++++++++++++++++++++-------
 ofproto/ofproto-dpif.c       |   58 +++++++++++++++++--------------
 ofproto/ofproto-dpif.h       |   22 ++++++++++++
 ofproto/ofproto-provider.h   |   12 ++++++-
 ofproto/ofproto.c            |   78 +++++++++++++++++++++++++-----------------
 tests/ofproto-dpif.at        |   17 +++++++++
 7 files changed, 174 insertions(+), 75 deletions(-)

diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index 782e512..77967a5 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -1042,6 +1042,11 @@ int ofputil_decode_queue_stats(struct ofputil_queue_stats *qs, struct ofpbuf *ms
 void ofputil_append_queue_stat(struct list *replies,
                                const struct ofputil_queue_stats *oqs);
 
+struct bucket_counter {
+    uint64_t packet_count;          /* Number of packets processed by bucket. */
+    uint64_t byte_count;            /* Number of bytes processed by bucket. */
+};
+
 /* Bucket for use in groups. */
 struct ofputil_bucket {
     struct list list_node;
@@ -1054,6 +1059,8 @@ struct ofputil_bucket {
                                      * failover groups. */
     struct ofpact *ofpacts;         /* Series of "struct ofpact"s. */
     size_t ofpacts_len;             /* Length of ofpacts, in bytes. */
+
+    struct bucket_counter stats;
 };
 
 /* Protocol-independent group_mod. */
@@ -1064,11 +1071,6 @@ struct ofputil_group_mod {
     struct list buckets;            /* Contains "struct ofputil_bucket"s. */
 };
 
-struct bucket_counter {
-    uint64_t packet_count;          /* Number of packets processed by bucket. */
-    uint64_t byte_count;            /* Number of bytes processed by bucket. */
-};
-
 /* Group stats reply, independent of protocol. */
 struct ofputil_group_stats {
     uint32_t group_id;              /* Group identifier. */
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index da538b7..9e99b9a 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -235,6 +235,7 @@ enum xc_type {
     XC_LEARN,
     XC_NORMAL,
     XC_FIN_TIMEOUT,
+    XC_GROUP,
 };
 
 /* xlate_cache entries hold enough information to perform the side effects of
@@ -279,6 +280,10 @@ struct xc_entry {
             uint16_t idle;
             uint16_t hard;
         } fin;
+        struct {
+            struct group_dpif *group;
+            struct ofputil_bucket *bucket;
+        } group;
     } u;
 };
 
@@ -837,7 +842,7 @@ odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
     return true;
 }
 
-static const struct ofputil_bucket *
+static struct ofputil_bucket *
 group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
                         int depth);
 
@@ -862,7 +867,7 @@ static bool
 bucket_is_alive(const struct xlate_ctx *ctx,
-                const struct ofputil_bucket *bucket, int depth)
+                struct ofputil_bucket *bucket, int depth)
 {
     if (depth >= MAX_LIVENESS_RECURSION) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
@@ -879,7 +884,7 @@ bucket_is_alive(const struct xlate_ctx *ctx,
             group_is_alive(ctx, bucket->watch_group, depth + 1));
 }
 
-static const struct ofputil_bucket *
+static struct ofputil_bucket *
 group_first_live_bucket(const struct xlate_ctx *ctx,
                         const struct group_dpif *group, int depth)
 {
@@ -896,16 +901,16 @@ group_first_live_bucket(const struct xlate_ctx *ctx,
     return NULL;
 }
 
-static const struct ofputil_bucket *
+static struct ofputil_bucket *
 group_best_live_bucket(const struct xlate_ctx *ctx,
                        const struct group_dpif *group,
                        uint32_t basis)
 {
-    const struct ofputil_bucket *best_bucket = NULL;
+    struct ofputil_bucket *best_bucket = NULL;
     uint32_t best_score = 0;
     int i = 0;
 
-    const struct ofputil_bucket *bucket;
+    struct ofputil_bucket *bucket;
     const struct list *buckets;
 
     group_dpif_get_buckets(group, &buckets);
@@ -2112,7 +2117,8 @@ match:
 }
 
 static void
-xlate_group_bucket(struct xlate_ctx *ctx, const struct ofputil_bucket *bucket)
+xlate_group_bucket(struct xlate_ctx *ctx, struct group_dpif *group,
+                   struct ofputil_bucket *bucket)
 {
     uint64_t action_list_stub[1024 / 8];
     struct ofpbuf action_list, action_set;
@@ -2127,19 +2133,30 @@ xlate_group_bucket(struct xlate_ctx *ctx, const struct ofputil_bucket *bucket)
 
     ofpbuf_uninit(&action_set);
     ofpbuf_uninit(&action_list);
+
+    if (ctx->xin->resubmit_stats) {
+        group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
+    }
+    if (ctx->xin->xcache) {
+        struct xc_entry *entry;
+
+        entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
+        entry->u.group.group = group_dpif_ref(group);
+        entry->u.group.bucket = bucket;
+    }
 }
 
 static void
 xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
-    const struct ofputil_bucket *bucket;
+    struct ofputil_bucket *bucket;
     const struct list *buckets;
     struct flow old_flow = ctx->xin->flow;
 
     group_dpif_get_buckets(group, &buckets);
     LIST_FOR_EACH (bucket, list_node, buckets) {
-        xlate_group_bucket(ctx, bucket);
+        xlate_group_bucket(ctx, group, bucket);
         /* Roll back flow to previous state.
          * This is equivalent to cloning the packet for each bucket.
          *
@@ -2153,11 +2170,11 @@ xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
 static void
 xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
-    const struct ofputil_bucket *bucket;
+    struct ofputil_bucket *bucket;
 
     bucket = group_first_live_bucket(ctx, group, 0);
     if (bucket) {
-        xlate_group_bucket(ctx, bucket);
+        xlate_group_bucket(ctx, group, bucket);
     }
 }
 
@@ -2165,14 +2182,14 @@ static void
 xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
     struct flow_wildcards *wc = &ctx->xout->wc;
-    const struct ofputil_bucket *bucket;
+    struct ofputil_bucket *bucket;
     uint32_t basis;
 
     basis = hash_mac(ctx->xin->flow.dl_dst, 0, 0);
     bucket = group_best_live_bucket(ctx, group, basis);
     if (bucket) {
         memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
-        xlate_group_bucket(ctx, bucket);
+        xlate_group_bucket(ctx, group, bucket);
     }
 }
 
@@ -3598,6 +3615,10 @@ xlate_push_stats(struct xlate_cache *xcache, bool may_learn,
             xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
                                 entry->u.fin.idle, entry->u.fin.hard);
             break;
+        case XC_GROUP:
+            group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
+                                    stats);
+            break;
         default:
             OVS_NOT_REACHED();
         }
@@ -3666,6 +3687,9 @@ xlate_cache_clear(struct xlate_cache *xcache)
             /* 'u.fin.rule' is always already held as a XC_RULE, which
              * has already released it's reference above. */
             break;
+        case XC_GROUP:
+            group_dpif_unref(entry->u.group.group);
+            break;
         default:
             OVS_NOT_REACHED();
         }
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index e50b4fe..ee32fb7 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -107,7 +107,6 @@ struct group_dpif {
     struct ovs_mutex stats_mutex;
     uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
     uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
-    struct bucket_counter *bucket_stats OVS_GUARDED; /* Bucket statistics. */
 };
 
 struct ofbundle {
@@ -3541,17 +3540,32 @@ static void
 group_construct_stats(struct group_dpif *group)
     OVS_REQUIRES(group->stats_mutex)
 {
+    struct ofputil_bucket *bucket;
+    const struct list *buckets;
+
     group->packet_count = 0;
     group->byte_count = 0;
-    if (!group->bucket_stats) {
-        group->bucket_stats = xcalloc(group->up.n_buckets,
-                                      sizeof *group->bucket_stats);
-    } else {
-        memset(group->bucket_stats, 0, group->up.n_buckets *
-               sizeof *group->bucket_stats);
+
+    group_dpif_get_buckets(group, &buckets);
+    LIST_FOR_EACH (bucket, list_node, buckets) {
+        bucket->stats.packet_count = 0;
+        bucket->stats.byte_count = 0;
     }
 }
 
+void
+group_dpif_credit_stats(struct group_dpif *group,
+                        struct ofputil_bucket *bucket,
+                        const struct dpif_flow_stats *stats)
+{
+    ovs_mutex_lock(&group->stats_mutex);
+    bucket->stats.packet_count += stats->n_packets;
+    bucket->stats.byte_count += stats->n_bytes;
+    group->packet_count += stats->n_packets;
+    group->byte_count += stats->n_bytes;
+    ovs_mutex_unlock(&group->stats_mutex);
+}
+
 static enum ofperr
 group_construct(struct ofgroup *group_)
 {
@@ -3578,34 +3592,19 @@ group_construct(struct ofgroup *group_)
 }
 
 static void
-group_destruct__(struct group_dpif *group)
-    OVS_REQUIRES(group->stats_mutex)
-{
-    free(group->bucket_stats);
-    group->bucket_stats = NULL;
-}
-
-static void
 group_destruct(struct ofgroup *group_)
 {
     struct group_dpif *group = group_dpif_cast(group_);
-    ovs_mutex_lock(&group->stats_mutex);
-    group_destruct__(group);
-    ovs_mutex_unlock(&group->stats_mutex);
     ovs_mutex_destroy(&group->stats_mutex);
 }
 
 static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
     struct group_dpif *group = group_dpif_cast(group_);
-    struct group_dpif *victim = group_dpif_cast(victim_);
 
     ovs_mutex_lock(&group->stats_mutex);
-    if (victim->up.n_buckets < group->up.n_buckets) {
-        group_destruct__(group);
-    }
     group_construct_stats(group);
     ovs_mutex_unlock(&group->stats_mutex);
 
@@ -3618,12 +3617,21 @@ static enum ofperr
 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
 {
     struct group_dpif *group = group_dpif_cast(group_);
+    struct ofputil_bucket *bucket;
+    const struct list *buckets;
+    struct bucket_counter *bucket_stats;
 
     ovs_mutex_lock(&group->stats_mutex);
     ogs->packet_count = group->packet_count;
     ogs->byte_count = group->byte_count;
-    memcpy(ogs->bucket_stats, group->bucket_stats,
-           group->up.n_buckets * sizeof *group->bucket_stats);
+
+    group_dpif_get_buckets(group, &buckets);
+    bucket_stats = ogs->bucket_stats;
+    LIST_FOR_EACH (bucket, list_node, buckets) {
+        bucket_stats->packet_count = bucket->stats.packet_count;
+        bucket_stats->byte_count = bucket->stats.byte_count;
+        bucket_stats++;
+    }
     ovs_mutex_unlock(&group->stats_mutex);
 
     return 0;
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index d4ad624..8fc6edc 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -125,6 +125,9 @@ void choose_miss_rule(enum ofputil_port_config,
                       struct rule_dpif *no_packet_in_rule,
                       struct rule_dpif **rule, bool take_ref);
 
+void group_dpif_credit_stats(struct group_dpif *,
+                             struct ofputil_bucket *,
+                             const struct dpif_flow_stats *);
 bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                        struct group_dpif **group);
 
@@ -232,6 +235,25 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
 
 /* struct rule_dpif has struct rule as it's first member. */
 #define RULE_CAST(RULE) ((struct rule *)RULE)
+#define GROUP_CAST(GROUP) ((struct ofgroup *) GROUP)
+
+static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_ref(GROUP_CAST(group));
+    }
+    return group;
+}
+
+static inline void group_dpif_unref(struct group_dpif *group)
+{
+    if (group) {
+        struct ofgroup *ofgroup = GROUP_CAST(group);
+        struct ofproto *ofproto = ofgroup->ofproto;
+        ovs_rwlock_wrlock(&ofproto->groups_rwlock);
+        ofproto_group_unref(ofproto, ofgroup);
+    }
+}
 
 static inline void rule_dpif_ref(struct rule_dpif *rule)
 {
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 141adec..8d527f8 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -488,6 +488,12 @@ struct ofgroup {
     uint32_t group_id;
     enum ofp11_group_type type; /* One of OFPGT_*. */
 
+    /* Number of references.
+     * The ofproto's group table owns one reference.
+     * Any thread trying to keep a group from being freed should hold its own
+     * reference, typically via the xlate cache. */
+    struct ovs_refcount ref_count;
+
     long long int created;      /* Creation time. */
     long long int modified;     /* Time of last modification. */
 
@@ -502,6 +508,10 @@ bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
 void ofproto_group_release(struct ofgroup *group)
     OVS_RELEASES(group->rwlock);
 
+void ofproto_group_ref(struct ofgroup *);
+void ofproto_group_unref(struct ofproto *, struct ofgroup *)
+    OVS_RELEASES(ofproto->groups_rwlock);
+
 /* ofproto class structure, to be defined by each ofproto implementation.
  *
  *
@@ -1684,7 +1694,7 @@ struct ofproto_class {
     void (*group_destruct)(struct ofgroup *);
     void (*group_dealloc)(struct ofgroup *);
 
-    enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
+    enum ofperr (*group_modify)(struct ofgroup *);
 
     enum ofperr (*group_get_stats)(const struct ofgroup *,
                                    struct ofputil_group_stats *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index d92bafd..47bb1fc 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2642,6 +2642,49 @@ ofproto_rule_unref(struct rule *rule)
     }
 }
 
+static void
+group_destroy_cb(struct ofgroup *group)
+{
+    group->ofproto->ofproto_class->group_destruct(group);
+    ofputil_bucket_list_destroy(&group->buckets);
+    ovs_rwlock_destroy(&group->rwlock);
+    group->ofproto->ofproto_class->group_dealloc(group);
+}
+
+void
+ofproto_group_ref(struct ofgroup *group)
+{
+    if (group) {
+        ovs_refcount_ref(&group->ref_count);
+    }
+}
+
+void
+ofproto_group_unref(struct ofproto* ofproto, struct ofgroup *group)
+    OVS_RELEASES(ofproto->groups_rwlock)
+{
+    if (group && ovs_refcount_unref(&group->ref_count) == 1) {
+        struct match match;
+        struct ofputil_flow_mod fm;
+
+        /* Delete all flow entries containing this group in a group action */
+        match_init_catchall(&match);
+        flow_mod_init(&fm, &match, 0, NULL, 0, OFPFC_DELETE);
+        fm.out_group = group->group_id;
+        handle_flow_mod__(ofproto, NULL, &fm, NULL);
+
+        ovs_rwlock_wrlock(&group->rwlock);
+        hmap_remove(&ofproto->groups, &group->hmap_node);
+        /* No-one can find this group any more. */
+        ofproto->n_groups[group->type]--;
+        ovs_rwlock_unlock(&ofproto->groups_rwlock);
+        ovs_rwlock_unlock(&group->rwlock);
+        ovsrcu_postpone(group_destroy_cb, group);
+    } else {
+        ovs_rwlock_unlock(&ofproto->groups_rwlock);
+    }
+}
+
 static uint32_t get_provider_meter_id(const struct ofproto *,
                                       uint32_t of_meter_id);
 
@@ -5608,6 +5651,7 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     ofgroup->group_id = gm->group_id;
     ofgroup->type = gm->type;
     ofgroup->created = ofgroup->modified = time_msec();
+    ovs_refcount_init(&ofgroup->ref_count);
 
     list_move(&ofgroup->buckets, &gm->buckets);
     ofgroup->n_buckets = list_size(&ofgroup->buckets);
@@ -5697,7 +5741,7 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     list_move(&ofgroup->buckets, &gm->buckets);
     ofgroup->n_buckets = list_size(&ofgroup->buckets);
 
-    error = ofproto->ofproto_class->group_modify(ofgroup, victim);
+    error = ofproto->ofproto_class->group_modify(ofgroup);
     if (!error) {
         ofputil_bucket_list_destroy(&victim->buckets);
         ofproto->n_groups[victim->type]--;
@@ -5718,34 +5762,6 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     return error;
 }
 
-static void
-delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
-    OVS_RELEASES(ofproto->groups_rwlock)
-{
-    struct match match;
-    struct ofputil_flow_mod fm;
-
-    /* Delete all flow entries containing this group in a group action */
-    match_init_catchall(&match);
-    flow_mod_init(&fm, &match, 0, NULL, 0, OFPFC_DELETE);
-    fm.out_group = ofgroup->group_id;
-    handle_flow_mod__(ofproto, NULL, &fm, NULL);
-
-    /* Must wait until existing readers are done,
-     * while holding the container's write lock at the same time. */
-    ovs_rwlock_wrlock(&ofgroup->rwlock);
-    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
-    /* No-one can find this group any more. */
-    ofproto->n_groups[ofgroup->type]--;
-    ovs_rwlock_unlock(&ofproto->groups_rwlock);
-
-    ofproto->ofproto_class->group_destruct(ofgroup);
-    ofputil_bucket_list_destroy(&ofgroup->buckets);
-    ovs_rwlock_unlock(&ofgroup->rwlock);
-    ovs_rwlock_destroy(&ofgroup->rwlock);
-    ofproto->ofproto_class->group_dealloc(ofgroup);
-}
-
 /* Implements OFPGC_DELETE. */
 static void
 delete_group(struct ofproto *ofproto, uint32_t group_id)
@@ -5760,7 +5776,7 @@ delete_group(struct ofproto *ofproto, uint32_t group_id)
             break;
         }
         ofgroup = CONTAINER_OF(node, struct ofgroup, hmap_node);
-        delete_group__(ofproto, ofgroup);
+        ofproto_group_unref(ofproto, ofgroup);
         /* Lock for each node separately, so that we will not jam the
          * other threads for too long time. */
         ovs_rwlock_wrlock(&ofproto->groups_rwlock);
@@ -5769,7 +5785,7 @@
     HMAP_FOR_EACH_IN_BUCKET (ofgroup, hmap_node, hash_int(group_id, 0),
                              &ofproto->groups) {
         if (ofgroup->group_id == group_id) {
-            delete_group__(ofproto, ofgroup);
+            ofproto_group_unref(ofproto, ofgroup);
             return;
         }
     }
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index c46e997..dc7e174 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -381,6 +381,23 @@ AT_CHECK([tail -1 stdout], [0],
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - group and bucket stats])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], [1], [10])
+AT_CHECK([ovs-ofctl -O OpenFlow12 add-group br0 'group_id=1234,type=select,bucket=output:10'])
+AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 'ip actions=write_actions(group:1234)'])
+for d in 0 1 2; do
+    ovs-appctl netdev-dummy/receive br0 "in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:0$d),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)"
+done
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn dump-group-stats br0], [0], [stdout])
+AT_CHECK([STRIP_XIDS stdout], [0], [dnl
+OFPST_GROUP reply (OF1.2):
+ group_id=1234,ref_count=0,packet_count=3,byte_count=180,bucket0:packet_count=3,byte_count=180
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - registers])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [20], [21], [22], [33], [90])
-- 
1.7.9.5