Couple of high level questions about this. Instead of protecting coverage_run() with a mutex and calling it in time_poll(), why not just call it once at the beginning of the main thread's run loop? If we do that, we can avoid all the locking which could affect the threads.
Ben, could we do the same for coverage_clear()? It seems a bit odd to me that time_poll() calls this stuff in a multithreaded environment. That said, perhaps we should leave it as it is, and do something more comprehensive to deal with multiple threads later. When you compute "slots", why is it possible that the number of run intervals is a multiple of COVERAGE_RUN_INTERVAL? I.E. why isn't slots always equal to 1? Ethan On Tue, Sep 24, 2013 at 5:54 PM, Alex Wang <al...@nicira.com> wrote: > This commit changes the "ovs-appctl coverage/show" command to show the > the averaged per-second rates for the last few seconds, the last minute > and the last hour, and the total counts of all of the coverage counters. > > Signed-off-by: Alex Wang <al...@nicira.com> > --- > lib/coverage-unixctl.man | 4 +- > lib/coverage.c | 113 > ++++++++++++++++++++++++++++++++++++++++++++-- > lib/coverage.h | 17 ++++++- > lib/timeval.c | 1 + > tests/ofproto-dpif.at | 34 ++++++++++++++ > 5 files changed, 162 insertions(+), 7 deletions(-) > > diff --git a/lib/coverage-unixctl.man b/lib/coverage-unixctl.man > index 9718894..8e5df81 100644 > --- a/lib/coverage-unixctl.man > +++ b/lib/coverage-unixctl.man > @@ -8,4 +8,6 @@ main loop takes unusually long to run. > Coverage counters are useful mainly for performance analysis and > debugging. > .IP "\fBcoverage/show\fR" > -Displays the values of all of the coverage counters. > +Displays the averaged per-second rates for the last few seconds, the > +last minute and the last hour, and the total counts of all of the > +coverage counters. > diff --git a/lib/coverage.c b/lib/coverage.c > index 23e2997..4364734 100644 > --- a/lib/coverage.c > +++ b/lib/coverage.c > @@ -63,7 +63,14 @@ struct coverage_counter *coverage_counters[] = { > > static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER; > > +static long long int coverage_run_time = LLONG_MIN; > + > +/* Index counter used to compute the moving average array's index. */ > +static unsigned int idx_count = 0; > + > static void coverage_read(struct svec *); > +static unsigned int coverage_array_sum(const unsigned int *arr, > + const unsigned int len); > > static void > coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, > @@ -206,6 +213,7 @@ coverage_log(void) > static void > coverage_read(struct svec *lines) > { > + struct coverage_counter **c = coverage_counters; > unsigned long long int *totals; > size_t n_never_hit; > uint32_t hash; > @@ -215,24 +223,37 @@ coverage_read(struct svec *lines) > > n_never_hit = 0; > svec_add_nocopy(lines, > - xasprintf("Event coverage, hash=%08"PRIx32":", hash)); > + xasprintf("Event coverage, avg rate over last: %d " > + "seconds, last minute, last hour, " > + "hash=%08"PRIx32":", > + COVERAGE_RUN_INTERVAL/1000, hash)); > > totals = xmalloc(n_coverage_counters * sizeof *totals); > ovs_mutex_lock(&coverage_mutex); > for (i = 0; i < n_coverage_counters; i++) { > - totals[i] = coverage_counters[i]->total; > + totals[i] = c[i]->total; > } > ovs_mutex_unlock(&coverage_mutex); > > for (i = 0; i < n_coverage_counters; i++) { > if (totals[i]) { > - svec_add_nocopy(lines, xasprintf("%-24s %9llu", > - coverage_counters[i]->name, > - totals[i])); > + /* Shows the averaged per-second rates for the last > + * COVERAGE_RUN_INTERVAL interval, the last minute and > + * the last hour. */ > + svec_add_nocopy(lines, > + xasprintf("%-24s %5.1f/sec %9.3f/sec " > + "%13.4f/sec total: %llu", > + c[i]->name, > + (c[i]->min[(idx_count - 1) % MIN_AVG_LEN] > + * 1000.0 / COVERAGE_RUN_INTERVAL), > + coverage_array_sum(c[i]->min, MIN_AVG_LEN) / 60.0, > + coverage_array_sum(c[i]->hr, HR_AVG_LEN) / 3600.0, > + totals[i])); > } else { > n_never_hit++; > } > } > + > svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit)); > free(totals); > } > @@ -249,3 +270,85 @@ coverage_clear(void) > } > ovs_mutex_unlock(&coverage_mutex); > } > + > +/* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update > the > + * coverage counters' 'min' and 'hr' array. 'min' array is for cumulating > + * per second counts into per minute count. 'hr' array is for cumulating per > + * minute counts into per hour count. Every thread may call this function. > */ > +void > +coverage_run(void) > +{ > + /* Defines the moving average array index variables. */ > + static unsigned int min_idx, hr_idx; > + struct coverage_counter **c = coverage_counters; > + long long int now; > + > + ovs_mutex_lock(&coverage_mutex); > + now = time_msec(); > + /* Initialize the coverage_run_time. */ > + if (coverage_run_time == LLONG_MIN) { > + coverage_run_time = now + COVERAGE_RUN_INTERVAL; > + } > + > + if (now >= coverage_run_time) { > + size_t i, j; > + /* Computes the number of COVERAGE_RUN_INTERVAL slots, since > + * it is possible that the actual run interval is multiple of > + * COVERAGE_RUN_INTERVAL. */ > + int slots = (now - coverage_run_time) / COVERAGE_RUN_INTERVAL + 1; > + > + for (i = 0; i < n_coverage_counters; i++) { > + unsigned int count, portion; > + unsigned int m_idx = min_idx; > + unsigned int h_idx = hr_idx; > + unsigned int idx = idx_count; > + > + /* Computes the differences between the current total and the one > + * recorded in last invocation of coverage_run(). */ > + count = c[i]->total - c[i]->last_total; > + c[i]->last_total = c[i]->total; > + /* The count over the time interval is evenly distributed > + * among slots by calculating the portion. */ > + portion = count / slots; > + > + for (j = 0; j < slots; j++) { > + /* Updates the index variables. */ > + /* The m_idx is increased from 0 to MIN_AVG_LEN - 1. Every > + * time the m_idx finishes a cycle (a cycle is one minute), > + * the h_idx is incremented by 1. */ > + m_idx = idx % MIN_AVG_LEN; > + h_idx = idx / MIN_AVG_LEN; > + > + c[i]->min[m_idx] = portion + (j == (slots - 1) > + ? count % slots : 0); > + c[i]->hr[h_idx] = m_idx == 0 > + ? c[i]->min[m_idx] > + : (c[i]->hr[h_idx] + c[i]->min[m_idx]); > + /* This is to guarantee that h_idx ranges from 0 to 59. */ > + idx = (idx + 1) % (MIN_AVG_LEN * HR_AVG_LEN); > + } > + } > + > + /* Updates the global index variables. */ > + idx_count = (idx_count + slots) % (MIN_AVG_LEN * HR_AVG_LEN); > + min_idx = idx_count % MIN_AVG_LEN; > + hr_idx = idx_count / MIN_AVG_LEN; > + /* Updates the run time. */ > + coverage_run_time = now + COVERAGE_RUN_INTERVAL; > + } > + ovs_mutex_unlock(&coverage_mutex); > +} > + > +static unsigned int > +coverage_array_sum(const unsigned int *arr, const unsigned int len) > +{ > + unsigned int sum = 0; > + size_t i; > + > + ovs_mutex_lock(&coverage_mutex); > + for (i = 0; i < len; i++) { > + sum += arr[i]; > + } > + ovs_mutex_unlock(&coverage_mutex); > + return sum; > +} > diff --git a/lib/coverage.h b/lib/coverage.h > index 3d1a115..163728e 100644 > --- a/lib/coverage.h > +++ b/lib/coverage.h > @@ -30,11 +30,25 @@ > #include "ovs-thread.h" > #include "vlog.h" > > +/* Makes coverage_run run every 5000 ms (5 seconds). > + * If this value is redefined, the new value must > + * divide 60000 (1 minute). */ > +#define COVERAGE_RUN_INTERVAL 5000 > +BUILD_ASSERT_DECL(60000 % COVERAGE_RUN_INTERVAL == 0); > + > +/* Defines the moving average array length. */ > +#define MIN_AVG_LEN (60000/COVERAGE_RUN_INTERVAL) > +#define HR_AVG_LEN 60 > + > /* A coverage counter. */ > struct coverage_counter { > const char *const name; /* Textual name. */ > unsigned int (*const count)(void); /* Gets, zeros this thread's count. */ > unsigned long long int total; /* Total count. */ > + unsigned long long int last_total; > + /* The moving average arrays. */ > + unsigned int min[MIN_AVG_LEN]; > + unsigned int hr[HR_AVG_LEN]; > }; > > /* Defines COUNTER. There must be exactly one such definition at file scope > @@ -56,7 +70,7 @@ struct coverage_counter { > } \ > extern struct coverage_counter counter_##COUNTER; \ > struct coverage_counter counter_##COUNTER \ > - = { #COUNTER, COUNTER##_count, 0 }; \ > + = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} }; \ > extern struct coverage_counter *counter_ptr_##COUNTER; \ > struct coverage_counter *counter_ptr_##COUNTER \ > __attribute__((section("coverage"))) = &counter_##COUNTER > @@ -80,6 +94,7 @@ struct coverage_counter { > void coverage_init(void); > void coverage_log(void); > void coverage_clear(void); > +void coverage_run(void); > > /* Implementation detail. */ > #define COVERAGE_DEFINE__(COUNTER) \ > diff --git a/lib/timeval.c b/lib/timeval.c > index 223ed30..64ae845 100644 > --- a/lib/timeval.c > +++ b/lib/timeval.c > @@ -234,6 +234,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long > long int timeout_when, > log_poll_interval(*last_wakeup); > } > coverage_clear(); > + coverage_run(); > start = time_msec(); > > timeout_when = MIN(timeout_when, deadline); > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index f67c3ab..0a63299 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -2816,3 +2816,37 @@ AT_CHECK([ovs-appctl bond/show | sed -n > '/^.*may_enable:.*/p'], [0], [dnl > > OVS_VSWITCHD_STOP > AT_CLEANUP > + > +# Unit test for appctl coverage/show command > +AT_SETUP([ofproto-dpif - coverage/show]) > +OVS_VSWITCHD_START > + > +ovs-appctl time/stop > +# before the first 5 seconds, nothing can be calculated but the total count. > +# there should be two unixctl commands received, but the count of the "appctl > +# coverage/show" command is not updated to the total. so there show only 1. > +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], > [dnl > +unixctl_received 0.0/sec 0.000/sec 0.0000/sec total: 1 > +]) > + > +ovs-appctl time/warp 5000 > +# at first 5 second time instant, should have stats. > +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], > [dnl > +unixctl_received 0.6/sec 0.050/sec 0.0008/sec total: 3 > +]) > + > +for i in `seq 0 10`; do ovs-appctl time/warp 5000; done > +# advance to first 1 minute time instant. > +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], > [dnl > +unixctl_received 0.2/sec 0.250/sec 0.0042/sec total: > 15 > +]) > + > +ovs-appctl time/warp 60000 > +# advance to next 1 minute time instant directly, should observe the > per-minute > +# rate drop. > +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], > [dnl > +unixctl_received 0.4/sec 0.033/sec 0.0047/sec total: > 17 > +]) > + > +OVS_VSWITCHD_STOP > +AT_CLEANUP > -- > 1.7.9.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev