On Sat, May 28, 2016 at 21:15:06 +0300, Sergey Fedorov wrote: > On 25/05/16 04:13, Emilio G. Cota wrote: > > diff --git a/util/qdist.c b/util/qdist.c > > new file mode 100644 > > index 0000000..3343640 > > --- /dev/null > > +++ b/util/qdist.c > > @@ -0,0 +1,386 @@ > (snip) > > + > > +void qdist_add(struct qdist *dist, double x, long count) > > +{ > > + struct qdist_entry *entry = NULL; > > + > > + if (dist->entries) { > > + struct qdist_entry e; > > + > > + e.x = x; > > + entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); > > + } > > + > > + if (entry) { > > + entry->count += count; > > + return; > > + } > > + > > + dist->entries = g_realloc(dist->entries, > > + sizeof(*dist->entries) * (dist->n + 1)); > > Repeated doubling?
Can you please elaborate? > > + dist->n++; > > + entry = &dist->entries[dist->n - 1]; > > What if we combine the above two lines: > > entry = &dist->entries[dist->n++]; > > or just reverse them: > > entry = &dist->entries[dist->n]; > dist->n++; I have less trouble understanding the original. > > + entry->x = x; > > + entry->count = count; > > + qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); > > +} > > + > (snip) > > +static char *qdist_pr_internal(const struct qdist *dist) > > +{ > > + double min, max, step; > > + GString *s = g_string_new(""); > > + size_t i; > > + > > + /* if only one entry, its printout will be either full or empty */ > > + if (dist->n == 1) { > > + if (dist->entries[0].count) { > > + g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - > > 1]); > > + } else { > > + g_string_append_c(s, ' '); > > + } > > + goto out; > > + } > > + > > + /* get min and max counts */ > > + min = dist->entries[0].count; > > + max = min; > > + for (i = 0; i < dist->n; i++) { > > + struct qdist_entry *e = &dist->entries[i]; > > + > > + if (e->count < min) { > > + min = e->count; > > + } > > + if (e->count > max) { > > + max = e->count; > > + } > > + } > > + > > + /* floor((count - min) * step) will give us the block index */ > > + step = (QDIST_NR_BLOCK_CODES - 1) / (max - min); > > + > > + for (i = 0; i < dist->n; i++) { > > + struct qdist_entry *e = &dist->entries[i]; > > + int index; > > + > > + /* make an exception with 0; instead of using block[0], print a > > space */ > > + if (e->count) { > > + index = (int)((e->count - min) * step); > > So "e->count == min" gives us one eighth block instead of just space? Yes, only 0 can print a space. > > + g_string_append_unichar(s, qdist_blocks[index]); > > + } else { > > + g_string_append_c(s, ' '); > > + } > > + } > > + out: > > + return g_string_free(s, FALSE); > > +} > > + > > +/* > > + * Bin the distribution in @from into @n bins of consecutive, > > non-overlapping > > + * intervals, copying the result to @to. > > + * > > + * This function is internal to qdist: only this file and test code should > > + * ever call it. > > + * > > + * Note: calling this function on an already-binned qdist is a bug. > > + * > > + * If @n == 0 or @from->n == 1, use @from->n. > > + */ > > +void qdist_bin__internal(struct qdist *to, const struct qdist *from, > > size_t n) > > +{ > > + double xmin, xmax; > > + double step; > > + size_t i, j, j_min; > > + > > + qdist_init(to); > > + > > + if (!from->entries) { > > + return; > > + } > > + if (!n || from->n == 1) { > > + n = from->n; > > + } > > + > > + /* set equally-sized bins between @from's left and right */ > > + xmin = qdist_xmin(from); > > + xmax = qdist_xmax(from); > > + step = (xmax - xmin) / n; > > + > > + if (n == from->n) { > > + /* if @from's entries are equally spaced, no need to re-bin */ > > + for (i = 0; i < from->n; i++) { > > + if (from->entries[i].x != xmin + i * step) { > > + goto rebin; > > static inline function instead of goto? It would have quite a few arguments, I think the goto is fine. > > + } > > + } > > + /* they're equally spaced, so copy the dist and bail out */ > > + to->entries = g_malloc(sizeof(*to->entries) * from->n); > > g_new()? Changed. > > + to->n = from->n; > > + memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); > > + return; > > + } > > + > > + rebin: > > + j_min = 0; > > + for (i = 0; i < n; i++) { > > + double x; > > + double left, right; > > + > > + left = xmin + i * step; > > + right = xmin + (i + 1) * step; > > + > > + /* Add x, even if it might not get any counts later */ > > + x = left; > > This way we round down to the left margin of each bin like this: > > xmin [*---*---*---*---*] xmax -- from > | /| /| /| / > | / | / | / | / > |/ |/ |/ |/ > | | | | > V V V V > [* * * *] -- to (snip) > xmin [*----*----*----*] xmax -- from > \ /\ /\ /\ / > \ / \ / \ / \ / > | | | | > V V V V > [* * * *] -- to > > I'm not sure which is the more correct option from the mathematical > point of view; but multiple-binning with the last variant of the > algorithm we would still give the same result. There's no "right" or "wrong" way as long as we're consistent and we print the right counts in the right bins. I think the convention I chose is simple enough, and leads to simple printing of the labels. But yes other alternatives would be OK here. > > + qdist_add(to, x, 0); > > + > > + /* > > + * To avoid double-counting we capture [left, right) ranges, > > except for > > + * the righmost bin, which captures a [left, right] range. > > + */ > > + for (j = j_min; j < from->n; j++) { > > Looks like we don't need to keep both 'j' and 'j_min'. We could just use > 'j', initialize it before the outer loop, and do the inner loop with > "while". I prefer for over while if the for loop looks idiomatic. wrt j_min, I'd let the compiler deal with it. Thanks, Emilio