Hi all,
In case it's useful to anyone, here's a little statistical aggregator I put
together (source file attached). This is for online calculation of statistical
quantities and incidentally offers an efficient way to calculate standard
deviation in a single pass over data, using the algorithm described in this
article:
http://www.cs.berkeley.edu/~mhoemmen/cs194/Tutorials/variance.pdf
One design decision I'm not sure about -- when this aggregator is given a range
of ranges as input, it aggregates over every single element of the nested
ranges, to any depth (ranges of ranges of ranges ...). The unit tests confirm
this behaviour. I'm not sure whether this is really a feature or a design flaw
-- comments welcome (on this and on any other aspect of the design).
Best wishes,
-- Joe
module aggregator;
import std.algorithm, std.math, std.range, std.traits;
/**
 * Online (single-pass) statistical accumulator.
 *
 * Values are folded in one at a time with `add`; the count, mean, extrema and
 * variance are available at any point without storing the values themselves.
 *
 * An aggregator that has received no values reports `count == 0`,
 * `mean == 0`, `min == real.max`, `max == -real.max`, and NaN for every
 * variance / standard-deviation property.
 */
struct Aggregator
{
private:
    /// Number of values folded in so far.
    size_t _count = 0;
    /// Running arithmetic mean of the values seen so far.
    real _mean = 0;
    /// Running sum of squared deviations from the mean. This is NOT yet a
    /// variance: the properties below divide it by the appropriate count.
    real _var = 0;
    /// Smallest value seen; starts at the largest finite real so that any
    /// finite input replaces it.
    real _min = real.max;
    /// Largest value seen; starts at the most negative finite real.
    /// (`real.min` was the smallest *positive* normalised value, which made
    /// the maximum of all-negative input wrong.)
    real _max = -real.max;

public:
    /// Number of values aggregated so far.
    size_t count() @property pure nothrow const
    {
        return _count;
    }

    /// Arithmetic mean of the aggregated values (0 if none were added).
    real mean() @property pure nothrow const
    {
        return _mean;
    }

    /**
     * Sample variance (sum of squared deviations divided by `count - 1`).
     *
     * Returns: NaN when fewer than two values have been added, because the
     * quantity is undefined there. The explicit guard also avoids the
     * unsigned wrap-around of `_count - 1` when `_count` is 0.
     */
    real sampleVar() @property pure nothrow const
    {
        if (_count < 2)
            return real.nan;
        return _var / (_count - 1);
    }

    /// Sample standard deviation: square root of `sampleVar`.
    real sampleSD() @property pure nothrow const
    {
        return sqrt(this.sampleVar);
    }

    /**
     * Population variance (sum of squared deviations divided by `count`).
     *
     * Returns: NaN when no values have been added (0 / 0).
     */
    real standardVar() @property pure nothrow const
    {
        return _var / _count;
    }

    /// Population standard deviation: square root of `standardVar`.
    real standardSD() @property pure nothrow const
    {
        return sqrt(this.standardVar);
    }

    /// Smallest value aggregated so far (`real.max` if none were added).
    real min() @property pure nothrow const
    {
        return _min;
    }

    /// Largest value aggregated so far (`-real.max` if none were added).
    real max() @property pure nothrow const
    {
        return _max;
    }

    /**
     * Folds a single numeric value into the running statistics.
     *
     * Params:
     *     x = the value to aggregate; any type accepted by `isNumeric`.
     */
    void add(T)(T x) @property pure nothrow
        if(isNumeric!T)
    {
        _count++;
        _min = std.algorithm.min(_min, x);
        _max = std.algorithm.max(_max, x);

        // Deviation from the mean of the values seen *before* x. It must be
        // taken before _mean is updated; both updates below depend on it.
        immutable real delta = x - _mean;
        _var = _var + ((_count - 1) * delta ^^ 2) / _count;
        _mean = _mean + delta / _count;
    }

    /**
     * Folds every element of an input range into the running statistics.
     *
     * Each element is passed back to `add`, so an element that is itself an
     * input range is aggregated element by element, to any nesting depth.
     *
     * Params:
     *     r = input range whose elements are numeric or are input ranges.
     */
    void add(T)(T r)
        if(isInputRange!T)
    {
        foreach(x; r)
            this.add(x);
    }
}
// Exercises Aggregator three ways: one value at a time, a flat range, and a
// range of ranges. The exact floating-point equalities are intentional; the
// inputs are small integers.
unittest
{
    // Values 1..10 added individually.
    Aggregator a;
    foreach(i; iota(1, 11))
        a.add(i);
    assert(a.count == 10);
    // Reference mean over the same 1..10 range, with an explicit seed rather
    // than relying on a leading 0 element to act as one.
    assert(a.mean == reduce!"a+b"(0, iota(1, 11)) / 10.0L);
    assert(a.min == 1);
    assert(a.max == 10);

    // Reference sum of squared deviations from the mean, accumulated from an
    // explicit 0.0 seed over 1.0 .. 10.0.
    real testVar = reduce!((acc, y) => acc + (y - a.mean) ^^ 2)(0.0L, iota(1.0L, 11.0L));
    assert(a.sampleVar == testVar / (a.count - 1));
    assert(a.sampleSD == sqrt(testVar / (a.count - 1)));
    assert(a.standardVar == testVar / a.count);
    assert(a.standardSD == sqrt(testVar / a.count));

    // A flat range with a large offset relative to its spread.
    Aggregator b;
    b.add([10_000, 10_001, 10_002]);
    assert(b.mean == 10_001);
    assert(b.sampleVar == 1);

    // A range of ranges is flattened: the same ten values as `a`, so every
    // statistic must match.
    Aggregator c;
    c.add([[1, 2, 3], [4, 5, 6], [7, 8, 9, 10]]);
    assert(c.mean == a.mean);
    assert(c.sampleVar == a.sampleVar);
    assert(c.sampleSD == a.sampleSD);
    assert(c.standardVar == a.standardVar);
    assert(c.standardSD == a.standardSD);
    assert(c.min == a.min);
    assert(c.max == a.max);
    assert(c.count == a.count);
}