Most testing methods are not great for accurate normality testing: https://stats.stackexchange.com/questions/2492/is-normality-testing-essentially-useless. Nonetheless, they at least catch usual coding mistakes.
In particular, this patch adds a Anderson-Darling based test for normality. Signed-off-by: Ganesh Ajjanagadde <gajja...@gmail.com> --- libavutil/Makefile | 1 + libavutil/lfg.c | 2 +- libavutil/rand.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/libavutil/Makefile b/libavutil/Makefile index fb20c8a..77be557 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -200,6 +200,7 @@ TESTPROGS = adler32 \ parseutils \ pixdesc \ pixelutils \ + rand \ random_seed \ rational \ ripemd \ diff --git a/libavutil/lfg.c b/libavutil/lfg.c index 5ffd76f..93c3867 100644 --- a/libavutil/lfg.c +++ b/libavutil/lfg.c @@ -101,7 +101,7 @@ int main(void) samp0, samp1); } - /* TODO: add proper normality test */ + /* proper normality testing done in lavu/rand.c */ samp_mean /= 1000; samp_stddev /= 999; samp_stddev -= (1000.0/999.0)*samp_mean*samp_mean; diff --git a/libavutil/rand.c b/libavutil/rand.c index 8b36e92..0154717 100644 --- a/libavutil/rand.c +++ b/libavutil/rand.c @@ -347,3 +347,96 @@ void av_gaussian_get(AVRAND64 *rng, double *out, int len) for (int i = 0; i < len; ++i) out[i] = ziggurat(rng); } + +#ifdef TEST +#include "libm.h" +#include "log.h" +#include "timer.h" + +static inline int compare(const void *a, const void *b) +{ + double da = *(double *)a; + double db = *(double *)b; + if (da == db) + return 0; + if (da > db) + return 1; + return -1; +} + +/* Gaussian cdf */ +static inline double phi(double x) +{ + return 0.5 + 0.5 * erf(x * M_SQRT1_2); +} + +/* The Anderson-Darling test for normality for an array of iid samples with a + * fixed mean and stddev: https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test. + * Note that no matter what we do, these tests are quite unsatisfying: + * https://stats.stackexchange.com/questions/2492/is-normality-testing-essentially-useless. + * Nevertheless, some testing is better than no testing, especially to guard + * against coding bugs. */ +static int is_normal(double *x, int len, double mu, double sigma) +{ + /* Somewhat arbitrarily use the 10% significance level */ + double thresh = 1.933; + double a_squared = -len; + double a_incr = 0.0; + qsort(x, len, sizeof(*x), compare); + for (int i = 0; i < len; ++i) { + double y1 = (x[i]-mu)/sigma; + double y2 = (x[len-1-i]-mu)/sigma; + y1 = log(phi(y1)); + y2 = log(1-phi(y2)); + a_incr += (2*i+1)*(y1 + y2); + } + a_incr /= len; + a_squared -= a_incr; + if (a_squared > thresh) { + av_log(NULL, AV_LOG_ERROR, "Normality test failed!\n"); + av_log(NULL, AV_LOG_ERROR, "a_squared: %lf > threshold: %lf\n", a_squared, thresh); + return 0; + } + return 1; +} + +int main(void) +{ + uint64_t y = 0; + int i, j; + AVRAND64 state; + + av_rand64_init(&state, UINT64_C(0xdeadbeefdeadbeef)); + + for (j = 0; j < 1000000; j++) { + START_TIMER + for (i = 0; i < 624; i++) { + y += av_rand64_get(&state); + } + STOP_TIMER("624 calls of av_rand64_get"); + } + + /* BMG usage example */ + { + double mean = 1000; + double stddev = 53; + int num_samples = 10000000; + double *samples = av_malloc(num_samples * sizeof(*samples)); + if (!samples) { + av_log(NULL, AV_LOG_ERROR, "Out of memory!\n"); + return 1; + } + + av_rand64_init(&state, UINT64_C(0xdeadbeefdeadbeef)); + + av_log(NULL, AV_LOG_INFO, "Gaussian samples, mean %lf, stddev %lf\n", mean, stddev); + av_gaussian_get(&state, samples, num_samples); + for (i = 0; i < num_samples; i++) { + samples[i] *= stddev; + samples[i] += mean; + } + return !is_normal(samples, num_samples, mean, stddev); + } + +} +#endif -- 2.7.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel