[FFmpeg-devel] [PATCHv3 3/3] lavu/rand: add a better normality test
Ganesh Ajjanagadde
gajjanag at gmail.com
Tue Mar 15 05:47:01 CET 2016
Most testing methods are not great for accurate normality testing:
https://stats.stackexchange.com/questions/2492/is-normality-testing-essentially-useless.
Nonetheless, they at least catch usual coding mistakes.
In particular, this patch adds a Anderson-Darling based test for normality.
Signed-off-by: Ganesh Ajjanagadde <gajjanag at gmail.com>
---
libavutil/Makefile | 1 +
libavutil/lfg.c | 2 +-
libavutil/rand.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 95 insertions(+), 1 deletion(-)
diff --git a/libavutil/Makefile b/libavutil/Makefile
index fb20c8a..77be557 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -200,6 +200,7 @@ TESTPROGS = adler32 \
parseutils \
pixdesc \
pixelutils \
+ rand \
random_seed \
rational \
ripemd \
diff --git a/libavutil/lfg.c b/libavutil/lfg.c
index 5ffd76f..93c3867 100644
--- a/libavutil/lfg.c
+++ b/libavutil/lfg.c
@@ -101,7 +101,7 @@ int main(void)
samp0,
samp1);
}
- /* TODO: add proper normality test */
+ /* proper normality testing done in lavu/rand.c */
samp_mean /= 1000;
samp_stddev /= 999;
samp_stddev -= (1000.0/999.0)*samp_mean*samp_mean;
diff --git a/libavutil/rand.c b/libavutil/rand.c
index 8b36e92..0154717 100644
--- a/libavutil/rand.c
+++ b/libavutil/rand.c
@@ -347,3 +347,96 @@ void av_gaussian_get(AVRAND64 *rng, double *out, int len)
for (int i = 0; i < len; ++i)
out[i] = ziggurat(rng);
}
+
+#ifdef TEST
+#include "libm.h"
+#include "log.h"
+#include "timer.h"
+
+static inline int compare(const void *a, const void *b)
+{
+ double da = *(double *)a;
+ double db = *(double *)b;
+ if (da == db)
+ return 0;
+ if (da > db)
+ return 1;
+ return -1;
+}
+
+/* Gaussian cdf */
+static inline double phi(double x)
+{
+ return 0.5 + 0.5 * erf(x * M_SQRT1_2);
+}
+
+/* The Anderson-Darling test for normality for an array of iid samples with a
+ * fixed mean and stddev: https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test.
+ * Note that no matter what we do, these tests are quite unsatisfying:
+ * https://stats.stackexchange.com/questions/2492/is-normality-testing-essentially-useless.
+ * Nevertheless, some testing is better than no testing, especially to guard
+ * against coding bugs. */
+static int is_normal(double *x, int len, double mu, double sigma)
+{
+ /* Somewhat arbitrarily use the 10% significance level */
+ double thresh = 1.933;
+ double a_squared = -len;
+ double a_incr = 0.0;
+ qsort(x, len, sizeof(*x), compare);
+ for (int i = 0; i < len; ++i) {
+ double y1 = (x[i]-mu)/sigma;
+ double y2 = (x[len-1-i]-mu)/sigma;
+ y1 = log(phi(y1));
+ y2 = log(1-phi(y2));
+ a_incr += (2*i+1)*(y1 + y2);
+ }
+ a_incr /= len;
+ a_squared -= a_incr;
+ if (a_squared > thresh) {
+ av_log(NULL, AV_LOG_ERROR, "Normality test failed!\n");
+ av_log(NULL, AV_LOG_ERROR, "a_squared: %lf > threshold: %lf\n", a_squared, thresh);
+ return 0;
+ }
+ return 1;
+}
+
+int main(void)
+{
+ uint64_t y = 0;
+ int i, j;
+ AVRAND64 state;
+
+ av_rand64_init(&state, UINT64_C(0xdeadbeefdeadbeef));
+
+ for (j = 0; j < 1000000; j++) {
+ START_TIMER
+ for (i = 0; i < 624; i++) {
+ y += av_rand64_get(&state);
+ }
+ STOP_TIMER("624 calls of av_rand64_get");
+ }
+
+ /* BMG usage example */
+ {
+ double mean = 1000;
+ double stddev = 53;
+ int num_samples = 10000000;
+ double *samples = av_malloc(num_samples * sizeof(*samples));
+ if (!samples) {
+ av_log(NULL, AV_LOG_ERROR, "Out of memory!\n");
+ return 1;
+ }
+
+ av_rand64_init(&state, UINT64_C(0xdeadbeefdeadbeef));
+
+ av_log(NULL, AV_LOG_INFO, "Gaussian samples, mean %lf, stddev %lf\n", mean, stddev);
+ av_gaussian_get(&state, samples, num_samples);
+ for (i = 0; i < num_samples; i++) {
+ samples[i] *= stddev;
+ samples[i] += mean;
+ }
+ return !is_normal(samples, num_samples, mean, stddev);
+ }
+
+}
+#endif
--
2.7.3
More information about the ffmpeg-devel
mailing list