[FFmpeg-devel] [RFC][PATCH] ffplay: introduce audio visualizations.

Sat Oct 29 12:48:56 CEST 2011

---
Here is just a simple PoC for audio visualizations in ffplay. The current
engine keeps the linear display (cyclic vertical lines from left to right) and
adds a "default" profile to keep the current display output.

I wonder if that won't be too heavy to deal with if it is extended to the whole
x/y window; I might reach a performance limitation by doing so. I guess shaders
might be a workaround for this, but I think it's overkill and inappropriate in
ffplay.

Anyway, any feedback on that is welcome if you want me to continue this.
---
 ffplay.c |  117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index 5a32d4e..4dddda1 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -37,6 +37,7 @@
 #include "libswscale/swscale.h"
 #include "libavcodec/audioconvert.h"
 #include "libavutil/opt.h"
+#include "libavutil/eval.h"
 #include "libavcodec/avfft.h"
 #include "libswresample/swresample.h"
 
@@ -229,6 +230,30 @@ typedef struct VideoState {
     int refresh;
 } VideoState;
 
+typedef struct {
+    int id;             ///< index into audio_visualizations of the selected profile
+    AVExpr *e[3];       ///< parsed expressions, e[i] being built from the profile's v[i]
+} AudioVisualizationContext;
+
+static AudioVisualizationContext audio_visualization_ctx;   ///< single global instance; static storage, so id starts at 0 and e[] at NULL
+
+typedef struct {
+    const char *name;       ///< profile name, compared against the requested visualization name
+    const char *v[3];       ///< expression strings, evaluated in order; the results of v[0] and v[1] are visible to later ones as v0 and v1
+    int color_layout[3];    ///< for r, g and b (in that order), the index in v of the expression providing that component
+} AudioVisualization;
+
+static const AudioVisualization audio_visualizations[] = {{   // first entry is the one used when no name is requested
+        .name = "default",
+        .v = {
+            "sqrt(q * mag_ch0)",    // v0: scaled magnitude of channel 0
+            "sqrt(q * mag_ch1)",    // v1: scaled magnitude of channel 1
+            "(v0+v1)/2"             // mean of the two values above
+        },
+        .color_layout = {0,1,2},    // v[0] -> r, v[1] -> g, v[2] -> b
+    }
+};
+
 static int opt_help(const char *opt, const char *arg);
 
 /* options specified by the user */
@@ -731,6 +756,24 @@ static inline int compute_mod(int a, int b)
     return a < 0 ? a%b + b : a%b;
 }
 
+static const char *visu_var_names[] = {    // names usable in visualization expressions; order must match enum visu_var_name
+    "mag_ch0", "mag_ch1",                  // sqrt(re*re + im*im) of the current RDFT output pair, per channel
+    "v0", "v1",                            // results of the first and second expression
+    "w", "h",                              // s->width and s->height
+    "t",                                   // s->audio_clock
+    "q",                                   // 1/sqrt(nb_freq)
+    NULL                                   // terminator
+};
+
+enum visu_var_name {            // indexes into var_values; order must match visu_var_names
+    VAR_MAG_CH0, VAR_MAG_CH1,   ///< per-channel magnitude; channel 0's value is reused when only one channel is displayed
+    VAR_V0, VAR_V1,             ///< results of expressions e[0] and e[1]
+    VAR_W, VAR_H,               ///< s->width and s->height
+    VAR_TIME,                   ///< s->audio_clock
+    VAR_Q,                      ///< 1/sqrt(nb_freq)
+    VAR_VARS_NB                 ///< number of variables, used as the size of var_values
+};
+
 static void video_audio_display(VideoState *s)
 {
     int i, i_start, x, y1, y, ys, delay, n, nb_display_channels;
@@ -835,6 +878,10 @@ static void video_audio_display(VideoState *s)
             s->rdft_data= av_malloc(4*nb_freq*sizeof(*s->rdft_data));
         }
         {
+            double var_values[VAR_VARS_NB] = {0};
+            AudioVisualizationContext *vctx = &audio_visualization_ctx;
+            const AudioVisualization *visu = &audio_visualizations[vctx->id];
+
             FFTSample *data[2];
             for(ch = 0;ch < nb_display_channels; ch++) {
                 data[ch] = s->rdft_data + 2*nb_freq*ch;
@@ -849,14 +896,26 @@ static void video_audio_display(VideoState *s)
                 av_rdft_calc(s->rdft, data[ch]);
             }
             //least efficient way to do this, we should of course directly access it but its more than fast enough
+            var_values[VAR_W   ] = s->width;
+            var_values[VAR_H   ] = s->height;
+            var_values[VAR_TIME] = s->audio_clock;
+            var_values[VAR_Q   ] = 1/sqrt(nb_freq);
             for(y=0; y<s->height; y++){
-                double w= 1/sqrt(nb_freq);
-                int a= sqrt(w*sqrt(data[0][2*y+0]*data[0][2*y+0] + data[0][2*y+1]*data[0][2*y+1]));
-                int b= (nb_display_channels == 2 ) ? sqrt(w*sqrt(data[1][2*y+0]*data[1][2*y+0]
-                       + data[1][2*y+1]*data[1][2*y+1])) : a;
-                a= FFMIN(a,255);
-                b= FFMIN(b,255);
-                fgcolor = SDL_MapRGB(screen->format, a, b, (a+b)/2);
+#define MAGNITUDE(re, im) sqrt(re*re + im*im)
+                double v_dbl[3];
+                unsigned v_uint[3];
+                var_values[VAR_MAG_CH0] = MAGNITUDE(data[0][2*y+0], data[0][2*y+1]);
+                if (nb_display_channels == 2)
+                    var_values[VAR_MAG_CH1] = MAGNITUDE(data[1][2*y+0], data[1][2*y+1]);
+                else
+                    var_values[VAR_MAG_CH1] = var_values[VAR_MAG_CH0];
+                v_dbl[0] = var_values[VAR_V0] = av_expr_eval(vctx->e[0], var_values, NULL);
+                v_dbl[1] = var_values[VAR_V1] = av_expr_eval(vctx->e[1], var_values, NULL);
+                v_dbl[2] =                      av_expr_eval(vctx->e[2], var_values, NULL);
+                v_uint[0] = (unsigned)v_dbl[visu->color_layout[0]] & 0xff;
+                v_uint[1] = (unsigned)v_dbl[visu->color_layout[1]] & 0xff;
+                v_uint[2] = (unsigned)v_dbl[visu->color_layout[2]] & 0xff;
+                fgcolor = SDL_MapRGB(screen->format, v_uint[0], v_uint[1], v_uint[2]);
 
                 fill_rectangle(screen,
                             s->xpos, s->height-y, 1, 1,
@@ -921,6 +980,36 @@ static void do_exit(VideoState *is)
     exit(0);
 }
 
+/**
+ * Select an audio visualization profile and parse its three color expressions.
+ *
+ * @param name profile name to look up in audio_visualizations, or NULL to
+ *             use the first entry
+ * @return 0 on success, -1 if name matches no profile, otherwise the negative
+ *         value returned by av_expr_parse()
+ */
+static int init_audio_visualization(const char *name)
+{
+    int i, r;
+    const AudioVisualization *visu  = audio_visualizations;
+    AudioVisualizationContext *vctx = &audio_visualization_ctx;
+    if (name) {
+        for (i = 0; i < FF_ARRAY_ELEMS(audio_visualizations); i++)
+            if (strcmp(audio_visualizations[i].name, name) == 0)
+                break;
+        if (i == FF_ARRAY_ELEMS(audio_visualizations))
+            return -1; /* only an unknown name is an error */
+        visu     = &audio_visualizations[i];
+        vctx->id = i;
+    }
+    for (i = 0; i < FF_ARRAY_ELEMS(vctx->e); i++)
+        if ((r = av_expr_parse(&vctx->e[i], visu->v[i], visu_var_names,
+                               NULL, NULL, NULL, NULL, 0, NULL)) < 0)
+            return r;
+    return 0;
+}
+
+
 static int video_open(VideoState *is){
     int flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL;
     int w,h;
@@ -962,6 +1051,10 @@ static int video_open(VideoState *is){
     is->width = screen->w;
     is->height = screen->h;
 
+    if (is->audio_st && is->show_mode != SHOW_MODE_VIDEO
+        && !audio_visualization_ctx.e[0])
+        init_audio_visualization(NULL);
+
     return 0;
 }
 
@@ -3018,6 +3111,15 @@ static int opt_codec(void *o, const char *opt, const char *arg)
     return 0;
 }
 
+/* -visu option callback (OPT_FUNC2 prototype, like opt_codec); arg is the profile name */
+static int opt_visu(void *o, const char *opt, const char *arg)
+{
+    if (init_audio_visualization(arg) >= 0)
+        return 0;
+    fprintf(stderr, "Visualization '%s' not found\n", arg);
+    exit(1);
+}
+
 static int dummy;
 
 static const OptionDef options[] = {
@@ -3059,6 +3161,7 @@ static const OptionDef options[] = {
 #if CONFIG_AVFILTER
     { "vf", OPT_STRING | HAS_ARG, {(void*)&vfilters}, "video filters", "filter list" },
 #endif
+    { "visu", HAS_ARG | OPT_FUNC2, {(void*)&opt_visu}, "select audio visualization", "vizualisation name" },
     { "rdftspeed", OPT_INT | HAS_ARG| OPT_AUDIO | OPT_EXPERT, {(void*)&rdftspeed}, "rdft speed", "msecs" },
     { "showmode", HAS_ARG, {(void*)opt_show_mode}, "select show mode (0 = video, 1 = waves, 2 = RDFT)", "mode" },
     { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" },
-- 
1.7.7.1