[FFmpeg-devel] [PATCHv2 4/4] avfilter/vf_framerate: add SIMD functions for frame blending

James Almer jamrial at gmail.com
Fri Jan 19 00:31:23 EET 2018


On 1/18/2018 6:16 PM, James Almer wrote:
> On 1/18/2018 6:06 PM, Marton Balint wrote:
>> Blend function speedups on x86_64 Core i5 4460:
>>
>> ffmpeg -f lavfi -i allyuv -vf framerate=60:threads=1 -f null none
>>
>> C:     447548411 decicycles in Blend,    2048 runs,      0 skips
>> SSSE3: 130020087 decicycles in Blend,    2048 runs,      0 skips
>> AVX2:  128508221 decicycles in Blend,    2048 runs,      0 skips
>>
>> ffmpeg -f lavfi -i allyuv -vf format=yuv420p12,framerate=60:threads=1 -f null none
>>
>> C:     228932745 decicycles in Blend,    2048 runs,      0 skips
>> SSE4:  123357781 decicycles in Blend,    2048 runs,      0 skips
>> AVX2:  121215353 decicycles in Blend,    2048 runs,      0 skips
>>
>> Signed-off-by: Marton Balint <cus at passwd.hu>
>> ---
>>  libavfilter/vf_framerate.c       |  24 ++++++-
>>  libavfilter/x86/Makefile         |   1 +
>>  libavfilter/x86/vf_framerate.asm | 136 +++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 158 insertions(+), 3 deletions(-)
>>  create mode 100644 libavfilter/x86/vf_framerate.asm
>>
>> diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
>> index d315ef5d09..6a3b85910f 100644
>> --- a/libavfilter/vf_framerate.c
>> +++ b/libavfilter/vf_framerate.c
>> @@ -29,11 +29,13 @@
>>  #define DEBUG
>>  
>>  #include "libavutil/avassert.h"
>> +#include "libavutil/cpu.h"
>>  #include "libavutil/imgutils.h"
>>  #include "libavutil/internal.h"
>>  #include "libavutil/opt.h"
>>  #include "libavutil/pixdesc.h"
>>  #include "libavutil/pixelutils.h"
>> +#include "libavutil/x86/cpu.h"
>>  
>>  #include "avfilter.h"
>>  #include "internal.h"
>> @@ -246,7 +248,7 @@ static int blend_frames(AVFilterContext *ctx, int interpolate)
>>          av_frame_copy_props(s->work, s->f0);
>>  
>>          ff_dlog(ctx, "blend_frames() INTERPOLATE to create work frame\n");
>> -        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
>> +        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(FFMAX(1, outlink->h >> 2), ff_filter_get_nb_threads(ctx)));
>>          return 1;
>>      }
>>      return 0;
>> @@ -347,6 +349,11 @@ static void blend_frames_c(BLEND_FUNC_PARAMS)
>>      }
>>  }
>>  
>> +void ff_blend_frames_ssse3(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames_avx2(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames16_sse4(BLEND_FUNC_PARAMS);
>> +void ff_blend_frames16_avx2(BLEND_FUNC_PARAMS);
>> +
>>  static void blend_frames16_c(BLEND_FUNC_PARAMS)
>>  {
>>      int line, pixel;
>> @@ -371,6 +378,7 @@ static int config_input(AVFilterLink *inlink)
>>      AVFilterContext *ctx = inlink->dst;
>>      FrameRateContext *s = ctx->priv;
>>      const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
>> +    int cpu_flags = av_get_cpu_flags();
>>      int plane;
>>  
>>      for (plane = 0; plane < 4; plane++) {
>> @@ -389,10 +397,20 @@ static int config_input(AVFilterLink *inlink)
>>  
>>      if (s->bitdepth == 8) {
>>          s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH8;
>> -        s->blend = blend_frames_c;
>> +        if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
>> +            s->blend = ff_blend_frames_avx2;
>> +        else if (ARCH_X86 && EXTERNAL_SSSE3(cpu_flags))
>> +            s->blend = ff_blend_frames_ssse3;
>> +        else
>> +            s->blend = blend_frames_c;
>>      } else {
>>          s->blend_factor_max = 1 << BLEND_FACTOR_DEPTH16;
>> -        s->blend = blend_frames16_c;
>> +        if (ARCH_X86 && EXTERNAL_AVX2_FAST(cpu_flags))
>> +            s->blend = ff_blend_frames16_avx2;
>> +        else if (ARCH_X86 && EXTERNAL_SSE4(cpu_flags))
>> +            s->blend = ff_blend_frames16_sse4;
>> +        else
>> +            s->blend = blend_frames16_c;
> 
> The SIMD function pointer initialization and the respective prototypes
> should be in a separate file in the x86 folder. Here you should only
> have something like
> 
> if (ARCH_X86)
>     ff_blend_frames_init_x86(s);

On second thought, seeing as this is the framerate filter, a more fitting
name would be ff_framerate_init_x86(). Blend may not be the only
function the filter could optimize with assembly in the future.

> 
> Then do the corresponding pointer initialization inside that function. The
> prototype for ff_blend_frames_init_x86() should be in a new header.
> 
> See how vf_blend (and many other filters) do it.
> 
>>      }
>>  
>>      return 0;



More information about the ffmpeg-devel mailing list