[FFmpeg-devel] [PATCH] avfilter/vf_convolution: add 16-column operation for filter_column() to prepare for x86 SIMD.

Song, Ruiling ruiling.song at intel.com
Mon Dec 2 08:38:04 EET 2019


> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
> xujunzz at sjtu.edu.cn
> Sent: Wednesday, November 27, 2019 10:56 PM
> To: ffmpeg-devel at ffmpeg.org
> Cc: xujunzz at sjtu.edu.cn
> Subject: [FFmpeg-devel] [PATCH] avfilter/vf_convolution: add 16-column
> operation for filter_column() to prepare for x86 SIMD.
> 
> From: Xu Jun <xujunzz at sjtu.edu.cn>
> 
> In order to add x86 SIMD for filter_column(), I write a C function which
> processes 16 columns at a time.
> 
> Signed-off-by: Xu Jun <xujunzz at sjtu.edu.cn>
> ---
>  libavfilter/vf_convolution.c          | 56 +++++++++++++++++++++++++++
>  libavfilter/x86/vf_convolution_init.c | 23 +++++++++++
>  2 files changed, 79 insertions(+)
> 
> diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
> index d022f1a04a..5291415d48 100644
> --- a/libavfilter/vf_convolution.c
> +++ b/libavfilter/vf_convolution.c
> @@ -520,6 +520,61 @@ static int filter_slice(AVFilterContext *ctx, void *arg,
> int jobnr, int nb_jobs)
>              continue;
>          }
> 
> +        if (mode == MATRIX_COLUMN && s->filter[plane] != filter_column){
> +            for (y = slice_start; y < slice_end - 16; y+=16) {
Please take care of the coding style there should be white-space between variables and operators.
And also I think this piece of change make it harder to maintain, let's try to avoid code duplicate as much as we can.
> +                const int xoff = (y - slice_start) * bpc;
> +                const int yoff = radius * stride;
> +                for (x = 0; x < radius; x++) {
> +                    const int xoff = (y - slice_start) * bpc;
> +                    const int yoff = x * stride;
> +
> +                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
> +                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
> +                                    bias, matrix, c, 16, radius,
> +                                    dstride, stride);
> +                }
> +                s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
> +                s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
> +                                rdiv, bias, matrix, c, 16, radius,
> +                                dstride, stride);
> +                for (x = sizew - radius; x < sizew; x++) {
> +                    const int xoff = (y - slice_start) * bpc;
> +                    const int yoff = x * stride;
> +
> +                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
> +                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
> +                                    bias, matrix, c, 16, radius,
> +                                    dstride, stride);
> +                }
> +            }
> +            if (y < slice_end){
> +                const int xoff = (y - slice_start) * bpc;
> +                const int yoff = radius * stride;
> +                for (x = 0; x < radius; x++) {
> +                    const int xoff = (y - slice_start) * bpc;
> +                    const int yoff = x * stride;
> +
> +                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
> +                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
> +                                    bias, matrix, c, slice_end - y, radius,
> +                                    dstride, stride);
> +                }
> +                s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc);
> +                s->filter[plane](dst + yoff + xoff, sizew - 2 * radius,
> +                                rdiv, bias, matrix, c, slice_end - y, radius,
> +                                dstride, stride);
> +                for (x = sizew - radius; x < sizew; x++) {
> +                    const int xoff = (y - slice_start) * bpc;
> +                    const int yoff = x * stride;
> +
> +                    s->setup[plane](radius, c, src, stride, x, width, y, height, bpc);
> +                    s->filter[plane](dst + yoff + xoff, 1, rdiv,
> +                                    bias, matrix, c, slice_end - y, radius,
> +                                    dstride, stride);
> +                }
> +            }
> +        }
> +        else {
>          for (y = slice_start; y < slice_end; y++) {
>              const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc :
> radius * bpc;
>              const int yoff = mode == MATRIX_COLUMN ? radius * stride : 0;
> @@ -550,6 +605,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg,
> int jobnr, int nb_jobs)
>                  dst += dstride;
>          }
>      }
> +    }
> 
>      return 0;
>  }
> diff --git a/libavfilter/x86/vf_convolution_init.c
> b/libavfilter/x86/vf_convolution_init.c
> index d1e8c90ceb..6b1c2f0e9f 100644
> --- a/libavfilter/x86/vf_convolution_init.c
> +++ b/libavfilter/x86/vf_convolution_init.c
> @@ -34,6 +34,27 @@ void ff_filter_row_sse4(uint8_t *dst, int width,
>                          const uint8_t *c[], int peak, int radius,
>                          int dstride, int stride);
> 
This C code should not be in the x86-specific file.

Ruiling
> +static void filter_column16(uint8_t *dst, int height,
> +                          float rdiv, float bias, const int *const matrix,
> +                          const uint8_t *c[], int length, int radius,
> +                          int dstride, int stride)
> +{
> +    int y, off16;
> +
> +    for (y = 0; y < height; y++) {
> +        for (off16 = 0; off16 < length; off16++){
> +            int i, sum = 0;
> +
> +            for (i = 0; i < 2 * radius + 1; i++)
> +                sum += c[i][0 + y * stride + off16] * matrix[i];
> +
> +            sum = (int)(sum * rdiv + bias + 0.5f);
> +            dst[off16] = av_clip_uint8(sum);
> +        }
> +        dst += dstride;
> +    }
> +
> +}
> 
>  av_cold void ff_convolution_init_x86(ConvolutionContext *s)
>  {
> @@ -51,6 +72,8 @@ av_cold void
> ff_convolution_init_x86(ConvolutionContext *s)
>                  if (EXTERNAL_SSE4(cpu_flags))
>                      s->filter[i] = ff_filter_row_sse4;
>          }
> +        if (s->mode[i] == MATRIX_COLUMN)
> +            s->filter[i] = filter_column16;
>      }
>  #endif
>  }
> --
> 2.17.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".


More information about the ffmpeg-devel mailing list