[FFmpeg-devel] HEVC patch : adding ASM functions

Sun Feb 23 00:59:17 CET 2014

On Fri, Feb 21, 2014 at 04:07:53PM +0100, Pierre Edouard Lepere wrote:
> Hello
> 
> this patch changes some c functions for the MC and adds ASM functions that are enabled through pointers.
> 
> Best Regards,
> Pierre-Edouard LEPERE

fails to build: (on 32bit):

libavcodec/libavcodec.a(hevcdsp_init.o): In function `ff_hevcdsp_init_x86':
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:106: undefined reference to `ff_hevc_put_hevc_pel_pixels2_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:107: undefined reference to `ff_hevc_put_hevc_pel_pixels4_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:108: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:109: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:110: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:111: undefined reference to `ff_hevc_put_hevc_epel_h2_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:112: undefined reference to `ff_hevc_put_hevc_epel_h4_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:113: undefined reference to `ff_hevc_put_hevc_epel_h8_10_sse4'

[...]
> diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
> new file mode 100644
> index 0000000..178b5ae
> --- /dev/null
> +++ b/libavcodec/x86/hevcdsp_init.c
> @@ -0,0 +1,280 @@
> +/*
> + * Copyright (c) 2013 Seppo Tomperi
> + *
> + * This file is part of ffmpeg.
> + *
> + * ffmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * ffmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with ffmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */

please split the patches and set the git author correctly

> +
> +#include "config.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/x86/asm.h"
> +#include "libavutil/x86/cpu.h"
> +#include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
> +#include "libavcodec/hevcdsp.h"
> +
> +/***********************************/
> +
> +#define MCQ_FUNC(DIR, DEPTH, OPT)                                        \
> +    void ff_put_hevc_mc_pixels_ ## DIR ## _ ## DEPTH ## _ ## OPT(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my);
> +
> +#define MCQ_FUNCS(type, depth) \
> +   MCQ_FUNC( 2, depth, sse4)    \
> +   MCQ_FUNC( 4, depth, sse4)    \
> +   MCQ_FUNC( 8, depth, sse4)    \
> +   MCQ_FUNC(16, depth, sse4)
> +
> +MCQ_FUNCS(uint8_t,   8)
> +
> +/***********************************/
> +/* deblocking */
> +
> +#define LFC_FUNC(DIR, DEPTH, OPT)                                        \
> +void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
> +
> +#define LFL_FUNC(DIR, DEPTH, OPT)                                        \
> +void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \
> +uint8_t *_no_p, uint8_t *_no_q);
> +
> +#define LFC_FUNCS(type, depth) \
> +LFC_FUNC(h, depth, sse2)  \
> +LFC_FUNC(v, depth, sse2)
> +
> +#define LFL_FUNCS(type, depth) \
> +LFL_FUNC(h, depth, ssse3)  \
> +LFL_FUNC(v, depth, ssse3)
> +
> +
> +LFC_FUNCS(uint8_t,   8)
> +LFL_FUNCS(uint8_t,   8)
> +
> +//LF_FUNCS(uint16_t, 10)
> +
> +
> +void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
> +{
> +    int mm_flags = av_get_cpu_flags();
> +
> +    if (bit_depth == 8) {
> +        if (EXTERNAL_MMX(mm_flags)) {
> +            /*if (mm_flags & AV_CPU_FLAG_CMOV)
> +                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; */
> +
> +            if (EXTERNAL_MMXEXT(mm_flags)) {
> +#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
> +                /* MMEXT optimizations */
> +#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
> +

> +                if (EXTERNAL_SSE2(mm_flags)) {
> +/*                    c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
> +                    c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;*/
> +                }
> +                if (EXTERNAL_SSSE3(mm_flags)) {
> +/*
> +                    c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_8_sse4;
> +                    c->transform_add[0] = ff_hevc_transform_4x4_add_8_sse4;
> +                    c->transform_add[1] = ff_hevc_transform_8x8_add_8_sse4;
> +                    c->transform_add[2] = ff_hevc_transform_16x16_add_8_sse4;
> +                    c->transform_add[3] = ff_hevc_transform_32x32_add_8_sse4;

please dont add outcommented parts that get removed in a subsequent
patch

[...]
>  hevc.c             |   55 ++++----
>  hevc.h             |    3 
>  hevcdsp.c          |   38 +++---
>  hevcdsp.h          |    7 -
>  hevcdsp_template.c |  327 ++++++++++++++++++++++++++++++++++-------------------
>  5 files changed, 271 insertions(+), 159 deletions(-)
> 37d2d2fb0c98e61ea5fed543f1c09b0cd50f84ce  0002-changed-qpel-functions.patch
> From 67a824303404ee9b9f3544f4f54179dbf65eea0b Mon Sep 17 00:00:00 2001
> From: plepere <pierre-edouard.lepere at insa-rennes.fr>
> Date: Fri, 21 Feb 2014 10:16:42 +0100
> Subject: [PATCH 2/6] changed qpel functions

please provide a more informative commit message
every commit is a change of the part that it changes

[...]
> diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
> index fe9c4e9..ce33d95 100644
> --- a/libavcodec/hevc.h
> +++ b/libavcodec/hevc.h
> @@ -71,6 +71,9 @@
>  #define EPEL_EXTRA_BEFORE 1
>  #define EPEL_EXTRA_AFTER  2
>  #define EPEL_EXTRA        3
> +#define QPEL_EXTRA_BEFORE 3
> +#define QPEL_EXTRA_AFTER  4
> +#define QPEL_EXTRA        7
>  
>  #define EDGE_EMU_BUFFER_STRIDE 80
>  
> diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
> index ce846d3..01c4bc6 100644
> --- a/libavcodec/hevcdsp.c
> +++ b/libavcodec/hevcdsp.c
> @@ -99,6 +99,12 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
>      { -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 },
>  };
>  
> +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = {
> +    { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
> +    { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
> +    {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
> +};
> +
>  #define BIT_DEPTH 8
>  #include "hevcdsp_template.c"
>  #undef BIT_DEPTH
> @@ -111,6 +117,21 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
>  #include "hevcdsp_template.c"
>  #undef BIT_DEPTH
>  
> +
> +#undef PEL_FUNC
> +#define PEL_FUNC(dst1, idx1, idx2, a, depth)                                   \
> +    hevcdsp->dst1[0][idx1][idx2] = hevcdsp->dst1[1][idx1][idx2] = hevcdsp->dst1[2][idx1][idx2] = hevcdsp->dst1[3][idx1][idx2] = hevcdsp->dst1[4][idx1][idx2]    = a ## _ ## depth
> +
> +
> +#undef QPEL_FUNCS
> +#define QPEL_FUNCS(depth)                                                     \
> +    PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth);                \
> +    PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth);                    \
> +    PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth);                    \
> +    PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth)
> +
> +
> +
>  void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
>  {
>  #undef FUNC
> @@ -139,22 +160,7 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
>      hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth);           \
>      hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth);           \
>                                                                              \
> -    hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth);       \
> -    hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth);           \
> -    hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth);           \
> -    hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth);           \
> -    hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth);           \
> -    hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth);         \
> -    hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth);         \
> -    hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth);         \
> -    hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth);           \
> -    hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth);         \
> -    hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth);         \
> -    hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth);         \
> -    hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth);           \
> -    hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth);         \
> -    hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth);         \
> -    hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth);         \
> +    QPEL_FUNCS(depth);                                                      \
>                                                                              \
>      hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth);       \
>      hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth);            \
> diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
> index 4ebb742..4349824 100644
> --- a/libavcodec/hevcdsp.h
> +++ b/libavcodec/hevcdsp.h
> @@ -58,9 +58,9 @@ typedef struct HEVCDSPContext {
>                                 int height, int c_idx, uint8_t vert_edge,
>                                 uint8_t horiz_edge, uint8_t diag_edge);
>  
> -    void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
> -                                ptrdiff_t srcstride, int width, int height,
> -                                int16_t *mcbuffer);
> +    void (*put_hevc_qpel[5][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
> +                                int width, int height, int mx, int my);
> +
>      void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
>                                  ptrdiff_t srcstride, int width, int height,
>                                  int mx, int my, int16_t *mcbuffer);
> @@ -105,6 +105,7 @@ typedef struct HEVCDSPContext {
>  void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
>  
>  extern const int8_t ff_hevc_epel_filters[7][16];
> +extern const int8_t ff_hevc_qpel_filters[3][16];
>  
[...]
>  static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
>                                           ptrdiff_t stride)
>  {
>      int i;
> -    pixel *dst   = (pixel *)_dst;
> -    int shift    = 7;
> -    int add      = 1 << (shift - 1);
> +    pixel *dst = (pixel *)_dst;
> +    int shift = 7;
> +    int add = 1 << (shift - 1);
>      int16_t *src = coeffs;
>  
>      stride /= sizeof(pixel);
> @@ -168,40 +168,40 @@ static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
>      }
>  
>      shift = 20 - BIT_DEPTH;
> -    add   = 1 << (shift - 1);
> +    add = 1 << (shift - 1);
>      for (i = 0; i < 4; i++) {
>          TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
>          coeffs += 4;
> -        dst    += stride;
> +        dst += stride;
>      }
>  }

cosmetic changes and functional changes should be in seperate patches
otherwise its hard ro review, and thats not only now but also if
one (maybe you or someone else) debugs a regression maybe years later
...

also iam not sure the cosmetic changes are intended to be there at
all ?

but either way, great work
it improves speed from 11.463s to 10.735s  for
time make -j6 fate-hevc

Thanks

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The misfortune of the wise is better than the prosperity of the fool.
-- Epicurus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140223/223552d5/attachment.asc>