[FFmpeg-devel] HEVC patch : adding ASM functions
Michael Niedermayer
michaelni at gmx.at
Sun Feb 23 00:59:17 CET 2014
On Fri, Feb 21, 2014 at 04:07:53PM +0100, Pierre Edouard Lepere wrote:
> Hello
>
> this patch changes some c functions for the MC and adds ASM functions that are enabled through pointers.
>
> Best Regards,
> Pierre-Edouard LEPERE
fails to build: (on 32bit):
libavcodec/libavcodec.a(hevcdsp_init.o): In function `ff_hevcdsp_init_x86':
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:106: undefined reference to `ff_hevc_put_hevc_pel_pixels2_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:107: undefined reference to `ff_hevc_put_hevc_pel_pixels4_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:108: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:109: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:110: undefined reference to `ff_hevc_put_hevc_pel_pixels8_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:111: undefined reference to `ff_hevc_put_hevc_epel_h2_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:112: undefined reference to `ff_hevc_put_hevc_epel_h4_10_sse4'
/home/michael/ffmpeg-git/ffmpeg/libavcodec/x86/hevcdsp_init.c:113: undefined reference to `ff_hevc_put_hevc_epel_h8_10_sse4'
[...]
> diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
> new file mode 100644
> index 0000000..178b5ae
> --- /dev/null
> +++ b/libavcodec/x86/hevcdsp_init.c
> @@ -0,0 +1,280 @@
> +/*
> + * Copyright (c) 2013 Seppo Tomperi
> + *
> + * This file is part of ffmpeg.
> + *
> + * ffmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * ffmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with ffmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
please split the patches and set the git author correctly
> +
> +#include "config.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/x86/asm.h"
> +#include "libavutil/x86/cpu.h"
> +#include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
> +#include "libavcodec/hevcdsp.h"
> +
> +/***********************************/
> +
> +#define MCQ_FUNC(DIR, DEPTH, OPT) \
> + void ff_put_hevc_mc_pixels_ ## DIR ## _ ## DEPTH ## _ ## OPT(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int width, int height, int mx, int my);
> +
> +#define MCQ_FUNCS(type, depth) \
> + MCQ_FUNC( 2, depth, sse4) \
> + MCQ_FUNC( 4, depth, sse4) \
> + MCQ_FUNC( 8, depth, sse4) \
> + MCQ_FUNC(16, depth, sse4)
> +
> +MCQ_FUNCS(uint8_t, 8)
> +
> +/***********************************/
> +/* deblocking */
> +
> +#define LFC_FUNC(DIR, DEPTH, OPT) \
> +void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
> +
> +#define LFL_FUNC(DIR, DEPTH, OPT) \
> +void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \
> +uint8_t *_no_p, uint8_t *_no_q);
> +
> +#define LFC_FUNCS(type, depth) \
> +LFC_FUNC(h, depth, sse2) \
> +LFC_FUNC(v, depth, sse2)
> +
> +#define LFL_FUNCS(type, depth) \
> +LFL_FUNC(h, depth, ssse3) \
> +LFL_FUNC(v, depth, ssse3)
> +
> +
> +LFC_FUNCS(uint8_t, 8)
> +LFL_FUNCS(uint8_t, 8)
> +
> +//LF_FUNCS(uint16_t, 10)
> +
> +
> +void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
> +{
> + int mm_flags = av_get_cpu_flags();
> +
> + if (bit_depth == 8) {
> + if (EXTERNAL_MMX(mm_flags)) {
> + /*if (mm_flags & AV_CPU_FLAG_CMOV)
> + c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; */
> +
> + if (EXTERNAL_MMXEXT(mm_flags)) {
> +#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
> + /* MMEXT optimizations */
> +#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */
> +
> + if (EXTERNAL_SSE2(mm_flags)) {
> +/* c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
> + c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;*/
> + }
> + if (EXTERNAL_SSSE3(mm_flags)) {
> +/*
> + c->transform_4x4_luma_add = ff_hevc_transform_4x4_luma_add_8_sse4;
> + c->transform_add[0] = ff_hevc_transform_4x4_add_8_sse4;
> + c->transform_add[1] = ff_hevc_transform_8x8_add_8_sse4;
> + c->transform_add[2] = ff_hevc_transform_16x16_add_8_sse4;
> + c->transform_add[3] = ff_hevc_transform_32x32_add_8_sse4;
please dont add outcommented parts that get removed in a subsequent
patch
[...]
> hevc.c | 55 ++++----
> hevc.h | 3
> hevcdsp.c | 38 +++---
> hevcdsp.h | 7 -
> hevcdsp_template.c | 327 ++++++++++++++++++++++++++++++++++-------------------
> 5 files changed, 271 insertions(+), 159 deletions(-)
> 37d2d2fb0c98e61ea5fed543f1c09b0cd50f84ce 0002-changed-qpel-functions.patch
> From 67a824303404ee9b9f3544f4f54179dbf65eea0b Mon Sep 17 00:00:00 2001
> From: plepere <pierre-edouard.lepere at insa-rennes.fr>
> Date: Fri, 21 Feb 2014 10:16:42 +0100
> Subject: [PATCH 2/6] changed qpel functions
please provide a more informative commit message
every commit is a change of the part that it changes
[...]
> diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
> index fe9c4e9..ce33d95 100644
> --- a/libavcodec/hevc.h
> +++ b/libavcodec/hevc.h
> @@ -71,6 +71,9 @@
> #define EPEL_EXTRA_BEFORE 1
> #define EPEL_EXTRA_AFTER 2
> #define EPEL_EXTRA 3
> +#define QPEL_EXTRA_BEFORE 3
> +#define QPEL_EXTRA_AFTER 4
> +#define QPEL_EXTRA 7
>
> #define EDGE_EMU_BUFFER_STRIDE 80
>
> diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
> index ce846d3..01c4bc6 100644
> --- a/libavcodec/hevcdsp.c
> +++ b/libavcodec/hevcdsp.c
> @@ -99,6 +99,12 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
> { -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 },
> };
>
> +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = {
> + { -1, 4,-10, 58, 17, -5, 1, 0, -1, 4,-10, 58, 17, -5, 1, 0},
> + { -1, 4,-11, 40, 40,-11, 4, -1, -1, 4,-11, 40, 40,-11, 4, -1},
> + { 0, 1, -5, 17, 58,-10, 4, -1, 0, 1, -5, 17, 58,-10, 4, -1}
> +};
> +
> #define BIT_DEPTH 8
> #include "hevcdsp_template.c"
> #undef BIT_DEPTH
> @@ -111,6 +117,21 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
> #include "hevcdsp_template.c"
> #undef BIT_DEPTH
>
> +
> +#undef PEL_FUNC
> +#define PEL_FUNC(dst1, idx1, idx2, a, depth) \
> + hevcdsp->dst1[0][idx1][idx2] = hevcdsp->dst1[1][idx1][idx2] = hevcdsp->dst1[2][idx1][idx2] = hevcdsp->dst1[3][idx1][idx2] = hevcdsp->dst1[4][idx1][idx2] = a ## _ ## depth
> +
> +
> +#undef QPEL_FUNCS
> +#define QPEL_FUNCS(depth) \
> + PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth); \
> + PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth); \
> + PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth); \
> + PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth)
> +
> +
> +
> void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
> {
> #undef FUNC
> @@ -139,22 +160,7 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
> hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth); \
> hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth); \
> \
> - hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth); \
> - hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth); \
> - hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth); \
> - hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth); \
> - hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth); \
> - hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth); \
> - hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth); \
> - hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth); \
> - hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth); \
> - hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth); \
> - hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth); \
> - hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth); \
> - hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth); \
> - hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth); \
> - hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth); \
> - hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth); \
> + QPEL_FUNCS(depth); \
> \
> hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth); \
> hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth); \
> diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
> index 4ebb742..4349824 100644
> --- a/libavcodec/hevcdsp.h
> +++ b/libavcodec/hevcdsp.h
> @@ -58,9 +58,9 @@ typedef struct HEVCDSPContext {
> int height, int c_idx, uint8_t vert_edge,
> uint8_t horiz_edge, uint8_t diag_edge);
>
> - void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
> - ptrdiff_t srcstride, int width, int height,
> - int16_t *mcbuffer);
> + void (*put_hevc_qpel[5][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
> + int width, int height, int mx, int my);
> +
> void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
> ptrdiff_t srcstride, int width, int height,
> int mx, int my, int16_t *mcbuffer);
> @@ -105,6 +105,7 @@ typedef struct HEVCDSPContext {
> void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
>
> extern const int8_t ff_hevc_epel_filters[7][16];
> +extern const int8_t ff_hevc_qpel_filters[3][16];
>
[...]
> static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
> ptrdiff_t stride)
> {
> int i;
> - pixel *dst = (pixel *)_dst;
> - int shift = 7;
> - int add = 1 << (shift - 1);
> + pixel *dst = (pixel *)_dst;
> + int shift = 7;
> + int add = 1 << (shift - 1);
> int16_t *src = coeffs;
>
> stride /= sizeof(pixel);
> @@ -168,40 +168,40 @@ static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
> }
>
> shift = 20 - BIT_DEPTH;
> - add = 1 << (shift - 1);
> + add = 1 << (shift - 1);
> for (i = 0; i < 4; i++) {
> TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
> coeffs += 4;
> - dst += stride;
> + dst += stride;
> }
> }
cosmetic changes and functional changes should be in seperate patches
otherwise its hard ro review, and thats not only now but also if
one (maybe you or someone else) debugs a regression maybe years later
...
also iam not sure the cosmetic changes are intended to be there at
all ?
but either way, great work
it improves speed from 11.463s to 10.735s for
time make -j6 fate-hevc
Thanks
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The misfortune of the wise is better than the prosperity of the fool.
-- Epicurus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140223/223552d5/attachment.asc>
More information about the ffmpeg-devel
mailing list