[FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Wed Sep 6 12:37:10 EEST 2023
Andreas Rheinhardt:
> Once upon a time, 413abbe16465a7b49472ac110e42939e853e24a1
> added versions of some put_no_rnd_pixels functions for use
> in VP3 and Theora (with an explicit check so that they are
> only used for VP3 and Theora). When this was moved to hpeldsp
> (from dsputil) in 3ced55d51c2e65b37e50d500dff88bcd80e01b9c,
> the check was replaced by a check for the bitexact flag
> (and a CONFIG_VP3_DECODER compile-time check), so that
> these functions were now used for other codecs as well.
>
> Later commit 1dfc3cf89d0eb026af28be46294b85d79499ffb5
> split off the "VP3-specific bits into a separate file",
> yet these bits were not really VP3-specific bits at all
> any more. (The error was repeated in commit
> 0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553.) The split
> was not reverted back then, because that would have made
> merging future changes from Libav (where it originated)
> harder; yet Libav is no more, so this commit effectively
> reverts 1dfc3cf89d0eb026af28be46294b85d79499ffb5.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> ---
> libavcodec/x86/Makefile | 2 -
> libavcodec/x86/hpeldsp.asm | 77 ++++++++++++++++++++++++
> libavcodec/x86/hpeldsp.h | 4 --
> libavcodec/x86/hpeldsp_init.c | 14 +++--
> libavcodec/x86/hpeldsp_vp3.asm | 99 -------------------------------
> libavcodec/x86/hpeldsp_vp3_init.c | 43 --------------
> 6 files changed, 86 insertions(+), 153 deletions(-)
> delete mode 100644 libavcodec/x86/hpeldsp_vp3.asm
> delete mode 100644 libavcodec/x86/hpeldsp_vp3_init.c
>
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 118daca333..b4cc5e0d08 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -75,7 +75,6 @@ OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
> OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
> OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
> OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
> -OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
> OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
> OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
> x86/vp9dsp_init_10bpp.o \
> @@ -192,7 +191,6 @@ X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
> X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
> X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
> X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
> -X86ASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
> X86ASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
> X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
> x86/vp9intrapred_16bpp.o \
> diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
> index 7a2b7135d8..3bc278618c 100644
> --- a/libavcodec/x86/hpeldsp.asm
> +++ b/libavcodec/x86/hpeldsp.asm
> @@ -165,6 +165,47 @@ cglobal put_no_rnd_pixels8_x2, 4,5
> RET
>
>
> +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_x2_exact, 4,5
> + lea r4, [r2*3]
> + pcmpeqb m6, m6
> +.loop:
> + mova m0, [r1]
> + mova m2, [r1+r2]
> + mova m1, [r1+1]
> + mova m3, [r1+r2+1]
> + pxor m0, m6
> + pxor m2, m6
> + pxor m1, m6
> + pxor m3, m6
> + PAVGB m0, m1
> + PAVGB m2, m3
> + pxor m0, m6
> + pxor m2, m6
> + mova [r0], m0
> + mova [r0+r2], m2
> + mova m0, [r1+r2*2]
> + mova m1, [r1+r2*2+1]
> + mova m2, [r1+r4]
> + mova m3, [r1+r4+1]
> + pxor m0, m6
> + pxor m1, m6
> + pxor m2, m6
> + pxor m3, m6
> + PAVGB m0, m1
> + PAVGB m2, m3
> + pxor m0, m6
> + pxor m2, m6
> + mova [r0+r2*2], m0
> + mova [r0+r4], m2
> + lea r1, [r1+r2*4]
> + lea r0, [r0+r2*4]
> + sub r3d, 4
> + jg .loop
> + RET
> +
> +
> ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> %macro PUT_PIXELS8_Y2 0
> %if cpuflag(sse2)
> @@ -235,6 +276,42 @@ cglobal put_no_rnd_pixels8_y2, 4,5
> RET
>
>
> +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_y2_exact, 4,5
> + lea r4, [r2*3]
> + mova m0, [r1]
> + pcmpeqb m6, m6
> + add r1, r2
> + pxor m0, m6
> +.loop:
> + mova m1, [r1]
> + mova m2, [r1+r2]
> + pxor m1, m6
> + pxor m2, m6
> + PAVGB m0, m1
> + PAVGB m1, m2
> + pxor m0, m6
> + pxor m1, m6
> + mova [r0], m0
> + mova [r0+r2], m1
> + mova m1, [r1+r2*2]
> + mova m0, [r1+r4]
> + pxor m1, m6
> + pxor m0, m6
> + PAVGB m2, m1
> + PAVGB m1, m0
> + pxor m2, m6
> + pxor m1, m6
> + mova [r0+r2*2], m2
> + mova [r0+r4], m1
> + lea r1, [r1+r2*4]
> + lea r0, [r0+r2*4]
> + sub r3d, 4
> + jg .loop
> + RET
> +
> +
> ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> %macro AVG_PIXELS8_X2 0
> %if cpuflag(sse2)
> diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
> index fd740da72e..ac7e625fda 100644
> --- a/libavcodec/x86/hpeldsp.h
> +++ b/libavcodec/x86/hpeldsp.h
> @@ -22,8 +22,6 @@
> #include <stddef.h>
> #include <stdint.h>
>
> -#include "libavcodec/hpeldsp.h"
> -
> void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
>
> @@ -50,6 +48,4 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
> void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
>
> -void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
> -
> #endif /* AVCODEC_X86_HPELDSP_H */
> diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
> index 09c48c341e..f08c66f5c8 100644
> --- a/libavcodec/x86/hpeldsp_init.c
> +++ b/libavcodec/x86/hpeldsp_init.c
> @@ -22,8 +22,6 @@
> * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
> */
>
> -#include "config_components.h"
> -
> #include "libavutil/attributes.h"
> #include "libavutil/cpu.h"
> #include "libavutil/x86/cpu.h"
> @@ -47,10 +45,16 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> + const uint8_t *pixels,
> + ptrdiff_t line_size, int h);
> void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> + const uint8_t *pixels,
> + ptrdiff_t line_size, int h);
> void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> @@ -183,6 +187,9 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
> c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
> c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
>
> + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> +
> if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
> c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
> c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
> @@ -235,7 +242,4 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
>
> if (EXTERNAL_SSSE3(cpu_flags))
> hpeldsp_init_ssse3(c, flags);
> -
> - if (CONFIG_VP3_DECODER)
> - ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
> }
> diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
> deleted file mode 100644
> index e580133e45..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3.asm
> +++ /dev/null
> @@ -1,99 +0,0 @@
> -;******************************************************************************
> -;* SIMD-optimized halfpel functions for VP3
> -;*
> -;* This file is part of FFmpeg.
> -;*
> -;* FFmpeg is free software; you can redistribute it and/or
> -;* modify it under the terms of the GNU Lesser General Public
> -;* License as published by the Free Software Foundation; either
> -;* version 2.1 of the License, or (at your option) any later version.
> -;*
> -;* FFmpeg is distributed in the hope that it will be useful,
> -;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> -;* Lesser General Public License for more details.
> -;*
> -;* You should have received a copy of the GNU Lesser General Public
> -;* License along with FFmpeg; if not, write to the Free Software
> -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> -;******************************************************************************
> -
> -%include "libavutil/x86/x86util.asm"
> -
> -SECTION .text
> -
> -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_x2_exact, 4,5
> - lea r4, [r2*3]
> - pcmpeqb m6, m6
> -.loop:
> - mova m0, [r1]
> - mova m2, [r1+r2]
> - mova m1, [r1+1]
> - mova m3, [r1+r2+1]
> - pxor m0, m6
> - pxor m2, m6
> - pxor m1, m6
> - pxor m3, m6
> - PAVGB m0, m1
> - PAVGB m2, m3
> - pxor m0, m6
> - pxor m2, m6
> - mova [r0], m0
> - mova [r0+r2], m2
> - mova m0, [r1+r2*2]
> - mova m1, [r1+r2*2+1]
> - mova m2, [r1+r4]
> - mova m3, [r1+r4+1]
> - pxor m0, m6
> - pxor m1, m6
> - pxor m2, m6
> - pxor m3, m6
> - PAVGB m0, m1
> - PAVGB m2, m3
> - pxor m0, m6
> - pxor m2, m6
> - mova [r0+r2*2], m0
> - mova [r0+r4], m2
> - lea r1, [r1+r2*4]
> - lea r0, [r0+r2*4]
> - sub r3d, 4
> - jg .loop
> - RET
> -
> -
> -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_y2_exact, 4,5
> - lea r4, [r2*3]
> - mova m0, [r1]
> - pcmpeqb m6, m6
> - add r1, r2
> - pxor m0, m6
> -.loop:
> - mova m1, [r1]
> - mova m2, [r1+r2]
> - pxor m1, m6
> - pxor m2, m6
> - PAVGB m0, m1
> - PAVGB m1, m2
> - pxor m0, m6
> - pxor m1, m6
> - mova [r0], m0
> - mova [r0+r2], m1
> - mova m1, [r1+r2*2]
> - mova m0, [r1+r4]
> - pxor m1, m6
> - pxor m0, m6
> - PAVGB m2, m1
> - PAVGB m1, m0
> - pxor m2, m6
> - pxor m1, m6
> - mova [r0+r2*2], m2
> - mova [r0+r4], m1
> - lea r1, [r1+r2*4]
> - lea r0, [r0+r2*4]
> - sub r3d, 4
> - jg .loop
> - RET
> diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
> deleted file mode 100644
> index 1dbd1ba6f9..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3_init.c
> +++ /dev/null
> @@ -1,43 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -#include "libavutil/attributes.h"
> -#include "libavutil/cpu.h"
> -#include "libavutil/x86/cpu.h"
> -
> -#include "libavcodec/avcodec.h"
> -#include "libavcodec/hpeldsp.h"
> -
> -#include "hpeldsp.h"
> -
> -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> - const uint8_t *pixels,
> - ptrdiff_t line_size, int h);
> -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> - const uint8_t *pixels,
> - ptrdiff_t line_size, int h);
> -
> -av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
> -{
> - if (EXTERNAL_MMXEXT(cpu_flags)) {
> - if (flags & AV_CODEC_FLAG_BITEXACT) {
> - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> - }
> - }
> -}
Will apply this tomorrow unless there are objections.
- Andreas
More information about the ffmpeg-devel
mailing list