[FFmpeg-devel] [PATCH V7 1/2] libswscale/x86/yuv2rgb: Change inline assembly into nasm code

Michael Niedermayer michael at niedermayer.cc
Tue Jan 14 23:55:05 EET 2020


On Fri, Jan 10, 2020 at 01:38:15AM +0800, Ting Fu wrote:
> Signed-off-by: Ting Fu <ting.fu at intel.com>
> ---
> V7:
>     Fix compile issue when user configure with --disable-mmx.
>     Fix issue when running ./ffmpeg with --cpuflags mmx/ssse3.
>     Adjust the SIMD verify logic in libswscale/x86/yuv2rgb.c
> 
>  libswscale/x86/Makefile           |   1 +
>  libswscale/x86/swscale.c          |  16 +-
>  libswscale/x86/yuv2rgb.c          |  66 ++---
>  libswscale/x86/yuv2rgb_template.c | 467 ++++++------------------------
>  libswscale/x86/yuv_2_rgb.asm      | 270 +++++++++++++++++
>  5 files changed, 405 insertions(+), 415 deletions(-)
>  create mode 100644 libswscale/x86/yuv_2_rgb.asm

The commit message seems a bit terse
I think it should say if the sequence of instructions is unchanged
and if it was benchmaked. If its the same speed, when the code is run
the commit message should say that too

the principle of this (inline -> nasm) is fine of course.


> 
> diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
> index f317d5dd9b..831d5359aa 100644
> --- a/libswscale/x86/Makefile
> +++ b/libswscale/x86/Makefile
> @@ -12,3 +12,4 @@ X86ASM-OBJS                     += x86/input.o                          \
>                                     x86/output.o                         \
>                                     x86/scale.o                          \
>                                     x86/rgb_2_rgb.o                      \
> +                                   x86/yuv_2_rgb.o                      \
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index 0eed4f18d5..e9d474a1e8 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -29,6 +29,14 @@
>  #include "libavutil/cpu.h"
>  #include "libavutil/pixdesc.h"
>  
> +const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
> +    0x0103010301030103LL,
> +    0x0200020002000200LL,};
> +
> +const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
> +    0x0602060206020602LL,
> +    0x0004000400040004LL,};
> +
>  #if HAVE_INLINE_ASM
>  
>  #define DITHER1XBPP
> @@ -38,14 +46,6 @@ DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
>  DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
>  DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
>  
> -const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
> -    0x0103010301030103LL,
> -    0x0200020002000200LL,};
> -
> -const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
> -    0x0602060206020602LL,
> -    0x0004000400040004LL,};
> -
>  DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
>  DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
>  DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
> diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
> index 5e2f77c20f..dd813d4deb 100644
> --- a/libswscale/x86/yuv2rgb.c
> +++ b/libswscale/x86/yuv2rgb.c
> @@ -37,7 +37,7 @@
>  #include "libavutil/x86/cpu.h"
>  #include "libavutil/cpu.h"
>  
> -#if HAVE_INLINE_ASM
> +#if HAVE_X86ASM
>  
>  #define DITHER1XBPP // only for MMX
>  
> @@ -50,32 +50,31 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
>  DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
>  
>  //MMX versions
> -#if HAVE_MMX_INLINE && HAVE_6REGS
> +#if HAVE_MMX
>  #undef RENAME
>  #undef COMPILE_TEMPLATE_MMXEXT
>  #define COMPILE_TEMPLATE_MMXEXT 0
>  #define RENAME(a) a ## _mmx
>  #include "yuv2rgb_template.c"
> -#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
> +#endif /* HAVE_MMX */
>  
>  // MMXEXT versions
> -#if HAVE_MMXEXT_INLINE && HAVE_6REGS
> +#if HAVE_MMXEXT
>  #undef RENAME
>  #undef COMPILE_TEMPLATE_MMXEXT
>  #define COMPILE_TEMPLATE_MMXEXT 1
>  #define RENAME(a) a ## _mmxext
>  #include "yuv2rgb_template.c"
> -#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
> +#endif /* HAVE_MMXEXT */
>  
> -#endif /* HAVE_INLINE_ASM */
> +#endif /* HAVE_X86ASM */
>  
>  av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
>  {
> -#if HAVE_MMX_INLINE && HAVE_6REGS
> +#if HAVE_X86ASM
>      int cpu_flags = av_get_cpu_flags();
>  
> -#if HAVE_MMXEXT_INLINE
> -    if (INLINE_MMXEXT(cpu_flags)) {
> +    if (EXTERNAL_MMXEXT(cpu_flags)) {
>          switch (c->dstFormat) {
>          case AV_PIX_FMT_RGB24:
>              return yuv420_rgb24_mmxext;
> @@ -83,37 +82,36 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
>              return yuv420_bgr24_mmxext;
>          }
>      }
> -#endif
>  
> -    if (INLINE_MMX(cpu_flags)) {
> +    if (EXTERNAL_MMX(cpu_flags)) {
>          switch (c->dstFormat) {
> -            case AV_PIX_FMT_RGB32:
> -                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
> -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
> -                    return yuva420_rgb32_mmx;
> +        case AV_PIX_FMT_RGB32:
> +            if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
> +#if CONFIG_SWSCALE_ALPHA
> +                return yuva420_rgb32_mmx;
>  #endif
> -                    break;
> -                } else
> -                    return yuv420_rgb32_mmx;
> -            case AV_PIX_FMT_BGR32:
> -                if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
> -#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
> -                    return yuva420_bgr32_mmx;
> +                break;
> +            } else
> +                return yuv420_rgb32_mmx;
> +        case AV_PIX_FMT_BGR32:
> +            if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
> +#if CONFIG_SWSCALE_ALPHA
> +                return yuva420_bgr32_mmx;
>  #endif
> -                    break;
> -                } else
> -                    return yuv420_bgr32_mmx;
> -            case AV_PIX_FMT_RGB24:
> -                return yuv420_rgb24_mmx;
> -            case AV_PIX_FMT_BGR24:
> -                return yuv420_bgr24_mmx;
> -            case AV_PIX_FMT_RGB565:
> -                return yuv420_rgb16_mmx;
> -            case AV_PIX_FMT_RGB555:
> -                return yuv420_rgb15_mmx;
> +                break;
> +            } else
> +                return yuv420_bgr32_mmx;
> +        case AV_PIX_FMT_RGB24:
> +            return yuv420_rgb24_mmx;
> +        case AV_PIX_FMT_BGR24:
> +            return yuv420_bgr24_mmx;
> +        case AV_PIX_FMT_RGB565:
> +            return yuv420_rgb16_mmx;
> +        case AV_PIX_FMT_RGB555:
> +            return yuv420_rgb15_mmx;
>          }
>      }

this is a little messy to review
it is mostly reindention
yuv2rgb.c          |   66 +++----
and with -w
yuv2rgb.c          |   26 +--



[...]
> -static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
> -                                       int srcStride[],
> -                                       int srcSliceY, int srcSliceH,
> -                                       uint8_t *dst[], int dstStride[])
> +static int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
> +                                               int srcStride[],
> +                                               int srcSliceY, int srcSliceH,
> +                                               uint8_t *dst[], int dstStride[])

maybe the removial of inline should be a seperate patch
also there is the question why these wraper functions exist
These do change from a a "free thing in inline asm" to a
call overhead with C->NASM


>  {
>      int y, h_size, vshift;
> -
>      YUV2RGB_LOOP(2)
>  
>  #ifdef DITHER1XBPP
> -        c->blueDither  = ff_dither8[y       & 1];
> -        c->greenDither = ff_dither4[y       & 1];
> -        c->redDither   = ff_dither8[(y + 1) & 1];
> +    c->blueDither  = ff_dither8[y       & 1];
> +    c->greenDither = ff_dither4[y       & 1];
> +    c->redDither   = ff_dither8[(y + 1) & 1];

these changes make the patch harder to review and the resulting
commit harder to read too (and i manually matched these up above
it lookes worse in the actual diff


> -#endif
> -
> -        YUV2RGB_INITIAL_LOAD
> -        YUV2RGB
> -        RGB_PACK_INTERLEAVE
> -#ifdef DITHER1XBPP
> -        DITHER_RGB
>  #endif
> -        RGB_PACK16(pb_07, 0)
>  
> -    YUV2RGB_ENDLOOP(2)
> -    YUV2RGB_OPERANDS
> -    YUV2RGB_ENDFUNC

> +    RENAME(ff_yuv_420_rgb16)(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
> +    }
> +    return srcSliceH;

This doesnt look correctly indented


thanks



[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The smallest minority on earth is the individual. Those who deny 
individual rights cannot claim to be defenders of minorities. - Ayn Rand
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20200114/29fefd6a/attachment.sig>


More information about the ffmpeg-devel mailing list