[FFmpeg-devel] [PATCH] avcodec/cfhd: add x86 SIMD

James Almer jamrial at gmail.com
Sun Aug 16 17:33:47 EEST 2020


On 8/16/2020 11:09 AM, Paul B Mahol wrote:
> On 8/16/20, Paul B Mahol <onemda at gmail.com> wrote:
>> Hi,
>>
>> patch attached.
>>
>> Please help porting this to linux and 64bit calling convention.
>>
> 
> New patch attached, could build on x64, please report any build failure.

[...]

> diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm
> new file mode 100644
> index 0000000000..80371e65c9
> --- /dev/null
> +++ b/libavcodec/x86/cfhddsp.asm
> @@ -0,0 +1,626 @@
> +;******************************************************************************
> +;* x86-optimized functions for the CFHD decoder
> +;* Copyright (c) 2020 Paul B Mahol
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;******************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_RODATA
> +
> +factor_p1_p1: dw 1,  1, 1,  1, 1,  1, 1,  1,
> +factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1,
> +factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1,
> +factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4,
> +factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4,
> +pd_4: times 4 dd 4
> +pw_0: times 8 dw 0
> +pw_1023: times 8 dw 1023
> +pw_4095: times 8 dw 4095
> +
> +SECTION .text
> +
> +%macro CFHD_HORIZ_FILTER 1
> +%if %1 == 1023
> +cglobal cfhd_horiz_filter_clip10, 5, 6, 8, output, low, high, width, bpc
> +    DEFINE_ARGS    output, low, high, width, x, temp
> +    shl        widthd, 1
> +%define ostrideq widthq
> +%define lwidthq  widthq
> +%define hwidthq  widthq
> +%elif %1 == 4095
> +cglobal cfhd_horiz_filter_clip12, 5, 6, 8, output, low, high, width, bpc
> +    DEFINE_ARGS    output, low, high, width, x, temp
> +    shl        widthd, 1
> +%define ostrideq widthq
> +%define lwidthq  widthq
> +%define hwidthq  widthq
> +%else
> +%if ARCH_X86_64
> +cglobal cfhd_horiz_filter, 11, 11, 8, output, ostride, low, lwidth, high, hwidth, width, height
> +DEFINE_ARGS    output, ostride, low, lwidth, high, hwidth, width, height, x, y, temp
> +    shl  ostrided, 1
> +    shl   lwidthd, 1
> +    shl   hwidthd, 1
> +    shl    widthd, 1
> +
> +    mov        yq, heightq
> +    neg        yq
> +%else
> +cglobal cfhd_horiz_filter, 6, 6, 8, 64, output, x, low, y, high, temp, width, height
> +    shl        xd, 1
> +    shl        yd, 1
> +    shl     tempd, 1
> +
> +    mov dword [rsp +  0], xq
> +    mov dword [rsp +  8], yq
> +    mov dword [rsp + 16], tempq

These values are four bytes each on x86_32, not eight. Also, since all
the arguments come from the stack, you can simply store them back into
their own stack slots with:
mov xmp, xq
mov ymp, yq
mov tempmp, tempq
%define ostrideq xm
%define lwidthq ym
%define hwidthq tempm

This saves you the need to reserve stack space.

> +
> +    mov        yd, r6m

Just load r6/width normally in cglobal; you can use up to seven regs on
x86_32.

> +    shl        yd, 1
> +    mov dword [rsp + 24], yq
> +
> +    mov        yd, r7m
> +    neg        yq
> +
> +%define ostrideq [rsp +  0]
> +%define lwidthq  [rsp +  8]
> +%define hwidthq  [rsp + 16]
> +%define widthq   [rsp + 24]

If you're going to define widthq here like this, then you shouldn't
define width in cglobal. But as I said above, you have a free reg to
store it in.


More information about the ffmpeg-devel mailing list