[FFmpeg-devel] [PATCH 1/4] lavc/aarch64: add hevc sao edge 16x16

Martin Storsjö martin at martin.st
Tue Oct 19 11:38:45 EEST 2021


On Thu, 7 Oct 2021, J. Dekker wrote:

> --bench on AWS Graviton:
>
> hevc_sao_edge_16x16_8_c: 1857.0
> hevc_sao_edge_16x16_8_neon: 211.0
> hevc_sao_edge_32x32_8_c: 7802.2
> hevc_sao_edge_32x32_8_neon: 808.2
> hevc_sao_edge_48x48_8_c: 16764.2
> hevc_sao_edge_48x48_8_neon: 1796.5
> hevc_sao_edge_64x64_8_c: 32647.5
> hevc_sao_edge_64x64_8_neon: 3118.5
>
> Signed-off-by: J. Dekker <jdek at itanimul.li>
> ---
> libavcodec/aarch64/hevcdsp_init_aarch64.c |  8 ++-
> libavcodec/aarch64/hevcdsp_sao_neon.S     | 66 +++++++++++++++++++++++
> 2 files changed, 72 insertions(+), 2 deletions(-)
>
> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> index c785e46f79..747ff0412d 100644
> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> @@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
>                                   ptrdiff_t stride_dst, ptrdiff_t stride_src,
>                                   int16_t *sao_offset_val, int sao_left_class,
>                                   int width, int height);
> -
> -
> +void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
> +                                          int16_t *sao_offset_val, int eo, int width, int height);
>
> av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
> {
> @@ -76,6 +76,10 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
>         c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
>         c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
>         c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
> +        c->sao_edge_filter[1]          =
> +        c->sao_edge_filter[2]          =
> +        c->sao_edge_filter[3]          =
> +        c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_16x16_8_neon;
>     }
>     if (bit_depth == 10) {
>         c->add_residual[0]             = ff_hevc_add_residual_4x4_10_neon;
> diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
> index f9fed8345b..a7f054c075 100644
> --- a/libavcodec/aarch64/hevcdsp_sao_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
> @@ -85,3 +85,69 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
>         bne             1b
>         ret
> endfunc
> +
> +// ASSUMES STRIDE_SRC = 192
> +.Lsao_edge_pos:
> +.word 1 // horizontal
> +.word 192 // vertical
> +.word 192 + 1 // 45 degree
> +.word 192 - 1 // 135 degree
> +
> +// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst,
> +//                                      int16 *sao_offset_val, int eo, int width, int height)
> +function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
> +       lsl             w4, w4, #2

Actually, the left indentation here is one character short compared with 
the existing function in the same file, and compared with other asm 
sources. So rather than reindenting the old function, please indent the new 
one to match all the other existing asm.

// Martin


More information about the ffmpeg-devel mailing list