[FFmpeg-devel] [PATCH 2/2] jpeg2000dec: add ff_rct_int_{sse2, avx2}

Paul B Mahol onemda at gmail.com
Sat Jun 13 20:26:00 CEST 2015


On 13 Jun 2015 at 18:28, "James Almer" <jamrial at gmail.com> wrote:
>
> Signed-off-by: James Almer <jamrial at gmail.com>
> ---
> The only sample I could find using the reversible wavelet transform is
> http://www.fnordware.com/j2k/relax.jp2
>
>  libavcodec/jpeg2000.c             |  1 +
>  libavcodec/x86/jpeg2000dsp.asm    | 36 ++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/jpeg2000dsp_init.c | 10 ++++++++++
>  3 files changed, 47 insertions(+)
>
> diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
> index af24e99..ec00ebc 100644
> --- a/libavcodec/jpeg2000.c
> +++ b/libavcodec/jpeg2000.c
> @@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
>          if (!comp->f_data)
>              return AVERROR(ENOMEM);
>      } else {
> +        csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
>          comp->f_data = NULL;
>          comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
>          if (!comp->i_data)
> diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
> index 0d79ab7..712a298 100644
> --- a/libavcodec/x86/jpeg2000dsp.asm
> +++ b/libavcodec/x86/jpeg2000dsp.asm
> @@ -106,3 +106,39 @@ INIT_XMM sse
>  ICT_FLOAT 10
>  INIT_YMM avx
>  ICT_FLOAT 9
> +
> +;***************************************************************************
> +; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
> +;***************************************************************************
> +%macro RCT_INT 0
> +cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
> +    shl  csized, 2
> +    add   src0q, csizeq
> +    add   src1q, csizeq
> +    add   src2q, csizeq
> +    neg  csizeq
> +
> +align 16
> +.loop:
> +    mova   m1, [src1q+csizeq]
> +    mova   m2, [src2q+csizeq]
> +    mova   m0, [src0q+csizeq]
> +    paddd  m3, m1, m2
> +    psrad  m3, 2
> +    psubd  m0, m3
> +    paddd  m1, m0
> +    paddd  m2, m0
> +    mova   [src1q+csizeq], m0
> +    mova   [src2q+csizeq], m1
> +    mova   [src0q+csizeq], m2
> +    add  csizeq, mmsize
> +    jl .loop
> +    REP_RET
> +%endmacro
> +
> +INIT_XMM sse2
> +RCT_INT
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +RCT_INT
> +%endif
> diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
> index 43b9ccd..0dbd2db 100644
> --- a/libavcodec/x86/jpeg2000dsp_init.c
> +++ b/libavcodec/x86/jpeg2000dsp_init.c
> @@ -26,6 +26,8 @@
>
>  void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
>  void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
> +void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
> +void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
>
>  av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
>  {
> @@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
>          c->mct_decode[FF_DWT97] = ff_ict_float_sse;
>      }
>
> +    if (EXTERNAL_SSE2(cpu_flags)) {
> +        c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
> +    }
> +
>      if (EXTERNAL_AVX_FAST(cpu_flags)) {
>          c->mct_decode[FF_DWT97] = ff_ict_float_avx;
>      }
> +
> +    if (EXTERNAL_AVX2(cpu_flags)) {
> +        c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
> +    }
>  }
> --
> 2.4.3
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

How much faster is this?


More information about the ffmpeg-devel mailing list