[FFmpeg-cvslog] diracdsp: add dequantization SIMD
Rostislav Pehlivanov
git at videolan.org
Tue Jul 12 00:45:35 CEST 2016
ffmpeg | branch: master | Rostislav Pehlivanov <rpehlivanov at ob-encoder.com> | Thu Jun 23 18:06:56 2016 +0100| [80721cc1ff1f1c8c460c136184ed6416a73b4bfd] | committer: Rostislav Pehlivanov
diracdsp: add dequantization SIMD
Currently unused, to be used in the following commits.
Signed-off-by: Rostislav Pehlivanov <rpehlivanov at obe.tv>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=80721cc1ff1f1c8c460c136184ed6416a73b4bfd
---
libavcodec/diracdsp.c | 24 ++++++++++++++++++++++++
libavcodec/diracdsp.h | 4 ++++
libavcodec/x86/diracdsp.asm | 37 +++++++++++++++++++++++++++++++++++++
libavcodec/x86/diracdsp_init.c | 6 ++++++
4 files changed, 71 insertions(+)
diff --git a/libavcodec/diracdsp.c b/libavcodec/diracdsp.c
index ab8d149..cd1209e 100644
--- a/libavcodec/diracdsp.c
+++ b/libavcodec/diracdsp.c
@@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
}
}
+#define DEQUANT_SUBBAND(PX) /* emits dequant_subband_<PX>_c(): dst = sign(c) * ((|c|*qf + qs) >> 2) */ \
+static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride, \
+ const int qf, const int qs, int tot_v, int tot_h) \
+{ \
+ int i, y; \
+ for (y = 0; y < tot_v; y++) { /* tot_v rows of tot_h coefficients each */ \
+ PX c, *src_r = (PX *)src, *dst_r = (PX *)dst; \
+ for (i = 0; i < tot_h; i++) { /* unsigned math below: |c|*qf + qs may exceed INT_MAX, which is UB for signed int */ \
+ c = *src_r++; /* a zero coefficient is passed through unchanged */ \
+ if (c < 0) c = -((-(unsigned)c*qf + qs) >> 2); \
+ else if (c > 0) c = ((unsigned)c*qf + qs) >> 2; \
+ *dst_r++ = c; \
+ } \
+ src += tot_h << (sizeof(PX) >> 1); /* src rows are contiguous: tot_h * sizeof(PX) bytes for 2- and 4-byte PX */ \
+ dst += stride; /* dst rows are stride bytes apart */ \
+ } \
+}
+
+DEQUANT_SUBBAND(int16_t) /* defines dequant_subband_int16_t_c */
+DEQUANT_SUBBAND(int32_t) /* defines dequant_subband_int32_t_c */
+
#define PIXFUNC(PFX, WIDTH) \
c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
@@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
+ c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c;
+ c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c;
+
PIXFUNC(put, 8);
PIXFUNC(put, 16);
PIXFUNC(put, 32);
diff --git a/libavcodec/diracdsp.h b/libavcodec/diracdsp.h
index 25a872d..224828d 100644
--- a/libavcodec/diracdsp.h
+++ b/libavcodec/diracdsp.h
@@ -22,6 +22,7 @@
#define AVCODEC_DIRACDSP_H
#include <stdint.h>
+#include <stddef.h>
typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
@@ -46,6 +47,9 @@ typedef struct {
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+ /* 0-1: int16_t and int32_t, asm or C; 2-3: int16_t and int32_t, C only */
+ void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
+
dirac_weight_func weight_dirac_pixels_tab[3];
dirac_biweight_func biweight_dirac_pixels_tab[3];
} DiracDSPContext;
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
index a042413..8e9f0fb 100644
--- a/libavcodec/x86/diracdsp.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -263,3 +263,40 @@ ADD_RECT sse2
HPEL_FILTER sse2
ADD_OBMC 32, sse2
ADD_OBMC 16, sse2
+
+INIT_XMM sse4
+
+; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h) -- per 32-bit coefficient: dst = sign(src) * ((|src|*qf + qs) >> 2)
+cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
+ movd m2, qfd
+ movd m3, qsd
+ SPLATD m2 ; presumably broadcasts qf to every dword lane (SPLATD is defined outside this hunk)
+ SPLATD m3 ; likewise for qs
+ mov r4, tot_hq ; r4 keeps the row width; tot_h itself becomes the column counter
+ mov r3, dstq ; r3 keeps the start of the current dst row
+
+ .loop_v:
+ mov tot_hq, r4 ; reload the column counter for this row
+ mov dstq, r3 ; point dst at the start of this row
+
+ .loop_h:
+ movu m0, [srcq] ; unaligned load of mmsize bytes of coefficients
+
+ pabsd m1, m0 ; |c|
+ pmulld m1, m2 ; * qf (low 32 bits of each product)
+ paddd m1, m3 ; + qs
+ psrld m1, 2 ; logical shift right by 2
+ psignd m1, m0 ; reapply the sign of the input; lanes whose input was 0 become 0
+
+ movu [dstq], m1
+
+ add srcq, mmsize
+ add dstq, mmsize
+ sub tot_hd, 4 ; four coefficients handled per iteration
+ jg .loop_h ; NOTE(review): a tot_h that is not a multiple of 4 still runs a full final iteration
+
+ add r3, strideq ; next dst row; src is never rewound, so src rows are read back to back
+ dec tot_vd
+ jg .loop_v
+
+ RET
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index 5fae798..26b885d 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); /* presumably the SSE4 dequant_subband_32 routine from x86/diracdsp.asm -- confirm symbol mangling */
+
#if HAVE_YASM
#define HPEL_FILTER(MMSIZE, EXT) \
@@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
}
+
+ if (EXTERNAL_SSE4(mm_flags)) {
+ c->dequant_subband[1] = ff_dequant_subband_32_sse4;
+ }
}
More information about the ffmpeg-cvslog
mailing list