[FFmpeg-devel] [PATCH] x86/ttadsp: add ff_ttafilter_process_enc_{ssse3, sse4}

James Almer jamrial at gmail.com
Mon Aug 1 04:27:46 EEST 2016


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/Makefile          |  2 +-
 libavcodec/ttadsp.c          | 41 ++++++++++++++++++++++++++++++++++++-----
 libavcodec/ttadsp.h          |  3 +++
 libavcodec/ttaenc.c          | 38 ++++++--------------------------------
 libavcodec/x86/Makefile      |  2 ++
 libavcodec/x86/ttadsp.asm    | 24 ++++++++++++++++--------
 libavcodec/x86/ttadsp_init.c | 25 +++++++++++++++++++------
 7 files changed, 83 insertions(+), 52 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 33ac2b3..4355c13 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -552,7 +552,7 @@ OBJS-$(CONFIG_TRUESPEECH_DECODER)      += truespeech.o
 OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o msrledec.o
 OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
 OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
-OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttadata.o
+OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttadata.o ttadsp.o
 OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o
 OBJS-$(CONFIG_TXD_DECODER)             += txd.o
 OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c
index 30b7ab9..32a87b2 100644
--- a/libavcodec/ttadsp.c
+++ b/libavcodec/ttadsp.c
@@ -18,9 +18,10 @@
 
 #include "ttadsp.h"
 
-static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
-                                    int32_t *error, int32_t *in, int32_t shift,
-                                    int32_t round) {
+static inline void ttafilter_process(int32_t *qm, int32_t *dx, int32_t *dl,
+                                     int32_t *error, int32_t *in, int32_t shift,
+                                     int32_t round, int enc)
+{
     if (*error < 0) {
         qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
         qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
@@ -40,17 +41,47 @@ static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
     dx[6] = ((dl[6] >> 30) | 2) & ~1;
     dx[7] = ((dl[7] >> 30) | 4) & ~3;
 
-    *error = *in;
-    *in += (round >> shift);
+    if (!enc) {
+        *error = *in;
+        *in += (round >> shift);
+    }
 
     dl[4] = -dl[5]; dl[5] = -dl[6];
     dl[6] = *in - dl[7]; dl[7] = *in;
     dl[5] += dl[6]; dl[4] += dl[5];
+
+    if (enc) {
+        *in -= (round >> shift);
+        *error = *in;
+    }
+}
+
+#if CONFIG_TTA_DECODER /* C decoder variant; compiled only when the TTA decoder is enabled */
+static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
+                                    int32_t *error, int32_t *in, int32_t shift,
+                                    int32_t round)
+{
+    ttafilter_process(qm, dx, dl, error, in, shift, round, 0); /* enc = 0: *error and *in are updated before dl[] is */
+}
+#endif /* CONFIG_TTA_DECODER */
+
+#if CONFIG_TTA_ENCODER /* C encoder variant; compiled only when the TTA encoder is enabled */
+static void ttafilter_process_enc_c(int32_t *qm, int32_t *dx, int32_t *dl,
+                                    int32_t *error, int32_t *in, int32_t shift,
+                                    int32_t round)
+{
+    ttafilter_process(qm, dx, dl, error, in, shift, round, 1); /* enc = 1: dl[] is updated first, then *in and *error */
 }
+#endif /* CONFIG_TTA_ENCODER */
 
 av_cold void ff_ttadsp_init(TTADSPContext *c)
 {
+#if CONFIG_TTA_DECODER
     c->ttafilter_process_dec = ttafilter_process_dec_c;
+#endif
+#if CONFIG_TTA_ENCODER
+    c->ttafilter_process_enc = ttafilter_process_enc_c;
+#endif
 
     if (ARCH_X86)
         ff_ttadsp_init_x86(c);
diff --git a/libavcodec/ttadsp.h b/libavcodec/ttadsp.h
index 56930f1..df73998 100644
--- a/libavcodec/ttadsp.h
+++ b/libavcodec/ttadsp.h
@@ -26,6 +26,9 @@ typedef struct TTADSPContext {
     void (*ttafilter_process_dec)(int32_t *qm, int32_t *dx, int32_t *dl,
                                   int32_t *error, int32_t *in, int32_t shift,
                                   int32_t round);
+    void (*ttafilter_process_enc)(int32_t *qm, int32_t *dx, int32_t *dl,
+                                  int32_t *error, int32_t *in, int32_t shift,
+                                  int32_t round);
 } TTADSPContext;
 
 void ff_ttadsp_init(TTADSPContext *c);
diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c
index 2f1c8db..5ccf98b 100644
--- a/libavcodec/ttaenc.c
+++ b/libavcodec/ttaenc.c
@@ -20,6 +20,7 @@
 
 #define BITSTREAM_WRITER_LE
 #include "ttadata.h"
+#include "ttadsp.h"
 #include "avcodec.h"
 #include "put_bits.h"
 #include "internal.h"
@@ -29,6 +30,7 @@ typedef struct TTAEncContext {
     const AVCRC *crc_table;
     int bps;
     TTAChannel *ch_ctx;
+    TTADSPContext dsp;
 } TTAEncContext;
 
 static av_cold int tta_encode_init(AVCodecContext *avctx)
@@ -57,38 +59,9 @@ static av_cold int tta_encode_init(AVCodecContext *avctx)
     if (!s->ch_ctx)
         return AVERROR(ENOMEM);
 
-    return 0;
-}
-
-static inline void ttafilter_process(TTAFilter *c, int32_t *in)
-{
-    register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
-
-    if (c->error < 0) {
-        qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
-        qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
-    } else if (c->error > 0) {
-        qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3];
-        qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7];
-    }
+    ff_ttadsp_init(&s->dsp);
 
-    sum += dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] +
-           dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7];
-
-    dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4];
-    dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4];
-
-    dx[4] = ((dl[4] >> 30) | 1);
-    dx[5] = ((dl[5] >> 30) | 2) & ~1;
-    dx[6] = ((dl[6] >> 30) | 2) & ~1;
-    dx[7] = ((dl[7] >> 30) | 4) & ~3;
-
-    dl[4] = -dl[5]; dl[5] = -dl[6];
-    dl[6] = *in - dl[7]; dl[7] = *in;
-    dl[5] += dl[6]; dl[4] += dl[5];
-
-    *in -= (sum >> c->shift);
-    c->error = *in;
+    return 0;
 }
 
 static int32_t get_sample(const AVFrame *frame, int sample,
@@ -155,7 +128,8 @@ pkt_alloc:
         }
         c->predictor = temp;
 
-        ttafilter_process(filter, &value);
+        s->dsp.ttafilter_process_enc(filter->qm, filter->dx, filter->dl, &filter->error, &value,
+                                     filter->shift, filter->round);
         outval = (value > 0) ? (value << 1) - 1: -value << 1;
 
         k = rice->k0;
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 839b5bc..cc2b3c4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -61,6 +61,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
 OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
+OBJS-$(CONFIG_TTA_ENCODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
 OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
@@ -160,6 +161,7 @@ YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
 YASM-OBJS-$(CONFIG_TAK_DECODER)        += x86/takdsp.o
 YASM-OBJS-$(CONFIG_TRUEHD_DECODER)     += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
+YASM-OBJS-$(CONFIG_TTA_ENCODER)        += x86/ttadsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
 YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
diff --git a/libavcodec/x86/ttadsp.asm b/libavcodec/x86/ttadsp.asm
index 8f48949..1c664f2 100644
--- a/libavcodec/x86/ttadsp.asm
+++ b/libavcodec/x86/ttadsp.asm
@@ -29,9 +29,9 @@ pd_1224:  dd 1, 2, 2, 4
 
 SECTION .text
 
-%macro TTA_FILTER 2
+%macro TTA_FILTER 3
 INIT_XMM %1
-cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
+cglobal ttafilter_process_%2, 5,5,%3, qm, dx, dl, error, in, shift, round
     mova       m2, [qmq       ]
     mova       m3, [qmq + 0x10]
     mova       m4, [dxq       ]
@@ -94,13 +94,19 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
     mova       [dlq       ], m2
     mova       [dxq       ], m5
     mova       [dxq + 0x10], m4
-    movd       m0, [inq]            ; filter->error = *in;
-    movd       [errorq], m0         ;
 
-    movd       m2, shiftm           ; *in += (sum >> filter->shift);
+    movd       m2, shiftm           ;
+    movd       m0, [inq]
     psrad      m6, m2               ;
-    paddd      m0, m6               ;
+%ifidn %2, dec
+    movd       [errorq], m0         ; filter->error = *in;
+    paddd      m0, m6               ; *in += (sum >> filter->shift);
     movd       [inq], m0            ;
+%else
+    psubd      m3, m0, m6           ;
+    movd       [inq], m3            ; *in -= (sum >> filter->shift);
+    movd       [errorq], m3         ; filter->error = *in;
+%endif
 
     psrldq     m1, 4                ;
     pslldq     m0, 12               ; filter->dl[4] = -filter->dl[5];
@@ -115,5 +121,7 @@ cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
     RET
 %endmacro
 
-TTA_FILTER ssse3, 8
-TTA_FILTER sse4,  7
+TTA_FILTER ssse3, dec, 8
+TTA_FILTER sse4,  dec, 7
+TTA_FILTER ssse3, enc, 8
+TTA_FILTER sse4,  enc, 7
diff --git a/libavcodec/x86/ttadsp_init.c b/libavcodec/x86/ttadsp_init.c
index 47dc87f..75c444c 100644
--- a/libavcodec/x86/ttadsp_init.c
+++ b/libavcodec/x86/ttadsp_init.c
@@ -22,21 +22,34 @@
 #include "libavutil/x86/cpu.h"
 #include "config.h"
 
-void ff_ttafilter_process_dec_ssse3(int32_t *qm, int32_t *dx, int32_t *dl,
-                                    int32_t *error, int32_t *in, int32_t shift,
-                                    int32_t round);
-void ff_ttafilter_process_dec_sse4(int32_t *qm, int32_t *dx, int32_t *dl,
-                                   int32_t *error, int32_t *in, int32_t shift,
-                                   int32_t round);
+#define TTAFILTER_PROCESS(opt) \
+void ff_ttafilter_process_dec_##opt(int32_t *qm, int32_t *dx, int32_t *dl,      \
+                                    int32_t *error, int32_t *in, int32_t shift, \
+                                    int32_t round);                             \
+void ff_ttafilter_process_enc_##opt(int32_t *qm, int32_t *dx, int32_t *dl,      \
+                                    int32_t *error, int32_t *in, int32_t shift, \
+                                    int32_t round)
+
+TTAFILTER_PROCESS(ssse3); /* prototypes for ff_ttafilter_process_{dec,enc}_ssse3 */
+TTAFILTER_PROCESS(sse4); /* prototypes for ff_ttafilter_process_{dec,enc}_sse4 */
 
 av_cold void ff_ttadsp_init_x86(TTADSPContext *c)
 {
 #if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
+#if CONFIG_TTA_DECODER /* the bare #endif after the SSE4 assignment below closes this block */
     if (EXTERNAL_SSSE3(cpu_flags))
         c->ttafilter_process_dec = ff_ttafilter_process_dec_ssse3;
     if (EXTERNAL_SSE4(cpu_flags))
         c->ttafilter_process_dec = ff_ttafilter_process_dec_sse4;
 #endif
+
+#if CONFIG_TTA_ENCODER
+    if (EXTERNAL_SSSE3(cpu_flags))
+        c->ttafilter_process_enc = ff_ttafilter_process_enc_ssse3;
+    if (EXTERNAL_SSE4(cpu_flags)) /* checked last, so the SSE4 version replaces SSSE3 when both flags are set */
+        c->ttafilter_process_enc = ff_ttafilter_process_enc_sse4;
+#endif /* CONFIG_TTA_ENCODER */
+#endif /* HAVE_YASM */
 }
-- 
2.9.1



More information about the ffmpeg-devel mailing list