[FFmpeg-cvslog] avcodec/wmalosslessdec: improve 24bit support

Wed Apr 13 23:02:32 CEST 2016

ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Wed Apr 13 20:36:26 2016 +0200| [56759f69a6015b3ce6bdf4b7ae441bb44b097e5e] | committer: Paul B Mahol

avcodec/wmalosslessdec: improve 24bit support

Signed-off-by: Paul B Mahol <onemda at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=56759f69a6015b3ce6bdf4b7ae441bb44b097e5e
---

 libavcodec/lossless_audiodsp.c |   14 ++++++++++++++
 libavcodec/lossless_audiodsp.h |    5 +++++
 libavcodec/wmalosslessdec.c    |   18 +++++++++---------
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/libavcodec/lossless_audiodsp.c b/libavcodec/lossless_audiodsp.c
index 32f4c9e..55495d0 100644
--- a/libavcodec/lossless_audiodsp.c
+++ b/libavcodec/lossless_audiodsp.c
@@ -36,9 +36,23 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2,
     return res;
 }
 
+static int32_t scalarproduct_and_madd_int32_c(int32_t *v1, const int32_t *v2,
+                                              const int32_t *v3,
+                                              int order, int mul)
+{
+    int res = 0;
+
+    while (order--) {
+        res   += *v1 * *v2++;
+        *v1++ += mul * *v3++;
+    }
+    return res;
+}
+
 av_cold void ff_llauddsp_init(LLAudDSPContext *c)
 {
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
+    c->scalarproduct_and_madd_int32 = scalarproduct_and_madd_int32_c;
 
     if (ARCH_ARM)
         ff_llauddsp_init_arm(c);
diff --git a/libavcodec/lossless_audiodsp.h b/libavcodec/lossless_audiodsp.h
index 79ca30d..930fe30 100644
--- a/libavcodec/lossless_audiodsp.h
+++ b/libavcodec/lossless_audiodsp.h
@@ -36,6 +36,11 @@ typedef struct LLAudDSPContext {
                                             const int16_t *v2,
                                             const int16_t *v3,
                                             int len, int mul);
+
+    int32_t (*scalarproduct_and_madd_int32)(int32_t *v1 /* align 16 */,
+                                            const int32_t *v2,
+                                            const int32_t *v3,
+                                            int len, int mul);
 } LLAudDSPContext;
 
 void ff_llauddsp_init(LLAudDSPContext *c);
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 4d50915..f7f249b 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -135,8 +135,8 @@ typedef struct WmallDecodeCtx {
     int8_t  mclms_scaling;
     int16_t mclms_coeffs[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS * 32];
     int16_t mclms_coeffs_cur[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS];
-    int16_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
-    int16_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
+    int32_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
+    int32_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
     int     mclms_recent;
 
     int     movave_scaling;
@@ -147,9 +147,9 @@ typedef struct WmallDecodeCtx {
         int scaling;
         int coefsend;
         int bitsend;
-        DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
-        DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
-        DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int32_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
         int recent;
     } cdlms[WMALL_MAX_CHANNELS][9];
 
@@ -657,10 +657,10 @@ static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
     if (s->mclms_recent == 0) {
         memcpy(&s->mclms_prevvalues[order * num_channels],
                s->mclms_prevvalues,
-               sizeof(int16_t) * order * num_channels);
+               sizeof(int32_t) * order * num_channels);
         memcpy(&s->mclms_updates[order * num_channels],
                s->mclms_updates,
-               sizeof(int16_t) * order * num_channels);
+               sizeof(int32_t) * order * num_channels);
         s->mclms_recent = num_channels * order;
     }
 }
@@ -719,7 +719,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
     s->cdlms[ich][ilms].lms_updates[recent + (order >> 3)] >>= 1;
     s->cdlms[ich][ilms].recent = recent;
     memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0,
-           sizeof(s->cdlms[ich][ilms].lms_updates) - 2*(recent+order));
+           sizeof(s->cdlms[ich][ilms].lms_updates) - 4*(recent+order));
 }
 
 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
@@ -767,7 +767,7 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
         for (icoef = coef_begin; icoef < coef_end; icoef++) {
             pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
             residue = s->channel_residues[ch][icoef];
-            pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs,
+            pred += s->dsp.scalarproduct_and_madd_int32(s->cdlms[ch][ilms].coefs,
                                                         s->cdlms[ch][ilms].lms_prevvalues
                                                             + s->cdlms[ch][ilms].recent,
                                                         s->cdlms[ch][ilms].lms_updates