[FFmpeg-devel] [PATCH 3/9] New fmtconvert method, int32_to_float_fmul_scalar_array

Mon Jul 15 19:28:11 CEST 2013

This is similar to int32_to_float_fmul_scalar, but
loads a new scalar multiplier every 8 input samples.
This enables the use of much larger input arrays, which
is important for pipelining on some CPUs (such as
ARMv6).

Signed-off-by: Ben Avison <bavison at riscosopen.org>
---
 libavcodec/dcadec.c     |   23 +++++++++++++++--------
 libavcodec/fmtconvert.c |    7 +++++++
 libavcodec/fmtconvert.h |   14 ++++++++++++++
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index dec7e28..d4adf73 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1306,7 +1306,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 
     /* FIXME */
     float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index];
-    LOCAL_ALIGNED_16(int32_t, block, [8]);
+    LOCAL_ALIGNED_16(int32_t, block, [8 * DCA_SUBBANDS]);
 
     /*
      * Audio data
@@ -1319,6 +1319,8 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
         quant_step_table = lossy_quant_d;
 
     for (k = base_channel; k < s->prim_channels; k++) {
+        float rscale[DCA_SUBBANDS];
+
         if (get_bits_left(&s->gb) < 0)
             return AVERROR_INVALIDDATA;
 
@@ -1341,11 +1343,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
              * Extract bits from the bit stream
              */
             if (!abits) {
-                memset(subband_samples[k][l], 0, 8 * sizeof(subband_samples[0][0][0]));
+                rscale[l] = 0;
+                memset(block + 8 * l, 0, 8 * sizeof(block[0]));
             } else {
                 /* Deal with transients */
                 int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l];
-                float rscale = quant_step_size * s->scale_factor[k][l][sfi] *
+                rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] *
                                s->scalefactor_adj[k][sel];
 
                 if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
@@ -1359,7 +1362,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                         block_code1 = get_bits(&s->gb, size);
                         block_code2 = get_bits(&s->gb, size);
                         err = decode_blockcodes(block_code1, block_code2,
-                                                levels, block);
+                                                levels, block + 8 * l);
                         if (err) {
                             av_log(s->avctx, AV_LOG_ERROR,
                                    "ERROR: block code look-up failed\n");
@@ -1368,19 +1371,23 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                     } else {
                         /* no coding */
                         for (m = 0; m < 8; m++)
-                            block[m] = get_sbits(&s->gb, abits - 3);
+                            block[8 * l + m] = get_sbits(&s->gb, abits - 3);
                     }
                 } else {
                     /* Huffman coded */
                     for (m = 0; m < 8; m++)
-                        block[m] = get_bitalloc(&s->gb,
+                        block[8 * l + m] = get_bitalloc(&s->gb,
                                                 &dca_smpl_bitalloc[abits], sel);
                 }
 
-                s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l],
-                                                       block, rscale, 8);
             }
+        }
 
+        s->fmt_conv.int32_to_float_fmul_scalar_array(&s->fmt_conv, subband_samples[k][0],
+                                                     block, rscale, 8 * s->vq_start_subband[k]);
+
+        for (l = 0; l < s->vq_start_subband[k]; l++) {
+            int m;
             /*
              * Inverse ADPCM if in prediction mode
              */
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index afcbf0d..19c3f98 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -30,6 +30,12 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int32_t *src, float m
         dst[i] = src[i] * mul;
 }
 
+static void int32_to_float_fmul_scalar_array_c(FmtConvertContext *c, float *dst, const int *src, float *mul, int len){
+    int i;
+    for(i=0; i<len; i+=8)
+        c->int32_to_float_fmul_scalar(dst, src, *mul++, 8);
+}
+
 static av_always_inline int float_to_int16_one(const float *src){
     return av_clip_int16(lrintf(*src));
 }
@@ -79,6 +85,7 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+    c->int32_to_float_fmul_scalar_array = int32_to_float_fmul_scalar_array_c;
     c->float_to_int16             = float_to_int16_c;
     c->float_to_int16_interleave  = float_to_int16_interleave_c;
     c->float_interleave           = ff_float_interleave_c;
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index ea04489..323e075 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -38,6 +38,20 @@ typedef struct FmtConvertContext {
     void (*int32_to_float_fmul_scalar)(float *dst, const int32_t *src, float mul, int len);
 
     /**
+     * Convert an array of int32_t to float and multiply by a float value from another array,
+     * stepping along the float array once for each 8 integers.
+     * @param c   pointer to FmtConvertContext.
+     * @param dst destination array of float.
+     *            constraints: 16-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 16-byte aligned
+     * @param mul source array of float multipliers.
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     */
+    void (*int32_to_float_fmul_scalar_array)(struct FmtConvertContext *c, float *dst, const int *src, float *mul, int len);
+
+    /**
      * Convert an array of float to an array of int16_t.
      *
      * Convert floats from in the range [-32768.0,32767.0] to ints
-- 
1.7.5.4