[FFmpeg-cvslog] dca: NEON optimised high freq VQ decoding
Mans Rullgard
git at videolan.org
Sat Oct 1 03:06:41 CEST 2011
ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Wed Sep 28 21:53:03 2011 +0100| [bf5d46d8e6de1f3035adb63b50924f474fabb578] | committer: Mans Rullgard
dca: NEON optimised high freq VQ decoding
Signed-off-by: Mans Rullgard <mans at mansr.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bf5d46d8e6de1f3035adb63b50924f474fabb578
---
libavcodec/arm/dca.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
libavcodec/dca.c | 27 +++++++++++++++++++--------
libavcodec/dcadata.h | 2 +-
3 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
new file mode 100644
index 0000000..c4c024a
--- /dev/null
+++ b/libavcodec/arm/dca.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_DCA_H
+#define AVCODEC_ARM_DCA_H
+
+#include <stdint.h>
+#include "config.h"
+
+#if HAVE_NEON && HAVE_INLINE_ASM
+
+#define int8x8_fmul_int32 int8x8_fmul_int32
+static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) /* NEON: dst[i] = src[i] * scale / 16 for i = 0..7 */
+{   /* the :64 and :128 address qualifiers below require src to be 8-byte and dst to be 16-byte aligned */
+    __asm__ ("vcvt.f32.s32 %1, %1, #4 \n" /* scale: s32 with 4 fraction bits -> f32, i.e. scale / 16 (overwrites the operand) */
+             "vld1.8 {d0}, [%2,:64] \n" /* d0 = 8 x int8 loaded from src */
+             "vmovl.s8 q0, d0 \n" /* sign-extend to 8 x s16 held in d0/d1 */
+             "vmovl.s16 q1, d1 \n" /* q1 = elements 4..7 widened to s32 */
+             "vmovl.s16 q0, d0 \n" /* q0 = elements 0..3 widened to s32 */
+             "vcvt.f32.s32 q0, q0 \n" /* s32 -> f32, elements 0..3 */
+             "vcvt.f32.s32 q1, q1 \n" /* s32 -> f32, elements 4..7 */
+             "vmul.f32 q0, q0, %y1 \n" /* multiply every lane by the converted scale */
+             "vmul.f32 q1, q1, %y1 \n"
+             "vst1.32 {q0-q1}, [%m0,:128] \n" /* store the 8 floats to dst */
+             : "=Um"(*(float (*)[8])dst), "+x"(scale) /* scale is modified by the first vcvt, so it must be a read-write operand, not an input */
+             : "r"(src), "m"(*(const int8_t (*)[8])src) /* the "m" input tells the compiler that src[0..7] is read by the asm */
+             : "d0", "d1", "d2", "d3");
+}
+
+#endif
+
+#endif /* AVCODEC_ARM_DCA_H */
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 735d7ba..e963fe0 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -42,6 +42,10 @@
#include "dcadsp.h"
#include "fmtconvert.h"
+#if ARCH_ARM
+# include "arm/dca.h"
+#endif
+
//#define TRACE
#define DCA_PRIM_CHANNELS_MAX (7)
@@ -320,7 +324,7 @@ typedef struct {
int lfe_scale_factor;
/* Subband samples history (for ADPCM) */
- float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
+ DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32];
int hist_index[DCA_PRIM_CHANNELS_MAX];
@@ -1057,6 +1061,16 @@ static int decode_blockcode(int code, int levels, int *values)
static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 };
static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 };
+#ifndef int8x8_fmul_int32
+static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
+{   /* generic C version: writes dst[i] = src[i] * (scale / 16) for the 8 values at src */
+    const float gain = scale / 16.0; /* divide once, outside the loop */
+    const int8_t *const end = src + 8;
+    while (src < end)
+        *dst++ = *src++ * gain;
+}
+#endif
+
static int dca_subsubframe(DCAContext * s, int base_channel, int block_index)
{
int k, l;
@@ -1161,19 +1175,16 @@ static int dca_subsubframe(DCAContext * s, int base_channel, int block_index)
for (l = s->vq_start_subband[k]; l < s->subband_activity[k]; l++) {
/* 1 vector -> 32 samples but we only need the 8 samples
* for this subsubframe. */
- int m;
+ int hfvq = s->high_freq_vq[k][l];
if (!s->debug_flag & 0x01) {
av_log(s->avctx, AV_LOG_DEBUG, "Stream with high frequencies VQ coding\n");
s->debug_flag |= 0x01;
}
- for (m = 0; m < 8; m++) {
- subband_samples[k][l][m] =
- high_freq_vq[s->high_freq_vq[k][l]][subsubframe * 8 +
- m]
- * (float) s->scale_factor[k][l][0] / 16.0;
- }
+ int8x8_fmul_int32(subband_samples[k][l],
+ &high_freq_vq[hfvq][subsubframe * 8],
+ s->scale_factor[k][l][0]);
}
}
diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h
index ed3ec4e..0a83cdf 100644
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -4224,7 +4224,7 @@ static const float lossless_quant_d[32] = {
/* Vector quantization tables */
-static const int8_t high_freq_vq[1024][32] =
+DECLARE_ALIGNED(8, static const int8_t, high_freq_vq)[1024][32] =
{
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
More information about the ffmpeg-cvslog
mailing list