[FFmpeg-cvslog] dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
Justin Ruggles
git at videolan.org
Tue Nov 27 13:47:06 CET 2012
ffmpeg | branch: master | Justin Ruggles <justin.ruggles at gmail.com> | Sat Sep 22 18:13:57 2012 -0400| [284ea790d89441fa1e6b2d72d3c1ed6d61972f0b] | committer: Justin Ruggles
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=284ea790d89441fa1e6b2d72d3c1ed6d61972f0b
---
libavcodec/aacdec.c | 12 +++++------
libavcodec/arm/dsputil_init_neon.c | 3 ---
libavcodec/arm/dsputil_neon.S | 38 -----------------------------------
libavcodec/dsputil.c | 9 ---------
libavcodec/dsputil.h | 10 ---------
libavcodec/libmp3lame.c | 14 ++++++-------
libavcodec/wmaenc.c | 2 +-
libavcodec/wmaprodec.c | 22 +++++++++++---------
libavutil/arm/float_dsp_init_neon.c | 4 ++++
libavutil/arm/float_dsp_neon.S | 38 +++++++++++++++++++++++++++++++++++
libavutil/float_dsp.c | 9 +++++++++
libavutil/float_dsp.h | 15 ++++++++++++++
12 files changed, 93 insertions(+), 83 deletions(-)
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index af17acf..a69f055 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -1360,7 +1360,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len);
scale = sf[idx] / sqrtf(band_energy);
- ac->dsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
+ ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len);
}
} else {
const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
@@ -1506,7 +1506,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
}
} while (len -= 2);
- ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
+ ac->fdsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
}
}
@@ -1730,10 +1730,10 @@ static void apply_intensity_stereo(AACContext *ac, ChannelElement *cpe, int ms_p
c *= 1 - 2 * cpe->ms_mask[idx];
scale = c * sce1->sf[idx];
for (group = 0; group < ics->group_len[g]; group++)
- ac->dsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
- coef0 + group * 128 + offsets[i],
- scale,
- offsets[i + 1] - offsets[i]);
+ ac->fdsp.vector_fmul_scalar(coef1 + group * 128 + offsets[i],
+ coef0 + group * 128 + offsets[i],
+ scale,
+ offsets[i + 1] - offsets[i]);
}
} else {
int bt_run_end = sce1->band_type_run_end[idx];
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index a132f6f..b2e7204 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -144,8 +144,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len);
-void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
- int len);
void ff_butterflies_float_neon(float *v1, float *v2, int len);
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
@@ -305,7 +303,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
}
c->vector_fmul_window = ff_vector_fmul_window_neon;
- c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon;
c->scalarproduct_float = ff_scalarproduct_float_neon;
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index ca1d2de..cf9ad9e 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -642,44 +642,6 @@ function ff_vorbis_inverse_coupling_neon, export=1
endfunc
#endif
-function ff_vector_fmul_scalar_neon, export=1
-VFP len .req r2
-NOVFP len .req r3
-VFP vdup.32 q8, d0[0]
-NOVFP vdup.32 q8, r2
- bics r12, len, #15
- beq 3f
- vld1.32 {q0},[r1,:128]!
- vld1.32 {q1},[r1,:128]!
-1: vmul.f32 q0, q0, q8
- vld1.32 {q2},[r1,:128]!
- vmul.f32 q1, q1, q8
- vld1.32 {q3},[r1,:128]!
- vmul.f32 q2, q2, q8
- vst1.32 {q0},[r0,:128]!
- vmul.f32 q3, q3, q8
- vst1.32 {q1},[r0,:128]!
- subs r12, r12, #16
- beq 2f
- vld1.32 {q0},[r1,:128]!
- vst1.32 {q2},[r0,:128]!
- vld1.32 {q1},[r1,:128]!
- vst1.32 {q3},[r0,:128]!
- b 1b
-2: vst1.32 {q2},[r0,:128]!
- vst1.32 {q3},[r0,:128]!
- ands len, len, #15
- it eq
- bxeq lr
-3: vld1.32 {q0},[r1,:128]!
- vmul.f32 q0, q0, q8
- vst1.32 {q0},[r0,:128]!
- subs len, len, #4
- bgt 3b
- bx lr
- .unreq len
-endfunc
-
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 7a3fdba..d4471dc 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2392,14 +2392,6 @@ static void vector_fmul_window_c(float *dst, const float *src0,
}
}
-static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
- int len)
-{
- int i;
- for (i = 0; i < len; i++)
- dst[i] = src[i] * mul;
-}
-
static void butterflies_float_c(float *restrict v1, float *restrict v2,
int len)
{
@@ -2869,7 +2861,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_float = ff_scalarproduct_float_c;
c->butterflies_float = butterflies_float_c;
c->butterflies_float_interleave = butterflies_float_interleave_c;
- c->vector_fmul_scalar = vector_fmul_scalar_c;
c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index f48aa96..5640f3a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -383,16 +383,6 @@ typedef struct DSPContext {
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
/**
- * Multiply a vector of floats by a scalar float. Source and
- * destination vectors must overlap exactly or not at all.
- * @param dst result vector, 16-byte aligned
- * @param src input vector, 16-byte aligned
- * @param mul scalar value
- * @param len length of vector, multiple of 4
- */
- void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
- int len);
- /**
* Calculate the scalar product of two vectors of floats.
* @param v1 first vector, 16-byte aligned
* @param v2 second vector, 16-byte aligned
diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index 600f6fd..264a0e2 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -28,12 +28,12 @@
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "audio_frame_queue.h"
-#include "dsputil.h"
#include "internal.h"
#include "mpegaudio.h"
#include "mpegaudiodecheader.h"
@@ -50,7 +50,7 @@ typedef struct LAMEContext {
int reservoir;
float *samples_flt[2];
AudioFrameQueue afq;
- DSPContext dsp;
+ AVFloatDSPContext fdsp;
} LAMEContext;
@@ -167,7 +167,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
if (ret < 0)
goto error;
- ff_dsputil_init(&s->dsp, avctx);
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
return 0;
error:
@@ -205,10 +205,10 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
return AVERROR(EINVAL);
}
for (ch = 0; ch < avctx->channels; ch++) {
- s->dsp.vector_fmul_scalar(s->samples_flt[ch],
- (const float *)frame->data[ch],
- 32768.0f,
- FFALIGN(frame->nb_samples, 8));
+ s->fdsp.vector_fmul_scalar(s->samples_flt[ch],
+ (const float *)frame->data[ch],
+ 32768.0f,
+ FFALIGN(frame->nb_samples, 8));
}
ENCODE_BUFFER(lame_encode_buffer_float, float, s->samples_flt);
break;
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index 13d8a1c..044114b 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -111,7 +111,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const AVFrame *frame)
for (ch = 0; ch < avctx->channels; ch++) {
memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
- s->dsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
+ s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
s->dsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], win, len);
s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
mdct->mdct_calc(mdct, s->coefs[ch], s->output);
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index 43fdbc0..ac0cce1 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -86,6 +86,7 @@
* subframe in order to reconstruct the output samples.
*/
+#include "libavutil/float_dsp.h"
#include "libavutil/intfloat.h"
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
@@ -170,6 +171,7 @@ typedef struct WMAProDecodeCtx {
AVCodecContext* avctx; ///< codec context for av_log
AVFrame frame; ///< AVFrame for decoded output
DSPContext dsp; ///< accelerated DSP functions
+ AVFloatDSPContext fdsp;
uint8_t frame_data[MAX_FRAMESIZE +
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer
@@ -280,6 +282,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->avctx = avctx;
ff_dsputil_init(&s->dsp, avctx);
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
+
init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
@@ -1008,12 +1012,12 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
}
} else if (s->avctx->channels == 2) {
int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
- s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0],
- ch_data[0] + sfb[0],
- 181.0 / 128, len);
- s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0],
- ch_data[1] + sfb[0],
- 181.0 / 128, len);
+ s->fdsp.vector_fmul_scalar(ch_data[0] + sfb[0],
+ ch_data[0] + sfb[0],
+ 181.0 / 128, len);
+ s->fdsp.vector_fmul_scalar(ch_data[1] + sfb[0],
+ ch_data[1] + sfb[0],
+ 181.0 / 128, len);
}
}
}
@@ -1259,9 +1263,9 @@ static int decode_subframe(WMAProDecodeCtx *s)
s->channel[c].scale_factor_step;
const float quant = pow(10.0, exp / 20.0);
int start = s->cur_sfb_offsets[b];
- s->dsp.vector_fmul_scalar(s->tmp + start,
- s->channel[c].coeffs + start,
- quant, end - start);
+ s->fdsp.vector_fmul_scalar(s->tmp + start,
+ s->channel[c].coeffs + start,
+ quant, end - start);
}
/** apply imdct (imdct_half == DCTIV with reverse) */
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index 3ca0288..88eb4b3 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
int len);
+void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+ int len);
+
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
+ fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index 4aa6f83..6d7bd52 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2
bx lr
.unreq len
endfunc
+
+function ff_vector_fmul_scalar_neon, export=1
+VFP len .req r2
+NOVFP len .req r3
+VFP vdup.32 q8, d0[0]
+NOVFP vdup.32 q8, r2
+ bics r12, len, #15
+ beq 3f
+ vld1.32 {q0},[r1,:128]!
+ vld1.32 {q1},[r1,:128]!
+1: vmul.f32 q0, q0, q8
+ vld1.32 {q2},[r1,:128]!
+ vmul.f32 q1, q1, q8
+ vld1.32 {q3},[r1,:128]!
+ vmul.f32 q2, q2, q8
+ vst1.32 {q0},[r0,:128]!
+ vmul.f32 q3, q3, q8
+ vst1.32 {q1},[r0,:128]!
+ subs r12, r12, #16
+ beq 2f
+ vld1.32 {q0},[r1,:128]!
+ vst1.32 {q2},[r0,:128]!
+ vld1.32 {q1},[r1,:128]!
+ vst1.32 {q3},[r0,:128]!
+ b 1b
+2: vst1.32 {q2},[r0,:128]!
+ vst1.32 {q3},[r0,:128]!
+ ands len, len, #15
+ it eq
+ bxeq lr
+3: vld1.32 {q0},[r1,:128]!
+ vmul.f32 q0, q0, q8
+ vst1.32 {q0},[r0,:128]!
+ subs len, len, #4
+ bgt 3b
+ bx lr
+ .unreq len
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 2e90939..b6b1181 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
dst[i] += src[i] * mul;
}
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+ int len)
+{
+ int i;
+ for (i = 0; i < len; i++)
+ dst[i] = src[i] * mul;
+}
+
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
fdsp->vector_fmul = vector_fmul_c;
fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
+ fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
#if ARCH_ARM
ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 95cef62..cb4b28f 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
*/
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
int len);
+
+ /**
+ * Multiply a vector of floats by a scalar float. Source and
+ * destination vectors must overlap exactly or not at all.
+ *
+ * @param dst result vector
+ * constraints: 16-byte aligned
+ * @param src input vector
+ * constraints: 16-byte aligned
+ * @param mul scalar value
+ * @param len length of vector
+ * constraints: multiple of 4
+ */
+ void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+ int len);
} AVFloatDSPContext;
/**
More information about the ffmpeg-cvslog mailing list