[FFmpeg-cvslog] r14908 - in trunk/libavcodec: aac.c aac.h
superdump
subversion
Fri Aug 22 20:21:22 CEST 2008
Author: superdump
Date: Fri Aug 22 20:21:22 2008
New Revision: 14908
Log:
Use ff_imdct_half() and vector_fmul_window() for IMDCT and windowing. Reduce
buffer sizes accordingly. This produces a ~10% overall decoding performance
improvement.
Modified:
trunk/libavcodec/aac.c
trunk/libavcodec/aac.h
Modified: trunk/libavcodec/aac.c
==============================================================================
--- trunk/libavcodec/aac.c (original)
+++ trunk/libavcodec/aac.c Fri Aug 22 20:21:22 2008
@@ -1166,11 +1166,11 @@ static void imdct_and_windowing(AACConte
float * in = sce->coeffs;
float * out = sce->ret;
float * saved = sce->saved;
- const float * lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
float * buf = ac->buf_mdct;
+ DECLARE_ALIGNED(16, float, temp[128]);
int i;
// imdct
@@ -1179,12 +1179,10 @@ static void imdct_and_windowing(AACConte
av_log(ac->avccontext, AV_LOG_WARNING,
"Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
"If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
- for (i = 0; i < 2048; i += 256) {
- ff_imdct_calc(&ac->mdct_small, buf + i, in + i/2);
- ac->dsp.vector_fmul_reverse(ac->revers + i/2, buf + i + 128, swindow, 128);
- }
+ for (i = 0; i < 1024; i += 128)
+ ff_imdct_half(&ac->mdct_small, buf + i, in + i);
} else
- ff_imdct_calc(&ac->mdct, buf, in);
+ ff_imdct_half(&ac->mdct, buf, in);
/* window overlapping
* NOTE: To simplify the overlapping code, all 'meaningless' short to long
@@ -1194,38 +1192,38 @@ static void imdct_and_windowing(AACConte
*/
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
- ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved, ac->add_bias, 1024, 1);
+ ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, ac->add_bias, 512);
} else {
for (i = 0; i < 448; i++)
out[i] = saved[i] + ac->add_bias;
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- ac->dsp.vector_fmul_add_add(out + 448 + 0*128, buf + 0*128, swindow_prev, saved + 448 , ac->add_bias, 128, 1);
- ac->dsp.vector_fmul_add_add(out + 448 + 1*128, buf + 2*128, swindow, ac->revers + 0*128, ac->add_bias, 128, 1);
- ac->dsp.vector_fmul_add_add(out + 448 + 2*128, buf + 4*128, swindow, ac->revers + 1*128, ac->add_bias, 128, 1);
- ac->dsp.vector_fmul_add_add(out + 448 + 3*128, buf + 6*128, swindow, ac->revers + 2*128, ac->add_bias, 128, 1);
- ac->dsp.vector_fmul_add_add(out + 448 + 4*128, buf + 8*128, swindow, ac->revers + 3*128, ac->add_bias, 64, 1);
+ ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, ac->add_bias, 64);
+ ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, ac->add_bias, 64);
+ ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, ac->add_bias, 64);
+ ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, ac->add_bias, 64);
+ ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, ac->add_bias, 64);
+ memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
} else {
- ac->dsp.vector_fmul_add_add(out + 448, buf + 448, swindow_prev, saved + 448, ac->add_bias, 128, 1);
+ ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, ac->add_bias, 64);
for (i = 576; i < 1024; i++)
- out[i] = buf[i] + saved[i] + ac->add_bias;
+ out[i] = buf[i-512] + ac->add_bias;
}
}
// buffer update
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- ac->dsp.vector_fmul_add_add(saved, buf + 1024 + 64, swindow + 64, ac->revers + 3*128+64, 0, 64, 1);
- ac->dsp.vector_fmul_add_add(saved + 64, buf + 1024 + 2*128, swindow, ac->revers + 4*128, 0, 128, 1);
- ac->dsp.vector_fmul_add_add(saved + 192, buf + 1024 + 4*128, swindow, ac->revers + 5*128, 0, 128, 1);
- ac->dsp.vector_fmul_add_add(saved + 320, buf + 1024 + 6*128, swindow, ac->revers + 6*128, 0, 128, 1);
- memcpy( saved + 448, ac->revers + 7*128, 128 * sizeof(float));
- memset( saved + 576, 0, 448 * sizeof(float));
+ for (i = 0; i < 64; i++)
+ saved[i] = temp[64 + i] - ac->add_bias;
+ ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
+ ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
+ ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
+ memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- memcpy(saved, buf + 1024, 448 * sizeof(float));
- ac->dsp.vector_fmul_reverse(saved + 448, buf + 1024 + 448, swindow, 128);
- memset(saved + 576, 0, 448 * sizeof(float));
+ memcpy( saved, buf + 512, 448 * sizeof(float));
+ memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
} else { // LONG_STOP or ONLY_LONG
- ac->dsp.vector_fmul_reverse(saved, buf + 1024, lwindow, 1024);
+ memcpy( saved, buf + 512, 512 * sizeof(float));
}
}
Modified: trunk/libavcodec/aac.h
==============================================================================
--- trunk/libavcodec/aac.h (original)
+++ trunk/libavcodec/aac.h Fri Aug 22 20:21:22 2008
@@ -206,7 +206,7 @@ typedef struct {
int band_type_run_end[120]; ///< band type run end points
float sf[120]; ///< scalefactors
DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT
- DECLARE_ALIGNED_16(float, saved[1024]); ///< overlap
+ DECLARE_ALIGNED_16(float, saved[512]); ///< overlap
DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
} SingleChannelElement;
@@ -247,8 +247,7 @@ typedef struct {
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
* @{
*/
- DECLARE_ALIGNED_16(float, buf_mdct[2048]);
- DECLARE_ALIGNED_16(float, revers[1024]);
+ DECLARE_ALIGNED_16(float, buf_mdct[1024]);
/** @} */
/**
More information about the ffmpeg-cvslog
mailing list