[FFmpeg-cvslog] mpegaudiodec: interleave iMDCT buffer to simplify future SIMD implementations
Vitor Sessak
git at videolan.org
Tue Jan 10 03:52:26 CET 2012
ffmpeg | branch: master | Vitor Sessak <vitor1001 at gmail.com> | Tue Jan 3 21:40:57 2012 +0100| [421c99a4a7c116fc2d4e7a6c866c2209852ef581] | committer: Ronald S. Bultje
mpegaudiodec: interleave iMDCT buffer to simplify future SIMD implementations
Signed-off-by: Ronald S. Bultje <rsbultje at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=421c99a4a7c116fc2d4e7a6c866c2209852ef581
---
libavcodec/mpegaudiodec.c | 50 ++++++++++++++++++++++----------------------
1 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index cd7b7f5..702476b 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -621,17 +621,17 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win)
t0 = s0 + s1;
t1 = s0 - s1;
- out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[9 + j];
- out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[8 - j];
- buf[ 9 + j ] = MULH3(t0, win[18 + 9 + j], 1);
- buf[ 8 - j ] = MULH3(t0, win[18 + 8 - j], 1);
+ out[(9 + j) * SBLIMIT] = MULH3(t1, win[ 9 + j], 1) + buf[4*(9 + j)];
+ out[(8 - j) * SBLIMIT] = MULH3(t1, win[ 8 - j], 1) + buf[4*(8 - j)];
+ buf[4 * ( 9 + j )] = MULH3(t0, win[18 + 9 + j], 1);
+ buf[4 * ( 8 - j )] = MULH3(t0, win[18 + 8 - j], 1);
t0 = s2 + s3;
t1 = s2 - s3;
- out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[9 + 8 - j];
- out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[ j];
- buf[ 9 + 8 - j ] = MULH3(t0, win[18 + 9 + 8 - j], 1);
- buf[ j ] = MULH3(t0, win[18 + j], 1);
+ out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[ 9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
+ out[ j * SBLIMIT] = MULH3(t1, win[ j], 1) + buf[4*( j)];
+ buf[4 * ( 9 + 8 - j )] = MULH3(t0, win[18 + 9 + 8 - j], 1);
+ buf[4 * ( j )] = MULH3(t0, win[18 + j], 1);
i += 4;
}
@@ -639,10 +639,10 @@ static void imdct36(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in, INTFLOAT *win)
s1 = MULH3(tmp[17], icos36h[4], 2);
t0 = s0 + s1;
t1 = s0 - s1;
- out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[9 + 4];
- out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[8 - 4];
- buf[ 9 + 4 ] = MULH3(t0, win[18 + 9 + 4], 1);
- buf[ 8 - 4 ] = MULH3(t0, win[18 + 8 - 4], 1);
+ out[(9 + 4) * SBLIMIT] = MULH3(t1, win[ 9 + 4], 1) + buf[4*(9 + 4)];
+ out[(8 - 4) * SBLIMIT] = MULH3(t1, win[ 8 - 4], 1) + buf[4*(8 - 4)];
+ buf[4 * ( 9 + 4 )] = MULH3(t0, win[18 + 9 + 4], 1);
+ buf[4 * ( 8 - 4 )] = MULH3(t0, win[18 + 8 - 4], 1);
}
/* return the number of decoded frames */
@@ -1407,7 +1407,7 @@ static void compute_imdct(MPADecodeContext *s, GranuleDef *g,
imdct36(out_ptr, buf, ptr, win);
out_ptr += 18 * SBLIMIT;
ptr += 18;
- buf += 18;
+ buf += (j&3) != 3 ? 1 : (4*18-3);
}
for (j = mdct_long_end; j < sblimit; j++) {
/* select frequency inversion */
@@ -1415,40 +1415,40 @@ static void compute_imdct(MPADecodeContext *s, GranuleDef *g,
out_ptr = sb_samples + j;
for (i = 0; i < 6; i++) {
- *out_ptr = buf[i];
+ *out_ptr = buf[4*i];
out_ptr += SBLIMIT;
}
imdct12(out2, ptr + 0);
for (i = 0; i < 6; i++) {
- *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*1];
- buf[i + 6*2] = MULH3(out2[i + 6], win[i + 6], 1);
+ *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*1)];
+ buf[4*(i + 6*2)] = MULH3(out2[i + 6], win[i + 6], 1);
out_ptr += SBLIMIT;
}
imdct12(out2, ptr + 1);
for (i = 0; i < 6; i++) {
- *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[i + 6*2];
- buf[i + 6*0] = MULH3(out2[i + 6], win[i + 6], 1);
+ *out_ptr = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*2)];
+ buf[4*(i + 6*0)] = MULH3(out2[i + 6], win[i + 6], 1);
out_ptr += SBLIMIT;
}
imdct12(out2, ptr + 2);
for (i = 0; i < 6; i++) {
- buf[i + 6*0] = MULH3(out2[i ], win[i ], 1) + buf[i + 6*0];
- buf[i + 6*1] = MULH3(out2[i + 6], win[i + 6], 1);
- buf[i + 6*2] = 0;
+ buf[4*(i + 6*0)] = MULH3(out2[i ], win[i ], 1) + buf[4*(i + 6*0)];
+ buf[4*(i + 6*1)] = MULH3(out2[i + 6], win[i + 6], 1);
+ buf[4*(i + 6*2)] = 0;
}
ptr += 18;
- buf += 18;
+ buf += (j&3) != 3 ? 1 : (4*18-3);
}
/* zero bands */
for (j = sblimit; j < SBLIMIT; j++) {
/* overlap */
out_ptr = sb_samples + j;
for (i = 0; i < 18; i++) {
- *out_ptr = buf[i];
- buf[i] = 0;
+ *out_ptr = buf[4*i];
+ buf[4*i] = 0;
out_ptr += SBLIMIT;
}
- buf += 18;
+ buf += (j&3) != 3 ? 1 : (4*18-3);
}
}
More information about the ffmpeg-cvslog mailing list