[FFmpeg-cvslog] aacps: align some arrays

Sun May 6 22:18:58 CEST 2012

ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Fri Jan 27 01:24:55 2012 +0000| [47d18d5354e06d4ef7349449fd049b516d6b0ee2] | committer: Mans Rullgard

aacps: align some arrays

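This is required for SIMD optimisations: the affected arrays are now
declared 16-byte aligned, and the hybrid filter tables (f20_0_8, f34_0_12,
f34_1_8, f34_2_4) are padded from 7 to 8 entries per row so that every row
starts on a 16-byte boundary.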

Signed-off-by: Mans Rullgard <mans at mansr.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=47d18d5354e06d4ef7349449fd049b516d6b0ee2
---

 libavcodec/aacps.c          |   19 +++++++++++--------
 libavcodec/aacps.h          |   20 ++++++++++----------
 libavcodec/aacps_tablegen.c |   20 ++++++++++----------
 libavcodec/aacps_tablegen.h |   15 ++++++++-------
 libavcodec/aacpsdsp.c       |    2 +-
 libavcodec/aacpsdsp.h       |    2 +-
 6 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index 7578034..fa7e9ac 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -27,6 +27,7 @@
 #include "aacps.h"
 #include "aacps_tablegen.h"
 #include "aacpsdata.c"
+#include "dsputil.h"
 
 #define PS_BASELINE 0  ///< Operate in Baseline PS mode
                        ///< Baseline implies 10 or 20 stereo bands,
@@ -284,7 +285,7 @@ err:
 
 /** Split one subband into 2 subsubbands with a symmetric real filter.
  * The filter must have its non-center even coefficients equal to zero. */
-static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse)
+static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[8], int len, int reverse)
 {
     int i, j;
     for (i = 0; i < len; i++, in++) {
@@ -304,11 +305,11 @@ static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7
 }
 
 /** Split one subband into 6 subsubbands with a complex filter */
-static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int len)
+static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int len)
 {
     int i;
     int N = 8;
-    float temp[8][2];
+    LOCAL_ALIGNED_16(float, temp, [8], [2]);
 
     for (i = 0; i < len; i++, in++) {
         dsp->hybrid_analysis(temp, in, filter, 1, N);
@@ -327,7 +328,7 @@ static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], c
     }
 }
 
-static void hybrid4_8_12_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[7][2], int N, int len)
+static void hybrid4_8_12_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2], const float (*filter)[8][2], int N, int len)
 {
     int i;
 
@@ -607,8 +608,8 @@ static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC])
 
 static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34)
 {
-    float power[34][PS_QMF_TIME_SLOTS] = {{0}};
-    float transient_gain[34][PS_QMF_TIME_SLOTS];
+    LOCAL_ALIGNED_16(float, power, [34], [PS_QMF_TIME_SLOTS]);
+    LOCAL_ALIGNED_16(float, transient_gain, [34], [PS_QMF_TIME_SLOTS]);
     float *peak_decay_nrg = ps->peak_decay_nrg;
     float *power_smooth = ps->power_smooth;
     float *peak_decay_diff_smooth = ps->peak_decay_diff_smooth;
@@ -621,6 +622,8 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
     int i, k, m, n;
     int n0 = 0, nL = 32;
 
+    memset(power, 0, 34 * sizeof(*power));
+
     if (is34 != ps->is34bands_old) {
         memset(ps->peak_decay_nrg,         0, sizeof(ps->peak_decay_nrg));
         memset(ps->power_smooth,           0, sizeof(ps->power_smooth));
@@ -883,8 +886,8 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
 
 int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top)
 {
-    float Lbuf[91][32][2];
-    float Rbuf[91][32][2];
+    LOCAL_ALIGNED_16(float, Lbuf, [91], [32][2]);
+    LOCAL_ALIGNED_16(float, Rbuf, [91], [32][2]);
     const int len = 32;
     int is34 = ps->is34bands;
 
diff --git a/libavcodec/aacps.h b/libavcodec/aacps.h
index d5c355d..6cdac24 100644
--- a/libavcodec/aacps.h
+++ b/libavcodec/aacps.h
@@ -61,16 +61,16 @@ typedef struct {
     int    is34bands;
     int    is34bands_old;
 
-    float  in_buf[5][44][2];
-    float  delay[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
-    float  ap_delay[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
-    float  peak_decay_nrg[34];
-    float  power_smooth[34];
-    float  peak_decay_diff_smooth[34];
-    float  H11[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    float  H12[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    float  H21[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    float  H22[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, float, in_buf)[5][44][2];
+    DECLARE_ALIGNED(16, float, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
+    DECLARE_ALIGNED(16, float, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
+    DECLARE_ALIGNED(16, float, peak_decay_nrg)[34];
+    DECLARE_ALIGNED(16, float, power_smooth)[34];
+    DECLARE_ALIGNED(16, float, peak_decay_diff_smooth)[34];
+    DECLARE_ALIGNED(16, float, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, float, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, float, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, float, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
     int8_t opd_hist[PS_MAX_NR_IIDICC];
     int8_t ipd_hist[PS_MAX_NR_IIDICC];
     PSDSPContext dsp;
diff --git a/libavcodec/aacps_tablegen.c b/libavcodec/aacps_tablegen.c
index 8650226..635737d 100644
--- a/libavcodec/aacps_tablegen.c
+++ b/libavcodec/aacps_tablegen.c
@@ -69,23 +69,23 @@ int main(void)
     write_float_3d_array(HB, 46, 8, 4);
     printf("};\n");
 
-    printf("static const float f20_0_8[8][7][2] = {\n");
-    write_float_3d_array(f20_0_8, 8, 7, 2);
+    printf("static const DECLARE_ALIGNED(16, float, f20_0_8)[8][8][2] = {\n");
+    write_float_3d_array(f20_0_8, 8, 8, 2);
     printf("};\n");
-    printf("static const float f34_0_12[12][7][2] = {\n");
-    write_float_3d_array(f34_0_12, 12, 7, 2);
+    printf("static const DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2] = {\n");
+    write_float_3d_array(f34_0_12, 12, 8, 2);
     printf("};\n");
-    printf("static const float f34_1_8[8][7][2] = {\n");
-    write_float_3d_array(f34_1_8, 8, 7, 2);
+    printf("static const DECLARE_ALIGNED(16, float, f34_1_8)[8][8][2] = {\n");
+    write_float_3d_array(f34_1_8, 8, 8, 2);
     printf("};\n");
-    printf("static const float f34_2_4[4][7][2] = {\n");
-    write_float_3d_array(f34_2_4, 4, 7, 2);
+    printf("static const DECLARE_ALIGNED(16, float, f34_2_4)[4][8][2] = {\n");
+    write_float_3d_array(f34_2_4, 4, 8, 2);
     printf("};\n");
 
-    printf("static const float Q_fract_allpass[2][50][3][2] = {\n");
+    printf("static const DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2] = {\n");
     write_float_4d_array(Q_fract_allpass, 2, 50, 3, 2);
     printf("};\n");
-    printf("static const float phi_fract[2][50][2] = {\n");
+    printf("static const DECLARE_ALIGNED(16, float, phi_fract)[2][50][2] = {\n");
     write_float_3d_array(phi_fract, 2, 50, 2);
     printf("};\n");
 
diff --git a/libavcodec/aacps_tablegen.h b/libavcodec/aacps_tablegen.h
index 5041f44..d71a373 100644
--- a/libavcodec/aacps_tablegen.h
+++ b/libavcodec/aacps_tablegen.h
@@ -31,6 +31,7 @@
 #else
 #include "libavutil/common.h"
 #include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
 #define NR_ALLPASS_BANDS20 30
 #define NR_ALLPASS_BANDS34 50
 #define PS_AP_LINKS 3
@@ -38,12 +39,12 @@ static float pd_re_smooth[8*8*8];
 static float pd_im_smooth[8*8*8];
 static float HA[46][8][4];
 static float HB[46][8][4];
-static float f20_0_8 [ 8][7][2];
-static float f34_0_12[12][7][2];
-static float f34_1_8 [ 8][7][2];
-static float f34_2_4 [ 4][7][2];
-static float Q_fract_allpass[2][50][3][2];
-static float phi_fract[2][50][2];
+static DECLARE_ALIGNED(16, float, f20_0_8) [ 8][8][2];
+static DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2];
+static DECLARE_ALIGNED(16, float, f34_1_8) [ 8][8][2];
+static DECLARE_ALIGNED(16, float, f34_2_4) [ 4][8][2];
+static DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2];
+static DECLARE_ALIGNED(16, float, phi_fract)[2][50][2];
 
 static const float g0_Q8[] = {
     0.00746082949812f, 0.02270420949825f, 0.04546865930473f, 0.07266113929591f,
@@ -65,7 +66,7 @@ static const float g2_Q4[] = {
      0.16486303567403f,  0.23279856662996f, 0.25f
 };
 
-static void make_filters_from_proto(float (*filter)[7][2], const float *proto, int bands)
+static void make_filters_from_proto(float (*filter)[8][2], const float *proto, int bands)
 {
     int q, n;
     for (q = 0; q < bands; q++) {
diff --git a/libavcodec/aacpsdsp.c b/libavcodec/aacpsdsp.c
index 3d9eb61..c8a2c31 100644
--- a/libavcodec/aacpsdsp.c
+++ b/libavcodec/aacpsdsp.c
@@ -40,7 +40,7 @@ static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2], float *src1,
 }
 
 static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2],
-                                 const float (*filter)[7][2],
+                                 const float (*filter)[8][2],
                                  int stride, int n)
 {
     int i, j;
diff --git a/libavcodec/aacpsdsp.h b/libavcodec/aacpsdsp.h
index 909d341..08d7490 100644
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -30,7 +30,7 @@ typedef struct PSDSPContext {
     void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1,
                             int n);
     void (*hybrid_analysis)(float (*out)[2], float (*in)[2],
-                            const float (*filter)[7][2],
+                            const float (*filter)[8][2],
                             int stride, int n);
     void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64],
                                    int i, int len);