[FFmpeg-cvslog] r20884 - in trunk/libavcodec: celp_filters.c celp_filters.h

vitor subversion
Wed Dec 16 18:09:33 CET 2009


Author: vitor
Date: Wed Dec 16 18:09:33 2009
New Revision: 20884

Log:
Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.

Modified:
   trunk/libavcodec/celp_filters.c
   trunk/libavcodec/celp_filters.h

Modified: trunk/libavcodec/celp_filters.c
==============================================================================
--- trunk/libavcodec/celp_filters.c	Wed Dec 16 18:08:23 2009	(r20883)
+++ trunk/libavcodec/celp_filters.c	Wed Dec 16 18:09:33 2009	(r20884)
@@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float 
 {
     int i,n;
 
-    for (n = 0; n < buffer_length; n++) {
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) {
+        float tmp0,tmp1,tmp2,tmp3;
+        float val;
+
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+
+        out0 -= filter_coeffs[2] * old_out1;
+        out1 -= filter_coeffs[2] * old_out2;
+        out2 -= filter_coeffs[2] * old_out3;
+
+        out0 -= filter_coeffs[1] * old_out2;
+        out1 -= filter_coeffs[1] * old_out3;
+
+        out0 -= filter_coeffs[0] * old_out3;
+
+        val = filter_coeffs[3];
+
+        out0 -= val * old_out0;
+        out1 -= val * old_out1;
+        out2 -= val * old_out2;
+        out3 -= val * old_out3;
+
+        old_out3 = out[-5];
+
+        for (i = 5; i <= filter_length; i += 2) {
+            val = filter_coeffs[i-1];
+
+            out0 -= val * old_out3;
+            out1 -= val * old_out0;
+            out2 -= val * old_out1;
+            out3 -= val * old_out2;
+
+            old_out2 = out[-i-1];
+
+            val = filter_coeffs[i];
+
+            out0 -= val * old_out2;
+            out1 -= val * old_out3;
+            out2 -= val * old_out0;
+            out3 -= val * old_out1;
+
+            FFSWAP(float, old_out0, old_out2);
+            old_out1 = old_out3;
+            old_out3 = out[-i-2];
+        }
+
+        tmp0 = out0;
+        tmp1 = out1;
+        tmp2 = out2;
+        tmp3 = out3;
+
+        out3 -= a * tmp2;
+        out2 -= a * tmp1;
+        out1 -= a * tmp0;
+
+        out3 -= b * tmp1;
+        out2 -= b * tmp0;
+
+        out3 -= c * tmp0;
+
+
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+
+    out -= n;
+    in -= n;
+    for (; n < buffer_length; n++) {
         out[n] = in[n];
         for (i = 1; i <= filter_length; i++)
             out[n] -= filter_coeffs[i-1] * out[n-i];

Modified: trunk/libavcodec/celp_filters.h
==============================================================================
--- trunk/libavcodec/celp_filters.h	Wed Dec 16 18:08:23 2009	(r20883)
+++ trunk/libavcodec/celp_filters.h	Wed Dec 16 18:09:33 2009	(r20884)
@@ -90,7 +90,8 @@ int ff_celp_lp_synthesis_filter(int16_t 
  * @param filter_coeffs filter coefficients.
  * @param in input signal
  * @param buffer_length amount of data to process
- * @param filter_length filter length (10 for 10th order LP filter)
+ * @param filter_length filter length (10 for 10th order LP filter). Must be
+ *                      greater than 4 and even.
  *
  * @note Output buffer must contain filter_length samples of past
  *       speech data before pointer.



More information about the ffmpeg-cvslog mailing list