[FFmpeg-cvslog] avfilter/dctdnoiz: use 32-bit (float) operations instead of 64 (double) for DCTs

Clément Bœsch git at videolan.org
Fri Aug 8 20:00:30 CEST 2014


ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Fri Aug  8 19:59:15 2014 +0200| [1ba7c6ead24f1485ad537cfdedb2a6cf5ca8e869] | committer: Clément Bœsch

avfilter/dctdnoiz: use 32-bit (float) operations instead of 64 (double) for DCTs

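The DCT constants were written as unsuffixed (double) literals, so every
multiplication with a float operand was promoted to double precision and the
result converted back to float. Adding the "f" suffix keeps the arithmetic in
single precision. This makes the code about 1.5x faster without any noticeable
difference in the output.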

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1ba7c6ead24f1485ad537cfdedb2a6cf5ca8e869
---

 libavfilter/vf_dctdnoiz.c |  272 ++++++++++++++++++++++-----------------------
 1 file changed, 136 insertions(+), 136 deletions(-)

diff --git a/libavfilter/vf_dctdnoiz.c b/libavfilter/vf_dctdnoiz.c
index cbfbe53..83a38ed 100644
--- a/libavfilter/vf_dctdnoiz.c
+++ b/libavfilter/vf_dctdnoiz.c
@@ -102,20 +102,20 @@ static void av_always_inline fdct8_1d(float *dst, const float *src,
         const float x09 = x01 + x02;
         const float x0a = x00 - x03;
         const float x0b = x01 - x02;
-        const float x0c = 1.38703984532215*x04 + 0.275899379282943*x07;
-        const float x0d = 1.17587560241936*x05 + 0.785694958387102*x06;
-        const float x0e = -0.785694958387102*x05 + 1.17587560241936*x06;
-        const float x0f = 0.275899379282943*x04 - 1.38703984532215*x07;
-        const float x10 = 0.353553390593274 * (x0c - x0d);
-        const float x11 = 0.353553390593274 * (x0e - x0f);
-        dst[0*dst_stridea] = 0.353553390593274 * (x08 + x09);
-        dst[1*dst_stridea] = 0.353553390593274 * (x0c + x0d);
-        dst[2*dst_stridea] = 0.461939766255643*x0a + 0.191341716182545*x0b;
-        dst[3*dst_stridea] = 0.707106781186547 * (x10 - x11);
-        dst[4*dst_stridea] = 0.353553390593274 * (x08 - x09);
-        dst[5*dst_stridea] = 0.707106781186547 * (x10 + x11);
-        dst[6*dst_stridea] = 0.191341716182545*x0a - 0.461939766255643*x0b;
-        dst[7*dst_stridea] = 0.353553390593274 * (x0e + x0f);
+        const float x0c = 1.38703984532215f*x04 + 0.275899379282943f*x07;
+        const float x0d = 1.17587560241936f*x05 + 0.785694958387102f*x06;
+        const float x0e = -0.785694958387102f*x05 + 1.17587560241936f*x06;
+        const float x0f = 0.275899379282943f*x04 - 1.38703984532215f*x07;
+        const float x10 = 0.353553390593274f * (x0c - x0d);
+        const float x11 = 0.353553390593274f * (x0e - x0f);
+        dst[0*dst_stridea] = 0.353553390593274f * (x08 + x09);
+        dst[1*dst_stridea] = 0.353553390593274f * (x0c + x0d);
+        dst[2*dst_stridea] = 0.461939766255643f*x0a + 0.191341716182545f*x0b;
+        dst[3*dst_stridea] = 0.707106781186547f * (x10 - x11);
+        dst[4*dst_stridea] = 0.353553390593274f * (x08 - x09);
+        dst[5*dst_stridea] = 0.707106781186547f * (x10 + x11);
+        dst[6*dst_stridea] = 0.191341716182545f*x0a - 0.461939766255643f*x0b;
+        dst[7*dst_stridea] = 0.353553390593274f * (x0e + x0f);
         dst += dst_strideb;
         src += src_strideb;
     }
@@ -129,37 +129,37 @@ static void av_always_inline idct8_1d(float *dst, const float *src,
     int i;
 
     for (i = 0; i < 8; i++) {
-        const float x00 = 1.4142135623731*src[0*src_stridea];
-        const float x01 = 1.38703984532215*src[1*src_stridea] + 0.275899379282943*src[7*src_stridea];
-        const float x02 = 1.30656296487638*src[2*src_stridea] + 0.541196100146197*src[6*src_stridea];
-        const float x03 = 1.17587560241936*src[3*src_stridea] + 0.785694958387102*src[5*src_stridea];
-        const float x04 = 1.4142135623731*src[4*src_stridea];
-        const float x05 = -0.785694958387102*src[3*src_stridea] + 1.17587560241936*src[5*src_stridea];
-        const float x06 = 0.541196100146197*src[2*src_stridea] - 1.30656296487638*src[6*src_stridea];
-        const float x07 = -0.275899379282943*src[1*src_stridea] + 1.38703984532215*src[7*src_stridea];
+        const float x00 =  1.4142135623731f  *src[0*src_stridea];
+        const float x01 =  1.38703984532215f *src[1*src_stridea] + 0.275899379282943f*src[7*src_stridea];
+        const float x02 =  1.30656296487638f *src[2*src_stridea] + 0.541196100146197f*src[6*src_stridea];
+        const float x03 =  1.17587560241936f *src[3*src_stridea] + 0.785694958387102f*src[5*src_stridea];
+        const float x04 =  1.4142135623731f  *src[4*src_stridea];
+        const float x05 = -0.785694958387102f*src[3*src_stridea] + 1.17587560241936f*src[5*src_stridea];
+        const float x06 =  0.541196100146197f*src[2*src_stridea] - 1.30656296487638f*src[6*src_stridea];
+        const float x07 = -0.275899379282943f*src[1*src_stridea] + 1.38703984532215f*src[7*src_stridea];
         const float x09 = x00 + x04;
         const float x0a = x01 + x03;
-        const float x0b = 1.4142135623731*x02;
+        const float x0b = 1.4142135623731f*x02;
         const float x0c = x00 - x04;
         const float x0d = x01 - x03;
-        const float x0e = 0.353553390593274 * (x09 - x0b);
-        const float x0f = 0.353553390593274 * (x0c + x0d);
-        const float x10 = 0.353553390593274 * (x0c - x0d);
-        const float x11 = 1.4142135623731*x06;
+        const float x0e = 0.353553390593274f * (x09 - x0b);
+        const float x0f = 0.353553390593274f * (x0c + x0d);
+        const float x10 = 0.353553390593274f * (x0c - x0d);
+        const float x11 = 1.4142135623731f*x06;
         const float x12 = x05 + x07;
         const float x13 = x05 - x07;
-        const float x14 = 0.353553390593274 * (x11 + x12);
-        const float x15 = 0.353553390593274 * (x11 - x12);
-        const float x16 = 0.5*x13;
+        const float x14 = 0.353553390593274f * (x11 + x12);
+        const float x15 = 0.353553390593274f * (x11 - x12);
+        const float x16 = 0.5f*x13;
         const float x08 = -x15;
-        dst[0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.25 * (x09 + x0b) + 0.353553390593274*x0a;
-        dst[1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547 * (x0f - x08);
-        dst[2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547 * (x0f + x08);
-        dst[3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547 * (x0e + x16);
-        dst[4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547 * (x0e - x16);
-        dst[5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547 * (x10 - x14);
-        dst[6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547 * (x10 + x14);
-        dst[7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.25 * (x09 + x0b) - 0.353553390593274*x0a;
+        dst[0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.25f * (x09 + x0b) + 0.353553390593274f*x0a;
+        dst[1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x0f - x08);
+        dst[2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x0f + x08);
+        dst[3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x0e + x16);
+        dst[4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x0e - x16);
+        dst[5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x10 - x14);
+        dst[6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x10 + x14);
+        dst[7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.25f * (x09 + x0b) - 0.353553390593274f*x0a;
         dst += dst_strideb;
         src += src_strideb;
     }
@@ -201,50 +201,50 @@ static void av_always_inline fdct16_1d(float *dst, const float *src,
         const float x19 = x11 + x12;
         const float x1a = x10 - x13;
         const float x1b = x11 - x12;
-        const float x1c = 1.38703984532215*x14 + 0.275899379282943*x17;
-        const float x1d = 1.17587560241936*x15 + 0.785694958387102*x16;
-        const float x1e = -0.785694958387102*x15 + 1.17587560241936*x16;
-        const float x1f = 0.275899379282943*x14 - 1.38703984532215*x17;
-        const float x20 = 0.25 * (x1c - x1d);
-        const float x21 = 0.25 * (x1e - x1f);
-        const float x22 = 1.40740373752638*x08 + 0.138617169199091*x0f;
-        const float x23 = 1.35331800117435*x09 + 0.410524527522357*x0e;
-        const float x24 = 1.24722501298667*x0a + 0.666655658477747*x0d;
-        const float x25 = 1.09320186700176*x0b + 0.897167586342636*x0c;
-        const float x26 = -0.897167586342636*x0b + 1.09320186700176*x0c;
-        const float x27 = 0.666655658477747*x0a - 1.24722501298667*x0d;
-        const float x28 = -0.410524527522357*x09 + 1.35331800117435*x0e;
-        const float x29 = 0.138617169199091*x08 - 1.40740373752638*x0f;
+        const float x1c =   1.38703984532215f*x14 + 0.275899379282943f*x17;
+        const float x1d =   1.17587560241936f*x15 + 0.785694958387102f*x16;
+        const float x1e = -0.785694958387102f*x15 + 1.17587560241936f *x16;
+        const float x1f =  0.275899379282943f*x14 - 1.38703984532215f *x17;
+        const float x20 = 0.25f * (x1c - x1d);
+        const float x21 = 0.25f * (x1e - x1f);
+        const float x22 =  1.40740373752638f *x08 + 0.138617169199091f*x0f;
+        const float x23 =  1.35331800117435f *x09 + 0.410524527522357f*x0e;
+        const float x24 =  1.24722501298667f *x0a + 0.666655658477747f*x0d;
+        const float x25 =  1.09320186700176f *x0b + 0.897167586342636f*x0c;
+        const float x26 = -0.897167586342636f*x0b + 1.09320186700176f *x0c;
+        const float x27 =  0.666655658477747f*x0a - 1.24722501298667f *x0d;
+        const float x28 = -0.410524527522357f*x09 + 1.35331800117435f *x0e;
+        const float x29 =  0.138617169199091f*x08 - 1.40740373752638f *x0f;
         const float x2a = x22 + x25;
         const float x2b = x23 + x24;
         const float x2c = x22 - x25;
         const float x2d = x23 - x24;
-        const float x2e = 0.25 * (x2a - x2b);
-        const float x2f = 0.326640741219094*x2c + 0.135299025036549*x2d;
-        const float x30 = 0.135299025036549*x2c - 0.326640741219094*x2d;
+        const float x2e = 0.25f * (x2a - x2b);
+        const float x2f = 0.326640741219094f*x2c + 0.135299025036549f*x2d;
+        const float x30 = 0.135299025036549f*x2c - 0.326640741219094f*x2d;
         const float x31 = x26 + x29;
         const float x32 = x27 + x28;
         const float x33 = x26 - x29;
         const float x34 = x27 - x28;
-        const float x35 = 0.25 * (x31 - x32);
-        const float x36 = 0.326640741219094*x33 + 0.135299025036549*x34;
-        const float x37 = 0.135299025036549*x33 - 0.326640741219094*x34;
-        dst[ 0*dst_stridea] = 0.25 * (x18 + x19);
-        dst[ 1*dst_stridea] = 0.25 * (x2a + x2b);
-        dst[ 2*dst_stridea] = 0.25 * (x1c + x1d);
-        dst[ 3*dst_stridea] = 0.707106781186547 * (x2f - x37);
-        dst[ 4*dst_stridea] = 0.326640741219094*x1a + 0.135299025036549*x1b;
-        dst[ 5*dst_stridea] = 0.707106781186547 * (x2f + x37);
-        dst[ 6*dst_stridea] = 0.707106781186547 * (x20 - x21);
-        dst[ 7*dst_stridea] = 0.707106781186547 * (x2e + x35);
-        dst[ 8*dst_stridea] = 0.25 * (x18 - x19);
-        dst[ 9*dst_stridea] = 0.707106781186547 * (x2e - x35);
-        dst[10*dst_stridea] = 0.707106781186547 * (x20 + x21);
-        dst[11*dst_stridea] = 0.707106781186547 * (x30 - x36);
-        dst[12*dst_stridea] = 0.135299025036549*x1a - 0.326640741219094*x1b;
-        dst[13*dst_stridea] = 0.707106781186547 * (x30 + x36);
-        dst[14*dst_stridea] = 0.25 * (x1e + x1f);
-        dst[15*dst_stridea] = 0.25 * (x31 + x32);
+        const float x35 = 0.25f * (x31 - x32);
+        const float x36 = 0.326640741219094f*x33 + 0.135299025036549f*x34;
+        const float x37 = 0.135299025036549f*x33 - 0.326640741219094f*x34;
+        dst[ 0*dst_stridea] = 0.25f * (x18 + x19);
+        dst[ 1*dst_stridea] = 0.25f * (x2a + x2b);
+        dst[ 2*dst_stridea] = 0.25f * (x1c + x1d);
+        dst[ 3*dst_stridea] = 0.707106781186547f * (x2f - x37);
+        dst[ 4*dst_stridea] = 0.326640741219094f*x1a + 0.135299025036549f*x1b;
+        dst[ 5*dst_stridea] = 0.707106781186547f * (x2f + x37);
+        dst[ 6*dst_stridea] = 0.707106781186547f * (x20 - x21);
+        dst[ 7*dst_stridea] = 0.707106781186547f * (x2e + x35);
+        dst[ 8*dst_stridea] = 0.25f * (x18 - x19);
+        dst[ 9*dst_stridea] = 0.707106781186547f * (x2e - x35);
+        dst[10*dst_stridea] = 0.707106781186547f * (x20 + x21);
+        dst[11*dst_stridea] = 0.707106781186547f * (x30 - x36);
+        dst[12*dst_stridea] = 0.135299025036549f*x1a - 0.326640741219094f*x1b;
+        dst[13*dst_stridea] = 0.707106781186547f * (x30 + x36);
+        dst[14*dst_stridea] = 0.25f * (x1e + x1f);
+        dst[15*dst_stridea] = 0.25f * (x31 + x32);
         dst += dst_strideb;
         src += src_strideb;
     }
@@ -258,91 +258,91 @@ static void av_always_inline idct16_1d(float *dst, const float *src,
     int i;
 
     for (i = 0; i < 16; i++) {
-        const float x00 =  1.4142135623731  *src[ 0*src_stridea];
-        const float x01 =  1.40740373752638 *src[ 1*src_stridea] + 0.138617169199091*src[15*src_stridea];
-        const float x02 =  1.38703984532215 *src[ 2*src_stridea] + 0.275899379282943*src[14*src_stridea];
-        const float x03 =  1.35331800117435 *src[ 3*src_stridea] + 0.410524527522357*src[13*src_stridea];
-        const float x04 =  1.30656296487638 *src[ 4*src_stridea] + 0.541196100146197*src[12*src_stridea];
-        const float x05 =  1.24722501298667 *src[ 5*src_stridea] + 0.666655658477747*src[11*src_stridea];
-        const float x06 =  1.17587560241936 *src[ 6*src_stridea] + 0.785694958387102*src[10*src_stridea];
-        const float x07 =  1.09320186700176 *src[ 7*src_stridea] + 0.897167586342636*src[ 9*src_stridea];
-        const float x08 =  1.4142135623731  *src[ 8*src_stridea];
-        const float x09 = -0.897167586342636*src[ 7*src_stridea] + 1.09320186700176*src[ 9*src_stridea];
-        const float x0a =  0.785694958387102*src[ 6*src_stridea] - 1.17587560241936*src[10*src_stridea];
-        const float x0b = -0.666655658477747*src[ 5*src_stridea] + 1.24722501298667*src[11*src_stridea];
-        const float x0c =  0.541196100146197*src[ 4*src_stridea] - 1.30656296487638*src[12*src_stridea];
-        const float x0d = -0.410524527522357*src[ 3*src_stridea] + 1.35331800117435*src[13*src_stridea];
-        const float x0e =  0.275899379282943*src[ 2*src_stridea] - 1.38703984532215*src[14*src_stridea];
-        const float x0f = -0.138617169199091*src[ 1*src_stridea] + 1.40740373752638*src[15*src_stridea];
+        const float x00 =  1.4142135623731f  *src[ 0*src_stridea];
+        const float x01 =  1.40740373752638f *src[ 1*src_stridea] + 0.138617169199091f*src[15*src_stridea];
+        const float x02 =  1.38703984532215f *src[ 2*src_stridea] + 0.275899379282943f*src[14*src_stridea];
+        const float x03 =  1.35331800117435f *src[ 3*src_stridea] + 0.410524527522357f*src[13*src_stridea];
+        const float x04 =  1.30656296487638f *src[ 4*src_stridea] + 0.541196100146197f*src[12*src_stridea];
+        const float x05 =  1.24722501298667f *src[ 5*src_stridea] + 0.666655658477747f*src[11*src_stridea];
+        const float x06 =  1.17587560241936f *src[ 6*src_stridea] + 0.785694958387102f*src[10*src_stridea];
+        const float x07 =  1.09320186700176f *src[ 7*src_stridea] + 0.897167586342636f*src[ 9*src_stridea];
+        const float x08 =  1.4142135623731f  *src[ 8*src_stridea];
+        const float x09 = -0.897167586342636f*src[ 7*src_stridea] + 1.09320186700176f*src[ 9*src_stridea];
+        const float x0a =  0.785694958387102f*src[ 6*src_stridea] - 1.17587560241936f*src[10*src_stridea];
+        const float x0b = -0.666655658477747f*src[ 5*src_stridea] + 1.24722501298667f*src[11*src_stridea];
+        const float x0c =  0.541196100146197f*src[ 4*src_stridea] - 1.30656296487638f*src[12*src_stridea];
+        const float x0d = -0.410524527522357f*src[ 3*src_stridea] + 1.35331800117435f*src[13*src_stridea];
+        const float x0e =  0.275899379282943f*src[ 2*src_stridea] - 1.38703984532215f*src[14*src_stridea];
+        const float x0f = -0.138617169199091f*src[ 1*src_stridea] + 1.40740373752638f*src[15*src_stridea];
         const float x12 = x00 + x08;
         const float x13 = x01 + x07;
         const float x14 = x02 + x06;
         const float x15 = x03 + x05;
-        const float x16 = 1.4142135623731*x04;
+        const float x16 = 1.4142135623731f*x04;
         const float x17 = x00 - x08;
         const float x18 = x01 - x07;
         const float x19 = x02 - x06;
         const float x1a = x03 - x05;
         const float x1d = x12 + x16;
         const float x1e = x13 + x15;
-        const float x1f = 1.4142135623731*x14;
+        const float x1f = 1.4142135623731f*x14;
         const float x20 = x12 - x16;
         const float x21 = x13 - x15;
-        const float x22 = 0.25 * (x1d - x1f);
-        const float x23 = 0.25 * (x20 + x21);
-        const float x24 = 0.25 * (x20 - x21);
-        const float x25 = 1.4142135623731*x17;
-        const float x26 = 1.30656296487638*x18 + 0.541196100146197*x1a;
-        const float x27 = 1.4142135623731*x19;
-        const float x28 = -0.541196100146197*x18 + 1.30656296487638*x1a;
-        const float x29 = 0.176776695296637 * (x25 + x27) + 0.25*x26;
-        const float x2a = 0.25 * (x25 - x27);
-        const float x2b = 0.176776695296637 * (x25 + x27) - 0.25*x26;
-        const float x2c = 0.353553390593274*x28;
-        const float x1b = 0.707106781186547 * (x2a - x2c);
-        const float x1c = 0.707106781186547 * (x2a + x2c);
-        const float x2d = 1.4142135623731*x0c;
+        const float x22 = 0.25f * (x1d - x1f);
+        const float x23 = 0.25f * (x20 + x21);
+        const float x24 = 0.25f * (x20 - x21);
+        const float x25 = 1.4142135623731f*x17;
+        const float x26 = 1.30656296487638f*x18 + 0.541196100146197f*x1a;
+        const float x27 = 1.4142135623731f*x19;
+        const float x28 = -0.541196100146197f*x18 + 1.30656296487638f*x1a;
+        const float x29 = 0.176776695296637f * (x25 + x27) + 0.25f*x26;
+        const float x2a = 0.25f * (x25 - x27);
+        const float x2b = 0.176776695296637f * (x25 + x27) - 0.25f*x26;
+        const float x2c = 0.353553390593274f*x28;
+        const float x1b = 0.707106781186547f * (x2a - x2c);
+        const float x1c = 0.707106781186547f * (x2a + x2c);
+        const float x2d = 1.4142135623731f*x0c;
         const float x2e = x0b + x0d;
         const float x2f = x0a + x0e;
         const float x30 = x09 + x0f;
         const float x31 = x09 - x0f;
         const float x32 = x0a - x0e;
         const float x33 = x0b - x0d;
-        const float x37 = 1.4142135623731*x2d;
-        const float x38 = 1.30656296487638*x2e + 0.541196100146197*x30;
-        const float x39 = 1.4142135623731*x2f;
-        const float x3a = -0.541196100146197*x2e + 1.30656296487638*x30;
-        const float x3b = 0.176776695296637 * (x37 + x39) + 0.25*x38;
-        const float x3c = 0.25 * (x37 - x39);
-        const float x3d = 0.176776695296637 * (x37 + x39) - 0.25*x38;
-        const float x3e = 0.353553390593274*x3a;
-        const float x34 = 0.707106781186547 * (x3c - x3e);
-        const float x35 = 0.707106781186547 * (x3c + x3e);
-        const float x3f = 1.4142135623731*x32;
+        const float x37 = 1.4142135623731f*x2d;
+        const float x38 = 1.30656296487638f*x2e + 0.541196100146197f*x30;
+        const float x39 = 1.4142135623731f*x2f;
+        const float x3a = -0.541196100146197f*x2e + 1.30656296487638f*x30;
+        const float x3b = 0.176776695296637f * (x37 + x39) + 0.25f*x38;
+        const float x3c = 0.25f * (x37 - x39);
+        const float x3d = 0.176776695296637f * (x37 + x39) - 0.25f*x38;
+        const float x3e = 0.353553390593274f*x3a;
+        const float x34 = 0.707106781186547f * (x3c - x3e);
+        const float x35 = 0.707106781186547f * (x3c + x3e);
+        const float x3f = 1.4142135623731f*x32;
         const float x40 = x31 + x33;
         const float x41 = x31 - x33;
-        const float x42 = 0.25 * (x3f + x40);
-        const float x43 = 0.25 * (x3f - x40);
-        const float x44 = 0.353553390593274*x41;
+        const float x42 = 0.25f * (x3f + x40);
+        const float x43 = 0.25f * (x3f - x40);
+        const float x44 = 0.353553390593274f*x41;
         const float x36 = -x43;
         const float x10 = -x34;
         const float x11 = -x3d;
-        dst[ 0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.176776695296637 * (x1d + x1f) + 0.25*x1e;
-        dst[ 1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547 * (x29 - x11);
-        dst[ 2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547 * (x29 + x11);
-        dst[ 3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547 * (x23 + x36);
-        dst[ 4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547 * (x23 - x36);
-        dst[ 5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547 * (x1b - x35);
-        dst[ 6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547 * (x1b + x35);
-        dst[ 7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.707106781186547 * (x22 + x44);
-        dst[ 8*dst_stridea] = (add ? dst[ 8*dst_stridea] : 0) + 0.707106781186547 * (x22 - x44);
-        dst[ 9*dst_stridea] = (add ? dst[ 9*dst_stridea] : 0) + 0.707106781186547 * (x1c - x10);
-        dst[10*dst_stridea] = (add ? dst[10*dst_stridea] : 0) + 0.707106781186547 * (x1c + x10);
-        dst[11*dst_stridea] = (add ? dst[11*dst_stridea] : 0) + 0.707106781186547 * (x24 + x42);
-        dst[12*dst_stridea] = (add ? dst[12*dst_stridea] : 0) + 0.707106781186547 * (x24 - x42);
-        dst[13*dst_stridea] = (add ? dst[13*dst_stridea] : 0) + 0.707106781186547 * (x2b - x3b);
-        dst[14*dst_stridea] = (add ? dst[14*dst_stridea] : 0) + 0.707106781186547 * (x2b + x3b);
-        dst[15*dst_stridea] = (add ? dst[15*dst_stridea] : 0) + 0.176776695296637 * (x1d + x1f) - 0.25*x1e;
+        dst[ 0*dst_stridea] = (add ? dst[ 0*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) + 0.25f*x1e;
+        dst[ 1*dst_stridea] = (add ? dst[ 1*dst_stridea] : 0) + 0.707106781186547f * (x29 - x11);
+        dst[ 2*dst_stridea] = (add ? dst[ 2*dst_stridea] : 0) + 0.707106781186547f * (x29 + x11);
+        dst[ 3*dst_stridea] = (add ? dst[ 3*dst_stridea] : 0) + 0.707106781186547f * (x23 + x36);
+        dst[ 4*dst_stridea] = (add ? dst[ 4*dst_stridea] : 0) + 0.707106781186547f * (x23 - x36);
+        dst[ 5*dst_stridea] = (add ? dst[ 5*dst_stridea] : 0) + 0.707106781186547f * (x1b - x35);
+        dst[ 6*dst_stridea] = (add ? dst[ 6*dst_stridea] : 0) + 0.707106781186547f * (x1b + x35);
+        dst[ 7*dst_stridea] = (add ? dst[ 7*dst_stridea] : 0) + 0.707106781186547f * (x22 + x44);
+        dst[ 8*dst_stridea] = (add ? dst[ 8*dst_stridea] : 0) + 0.707106781186547f * (x22 - x44);
+        dst[ 9*dst_stridea] = (add ? dst[ 9*dst_stridea] : 0) + 0.707106781186547f * (x1c - x10);
+        dst[10*dst_stridea] = (add ? dst[10*dst_stridea] : 0) + 0.707106781186547f * (x1c + x10);
+        dst[11*dst_stridea] = (add ? dst[11*dst_stridea] : 0) + 0.707106781186547f * (x24 + x42);
+        dst[12*dst_stridea] = (add ? dst[12*dst_stridea] : 0) + 0.707106781186547f * (x24 - x42);
+        dst[13*dst_stridea] = (add ? dst[13*dst_stridea] : 0) + 0.707106781186547f * (x2b - x3b);
+        dst[14*dst_stridea] = (add ? dst[14*dst_stridea] : 0) + 0.707106781186547f * (x2b + x3b);
+        dst[15*dst_stridea] = (add ? dst[15*dst_stridea] : 0) + 0.176776695296637f * (x1d + x1f) - 0.25f*x1e;
         dst += dst_strideb;
         src += src_strideb;
     }



More information about the ffmpeg-cvslog mailing list