[MPlayer-dev-eng] [PATCH] fixed point faad, gnu assembler etc.

Reimar Döffinger Reimar.Doeffinger at stud.uni-karlsruhe.de
Sat Apr 22 20:36:30 CEST 2006


On Sat, Apr 22, 2006 at 08:28:29PM +0200, Reimar Döffinger wrote:
> This one seems better, and does produce quite sane-looking code (tested
> with gcc 4.1 for 64 bit, gcc 2.95 for 32).
> Heavy benchmarking (and any other kind of testing) would still be welcome though.

bah..
-------------- next part --------------
Index: libfaad2/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/Makefile,v
retrieving revision 1.10
diff -u -r1.10 Makefile
--- libfaad2/Makefile	18 Apr 2006 19:39:29 -0000	1.10
+++ libfaad2/Makefile	19 Apr 2006 09:46:21 -0000
@@ -48,7 +48,7 @@
 
 # Uncomment this to use the FIXED_POINT implementation of FAAD2.
 # This should improve performance, especially for SBR files.
-#CFLAGS  = -I. $(OPTFLAGS) -DFIXED_POINT
+CFLAGS  = -I. $(OPTFLAGS) -DFIXED_POINT
 
 .SUFFIXES: .c .o
 
Index: libfaad2/fixed.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/fixed.h,v
retrieving revision 1.7
diff -u -r1.7 fixed.h
--- libfaad2/fixed.h	18 Apr 2006 19:39:30 -0000	1.7
+++ libfaad2/fixed.h	19 Apr 2006 09:46:29 -0000
@@ -226,12 +226,67 @@
     *y2 = yt2 << (FRAC_SIZE-FRAC_BITS);
 }
 
+#elif defined(__GNUC__) && (defined (ARCH_X86) || defined(ARCH_X86_64))
+#define MUL_S(A,B,S) /* A = low 32 bits of ((signed 64-bit A*B) >> S); A is overwritten, S must be a compile-time constant ("i" constraint) */ \
+  __asm__ __volatile__ (\
+    "imull %2              \n\t" /* one-operand imul: edx:eax = eax * operand 2 */ \
+    "shrd %3, %%edx, %%eax \n\t" /* shift eax right by S, filling the vacated top bits from edx */ \
+    : "=&a" (A) : "%0" (A), "mr" (B), "i" (S) : "%edx"); /* NOTE(review): the expansion already ends in ';', so "MUL_S(...);" yields an extra empty statement */
+
+  static INLINE real_t MUL_R(real_t A, real_t B) { /* returns (A*B) >> REAL_BITS, computed by MUL_S with no rounding term */
+    MUL_S(A, B, REAL_BITS); /* rewrites the by-value parameter A with the shifted product */
+    return A;
+  }
+
+  static INLINE real_t MUL_C(real_t A, real_t B) { /* returns (A*B) >> COEF_BITS, computed by MUL_S with no rounding term */
+    MUL_S(A, B, COEF_BITS); /* the shifted product lands in the local copy of A */
+    return A;
+  }
+
+  static INLINE real_t MUL_F(real_t A, real_t B) { /* returns (A*B) >> FRAC_BITS, computed by MUL_S with no rounding term */
+    MUL_S(A, B, FRAC_BITS); /* the shifted product lands in the local copy of A */
+    return A;
+  }
+
+  static INLINE real_t MUL_Q2(real_t A, real_t B) { /* returns (A*B) >> Q2_BITS, computed by MUL_S with no rounding term */
+    MUL_S(A, B, Q2_BITS); /* the shifted product lands in the local copy of A */
+    return A;
+  }
+
+  static INLINE real_t MUL_SHIFT6(real_t A, real_t B) { /* returns (A*B) >> 6, computed by MUL_S with no rounding term */
+    MUL_S(A, B, 6); /* the shifted product lands in the local copy of A */
+    return A;
+  }
+
+  static INLINE real_t MUL_SHIFT23(real_t A, real_t B) { /* returns (A*B) >> 23, computed by MUL_S with no rounding term */
+    MUL_S(A, B, 23); /* the shifted product lands in the local copy of A */
+    return A;
+  }
+
+#define _MulHigh(D, A, B) /* D = upper 32 bits of the signed 64-bit product A*B */ \
+  {\
+    register real_t dummy; /* receives the low half of the product, which is discarded */ \
+    __asm__ __volatile__ ( \
+      "imull %3                \n\t" /* one-operand imul: edx:eax = eax * operand 3 */ \
+      : "=d" (D), "=&a" (dummy) : "%1" (A), "mr" (B)); \
+  } /* statement-style macro taking the destination first, unlike the two-argument expression form _MulHigh(A,B) used by the non-asm code path */
+
+  static INLINE void ComplexMult(real_t *y1, real_t *y2, real_t x1, real_t x2,
+                                 real_t c1, real_t c2) { /* fixed-point complex multiply: *y1 = x1*c1 + x2*c2, *y2 = x2*c1 - x1*c2 */
+    register real_t t1, t2; /* high words of the two partial products */
+    _MulHigh(t1, x1, c1);
+    _MulHigh(t2, x2, c2);
+    *y1 = (t1 + t2)<<(FRAC_SIZE-FRAC_BITS); /* NOTE(review): the high 32 bits equal the ">> FRAC_SIZE" C fallback only if FRAC_SIZE is 32 - confirm */
+    _MulHigh(t1, x2, c1); /* t1 and t2 are reused for the second output */
+    _MulHigh(t2, x1, c2);
+    *y2 = (t1 - t2)<<(FRAC_SIZE-FRAC_BITS);
+  }
 #else
 
   /* multiply with real shift */
-  #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (REAL_BITS-1))) >> REAL_BITS)
+  #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> REAL_BITS)
   /* multiply with coef shift */
-  #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (COEF_BITS-1))) >> COEF_BITS)
+  #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> COEF_BITS)
   /* multiply with fractional shift */
 #if defined(_WIN32_WCE) && defined(_ARM_)
   /* eVC for PocketPC has an intrinsic function that returns only the high 32 bits of a 32x32 bit multiply */
@@ -240,12 +295,12 @@
       return _MulHigh(A,B) << (32-FRAC_BITS);
   }
 #else
-  #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_SIZE-1))) >> FRAC_SIZE)
-  #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_BITS-1))) >> FRAC_BITS)
+  #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_SIZE)
+  #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_BITS)
 #endif
-  #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
-  #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
-  #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+  #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> Q2_BITS)
+  #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 6)
+  #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 23)
 
 /* Complex multiplication */
 static INLINE void ComplexMult(real_t *y1, real_t *y2,


More information about the MPlayer-dev-eng mailing list