[MPlayer-dev-eng] [PATCH] fixed point faad, gnu assembler etc.
Reimar Döffinger
Reimar.Doeffinger at stud.uni-karlsruhe.de
Wed Apr 19 22:28:35 CEST 2006
Hello,
I think this patch will make the biggest speed difference for faad,
though it will reduce the output quality (it enables fixed-point mode and
drops the rounding term from the fixed-point multiplies).
I also suspect it might break some files: could it be that SBR is not
supported in fixed-point mode?
So please test extensively.
Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libfaad2/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/Makefile,v
retrieving revision 1.10
diff -u -r1.10 Makefile
--- libfaad2/Makefile 18 Apr 2006 19:39:29 -0000 1.10
+++ libfaad2/Makefile 19 Apr 2006 09:46:21 -0000
@@ -48,7 +48,7 @@
# Uncomment this to use the FIXED_POINT implementation of FAAD2.
# This should improve performance, especially for SBR files.
-#CFLAGS = -I. $(OPTFLAGS) -DFIXED_POINT
+CFLAGS = -I. $(OPTFLAGS) -DFIXED_POINT
.SUFFIXES: .c .o
Index: libfaad2/fixed.h
===================================================================
RCS file: /cvsroot/mplayer/main/libfaad2/fixed.h,v
retrieving revision 1.7
diff -u -r1.7 fixed.h
--- libfaad2/fixed.h 18 Apr 2006 19:39:30 -0000 1.7
+++ libfaad2/fixed.h 19 Apr 2006 09:46:29 -0000
@@ -226,12 +226,62 @@
*y2 = yt2 << (FRAC_SIZE-FRAC_BITS);
}
+#elif defined(__GNUC__) && (defined (ARCH_X86) || defined(ARCH_X86_64))
+#define MUL_S(A,B,S) /* A = low 32 bits of ((signed 64-bit A*B) >> S); A is overwritten in place, no rounding term is added */ \
+ __asm__ __volatile__ (\
+ "imul %1 \n\t"\
+ "shrd %%cl, %%edx, %%eax \n\t"\
+ : "+a" (A) : "r" (B), "c" (S) : "%edx"); /* A lives in eax (in/out), S is forced into ecx so shrd can read cl, edx is clobbered by imul; the expansion already ends in ';' */
+
+ static INLINE real_t MUL_R(real_t A, real_t B) { /* returns (A*B) >> REAL_BITS, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, REAL_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_C(real_t A, real_t B) { /* returns (A*B) >> COEF_BITS, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, COEF_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_F(real_t A, real_t B) { /* returns (A*B) >> FRAC_BITS, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, FRAC_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_Q2(real_t A, real_t B) { /* returns (A*B) >> Q2_BITS, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, Q2_BITS);
+ return A;
+ }
+
+ static INLINE real_t MUL_SHIFT6(real_t A, real_t B) { /* returns (A*B) >> 6, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, 6);
+ return A;
+ }
+
+ static INLINE real_t MUL_SHIFT23(real_t A, real_t B) { /* returns (A*B) >> 23, computed by MUL_S on the local copy of A */
+ MUL_S(A, B, 23);
+ return A;
+ }
+
+ static INLINE real_t _MulHigh(real_t A, real_t B) { /* returns the upper 32 bits of the signed 64-bit product A*B */
+ __asm__ __volatile__ (\
+ "imul %1 \n\t"\
+ "mov %%edx, %%eax \n\t"\
+ : "+a" (A) : "r" (B) : "%edx"); /* imul leaves the high half in edx; it is copied to eax because A is bound to eax. NOTE(review): the line-continuation backslashes are not needed outside a macro */
+ return A;
+ }
+
+ static INLINE void ComplexMult(real_t *y1, real_t *y2, real_t x1, real_t x2, /* writes *y1 = x1*c1 + x2*c2 and *y2 = x2*c1 - x1*c2 */
+ real_t c1, real_t c2) { /* each product is reduced by _MulHigh first; the sum/difference is then shifted left by FRAC_SIZE-FRAC_BITS */
+ *y1 = (_MulHigh(x1, c1) + _MulHigh(x2, c2))<<(FRAC_SIZE-FRAC_BITS); /* NOTE(review): assumes real_t is signed; left-shifting a negative value is undefined in ISO C - confirm for the targeted compilers */
+ *y2 = (_MulHigh(x2, c1) - _MulHigh(x1, c2))<<(FRAC_SIZE-FRAC_BITS);
+ }
#else
/* multiply with real shift */
- #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (REAL_BITS-1))) >> REAL_BITS)
+ #define MUL_R(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> REAL_BITS) /* truncating shift: no rounding term is added */
/* multiply with coef shift */
- #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (COEF_BITS-1))) >> COEF_BITS)
+ #define MUL_C(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> COEF_BITS) /* truncating shift: no rounding term is added */
/* multiply with fractional shift */
#if defined(_WIN32_WCE) && defined(_ARM_)
/* eVC for PocketPC has an intrinsic function that returns only the high 32 bits of a 32x32 bit multiply */
@@ -240,12 +290,12 @@
return _MulHigh(A,B) << (32-FRAC_BITS);
}
#else
- #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_SIZE-1))) >> FRAC_SIZE)
- #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (FRAC_BITS-1))) >> FRAC_BITS)
+ #define _MulHigh(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_SIZE) /* 64-bit product shifted down by FRAC_SIZE; truncating, no rounding term */
+ #define MUL_F(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> FRAC_BITS) /* multiply with fractional shift; truncating, no rounding term */
#endif
- #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
- #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
- #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+ #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> Q2_BITS) /* 64-bit product >> Q2_BITS; truncating, no rounding term */
+ #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 6) /* 64-bit product >> 6; truncating, no rounding term */
+ #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)) >> 23) /* 64-bit product >> 23; truncating, no rounding term */
/* Complex multiplication */
static INLINE void ComplexMult(real_t *y1, real_t *y2,
More information about the MPlayer-dev-eng
mailing list