[Ffmpeg-devel] [patch] OS X Intel support
Sam Hocevar
sam
Thu Feb 16 22:55:46 CET 2006
Hello. I am afraid you are not going to like these patches (I do not
like most of them myself), but due to Apple shipping deprecated build
tools (as in "not modified since the days of NeXT") with their Intel
machines, we have to cope with a badly outdated version of the x86 assembler.
I split the changes into four parts:
patch-ffmpeg-nop.diff: replaces "/nop" with "#nop" in the inline
assembly code, because "/" is not recognised as a comment symbol.
patch-ffmpeg-inline-asm-macros.diff: replaces inline assembly macros
(".macro"/".endm") with strictly equivalent C preprocessor macros, because
the gas shipped with OS X Intel does not support assembler macros.
patch-ffmpeg-p2align.diff: replaces ".balign 2**X" constructs with
the strictly equivalent (yet not as readable) ".p2align X", because,
again, this gas does not support .balign.
patch-ffmpeg-0b.diff: replaces binary immediates such as "$0b00011001"
with the equivalent hexadecimal "$0x19", because, guess what, this gas does
not support binary literals either. I put the binary value into comments
so as not to lose the information.
I hope most of these changes have a chance to get into CVS.
Thanks in advance,
--
Sam.
-------------- next part --------------
Index: libavcodec/i386/simple_idct_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/simple_idct_mmx.c,v
retrieving revision 1.16
diff -u -r1.16 simple_idct_mmx.c
--- libavcodec/i386/simple_idct_mmx.c 12 Jan 2006 22:43:18 -0000 1.16
+++ libavcodec/i386/simple_idct_mmx.c 26 Jan 2006 22:02:55 -0000
@@ -459,13 +459,12 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
#else
-
#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
"movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
"movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
@@ -783,10 +782,10 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
@@ -860,10 +859,10 @@
"movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
@@ -928,10 +927,10 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
@@ -1007,10 +1006,10 @@
"movd %%mm5, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
@@ -1073,10 +1072,10 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
"#.balign 16 \n\t"\
@@ -1141,10 +1140,10 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
@@ -1217,10 +1216,10 @@
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
"jmp 9f \n\t"
@@ -1259,10 +1258,10 @@
"movq %%mm0, 80+" #dst " \n\t"
//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
-//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
-//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),#nop, 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),#nop, 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),#nop, 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),#nop, 20)
#endif
-------------- next part --------------
Index: libavcodec/i386/fdct_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/fdct_mmx.c,v
retrieving revision 1.19
diff -u -r1.19 fdct_mmx.c
--- libavcodec/i386/fdct_mmx.c 22 Dec 2005 01:10:09 -0000 1.19
+++ libavcodec/i386/fdct_mmx.c 16 Feb 2006 21:37:31 -0000
@@ -350,61 +350,61 @@
static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
{
+#define FDCT_ROW_SSE2_H1(i,t) \
+ "movq " #i "(%0), %%xmm2 \n\t" \
+ "movq " #i "+8(%0), %%xmm0 \n\t" \
+ "movdqa " #t "+32(%1), %%xmm3 \n\t" \
+ "movdqa " #t "+48(%1), %%xmm7 \n\t" \
+ "movdqa " #t "(%1), %%xmm4 \n\t" \
+ "movdqa " #t "+16(%1), %%xmm5 \n\t"
+
+#define FDCT_ROW_SSE2_H2(i,t) \
+ "movq " #i "(%0), %%xmm2 \n\t" \
+ "movq " #i "+8(%0), %%xmm0 \n\t" \
+ "movdqa " #t "+32(%1), %%xmm3 \n\t" \
+ "movdqa " #t "+48(%1), %%xmm7 \n\t"
+
+#define FDCT_ROW_SSE2(i) \
+ "movq %%xmm2, %%xmm1 \n\t" \
+ "pshuflw $27, %%xmm0, %%xmm0 \n\t" \
+ "paddsw %%xmm0, %%xmm1 \n\t" \
+ "psubsw %%xmm0, %%xmm2 \n\t" \
+ "punpckldq %%xmm2, %%xmm1 \n\t" \
+ "pshufd $78, %%xmm1, %%xmm2 \n\t" \
+ "pmaddwd %%xmm2, %%xmm3 \n\t" \
+ "pmaddwd %%xmm1, %%xmm7 \n\t" \
+ "pmaddwd %%xmm5, %%xmm2 \n\t" \
+ "pmaddwd %%xmm4, %%xmm1 \n\t" \
+ "paddd %%xmm7, %%xmm3 \n\t" \
+ "paddd %%xmm2, %%xmm1 \n\t" \
+ "paddd %%xmm6, %%xmm3 \n\t" \
+ "paddd %%xmm6, %%xmm1 \n\t" \
+ "psrad %3, %%xmm3 \n\t" \
+ "psrad %3, %%xmm1 \n\t" \
+ "packssdw %%xmm3, %%xmm1 \n\t" \
+ "movdqa %%xmm1, " #i "(%4) \n\t"
+
asm volatile(
- ".macro FDCT_ROW_SSE2_H1 i t \n\t"
- "movq \\i(%0), %%xmm2 \n\t"
- "movq \\i+8(%0), %%xmm0 \n\t"
- "movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
- "movdqa \\t(%1), %%xmm4 \n\t"
- "movdqa \\t+16(%1), %%xmm5 \n\t"
- ".endm \n\t"
- ".macro FDCT_ROW_SSE2_H2 i t \n\t"
- "movq \\i(%0), %%xmm2 \n\t"
- "movq \\i+8(%0), %%xmm0 \n\t"
- "movdqa \\t+32(%1), %%xmm3 \n\t"
- "movdqa \\t+48(%1), %%xmm7 \n\t"
- ".endm \n\t"
- ".macro FDCT_ROW_SSE2 i \n\t"
- "movq %%xmm2, %%xmm1 \n\t"
- "pshuflw $27, %%xmm0, %%xmm0 \n\t"
- "paddsw %%xmm0, %%xmm1 \n\t"
- "psubsw %%xmm0, %%xmm2 \n\t"
- "punpckldq %%xmm2, %%xmm1 \n\t"
- "pshufd $78, %%xmm1, %%xmm2 \n\t"
- "pmaddwd %%xmm2, %%xmm3 \n\t"
- "pmaddwd %%xmm1, %%xmm7 \n\t"
- "pmaddwd %%xmm5, %%xmm2 \n\t"
- "pmaddwd %%xmm4, %%xmm1 \n\t"
- "paddd %%xmm7, %%xmm3 \n\t"
- "paddd %%xmm2, %%xmm1 \n\t"
- "paddd %%xmm6, %%xmm3 \n\t"
- "paddd %%xmm6, %%xmm1 \n\t"
- "psrad %3, %%xmm3 \n\t"
- "psrad %3, %%xmm1 \n\t"
- "packssdw %%xmm3, %%xmm1 \n\t"
- "movdqa %%xmm1, \\i(%4) \n\t"
- ".endm \n\t"
"movdqa (%2), %%xmm6 \n\t"
- "FDCT_ROW_SSE2_H1 0 0 \n\t"
- "FDCT_ROW_SSE2 0 \n\t"
- "FDCT_ROW_SSE2_H2 64 0 \n\t"
- "FDCT_ROW_SSE2 64 \n\t"
-
- "FDCT_ROW_SSE2_H1 16 64 \n\t"
- "FDCT_ROW_SSE2 16 \n\t"
- "FDCT_ROW_SSE2_H2 112 64 \n\t"
- "FDCT_ROW_SSE2 112 \n\t"
-
- "FDCT_ROW_SSE2_H1 32 128 \n\t"
- "FDCT_ROW_SSE2 32 \n\t"
- "FDCT_ROW_SSE2_H2 96 128 \n\t"
- "FDCT_ROW_SSE2 96 \n\t"
-
- "FDCT_ROW_SSE2_H1 48 192 \n\t"
- "FDCT_ROW_SSE2 48 \n\t"
- "FDCT_ROW_SSE2_H2 80 192 \n\t"
- "FDCT_ROW_SSE2 80 \n\t"
+ FDCT_ROW_SSE2_H1(0,0)
+ FDCT_ROW_SSE2(0)
+ FDCT_ROW_SSE2_H2(64,0)
+ FDCT_ROW_SSE2(64)
+
+ FDCT_ROW_SSE2_H1(16,64)
+ FDCT_ROW_SSE2(16)
+ FDCT_ROW_SSE2_H2(112,64)
+ FDCT_ROW_SSE2(112)
+
+ FDCT_ROW_SSE2_H1(32,128)
+ FDCT_ROW_SSE2(32)
+ FDCT_ROW_SSE2_H2(96,128)
+ FDCT_ROW_SSE2(96)
+
+ FDCT_ROW_SSE2_H1(48,192)
+ FDCT_ROW_SSE2(48)
+ FDCT_ROW_SSE2_H2(80,192)
+ FDCT_ROW_SSE2(80)
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
);
-------------- next part --------------
Index: libavcodec/i386/dsputil_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.109
diff -u -r1.109 dsputil_mmx.c
--- libavcodec/i386/dsputil_mmx.c 12 Jan 2006 22:43:17 -0000 1.109
+++ libavcodec/i386/dsputil_mmx.c 26 Jan 2006 22:02:55 -0000
@@ -52,7 +52,7 @@
static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL;
static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL;
-#define JUMPALIGN() __asm __volatile (".balign 8"::)
+#define JUMPALIGN() __asm __volatile (".p2align 3"::)
#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::)
#define MOVQ_WONE(regd) \
@@ -195,7 +195,7 @@
asm volatile(
"mov $-128, %%"REG_a" \n\t"
"pxor %%mm7, %%mm7 \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0), %%mm0 \n\t"
"movq (%0, %2), %%mm2 \n\t"
@@ -223,7 +223,7 @@
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
"mov $-128, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0), %%mm0 \n\t"
"movq (%1), %%mm2 \n\t"
@@ -366,7 +366,7 @@
{
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movd (%1), %%mm0 \n\t"
"movd (%1, %3), %%mm1 \n\t"
@@ -392,7 +392,7 @@
{
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t"
@@ -418,7 +418,7 @@
{
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 8(%1), %%mm4 \n\t"
Index: libavcodec/i386/dsputil_mmx_avg.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx_avg.h,v
retrieving revision 1.29
diff -u -r1.29 dsputil_mmx_avg.h
--- libavcodec/i386/dsputil_mmx_avg.h 12 Jan 2006 22:43:17 -0000 1.29
+++ libavcodec/i386/dsputil_mmx_avg.h 26 Jan 2006 22:02:55 -0000
@@ -754,7 +754,7 @@
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
PAVGB" 1(%1), %%mm0 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
"movq (%1, %3), %%mm1 \n\t"
Index: libavcodec/i386/dsputil_mmx_rnd.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h,v
retrieving revision 1.22
diff -u -r1.22 dsputil_mmx_rnd.h
--- libavcodec/i386/dsputil_mmx_rnd.h 12 Jan 2006 22:43:17 -0000 1.22
+++ libavcodec/i386/dsputil_mmx_rnd.h 26 Jan 2006 22:02:55 -0000
@@ -28,7 +28,7 @@
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
@@ -69,7 +69,7 @@
"movq %%mm4, (%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
@@ -110,7 +110,7 @@
MOVQ_BFE(mm6);
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t"
@@ -168,7 +168,7 @@
"movq %%mm5, 8(%3) \n\t"
"add %5, %3 \n\t"
"decl %0 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
@@ -206,7 +206,7 @@
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm2 \n\t"
@@ -246,7 +246,7 @@
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
@@ -458,7 +458,7 @@
__asm __volatile(
"lea (%3, %3), %%"REG_a" \n\t"
"movq (%1), %%mm0 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t"
@@ -509,7 +509,7 @@
"paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"add %3, %1 \n\t"
- ".balign 8 \n\t"
+ ".p2align 3 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
Index: libavcodec/i386/motion_est_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/motion_est_mmx.c,v
retrieving revision 1.19
diff -u -r1.19 motion_est_mmx.c
--- libavcodec/i386/motion_est_mmx.c 12 Jan 2006 22:43:18 -0000 1.19
+++ libavcodec/i386/motion_est_mmx.c 26 Jan 2006 22:02:55 -0000
@@ -34,7 +34,7 @@
{
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
@@ -70,7 +70,7 @@
{
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
@@ -92,7 +92,7 @@
{
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t"
@@ -118,7 +118,7 @@
{ //FIXME reuse src
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"movq "MANGLE(bone)", %%mm5 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
@@ -155,7 +155,7 @@
{
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
@@ -193,7 +193,7 @@
{
long len= -(stride*h);
asm volatile(
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t"
Index: libavcodec/i386/mpegvideo_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/mpegvideo_mmx.c,v
retrieving revision 1.37
diff -u -r1.37 mpegvideo_mmx.c
--- libavcodec/i386/mpegvideo_mmx.c 12 Jan 2006 22:43:18 -0000 1.37
+++ libavcodec/i386/mpegvideo_mmx.c 26 Jan 2006 22:02:55 -0000
@@ -66,7 +66,7 @@
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
@@ -129,7 +129,7 @@
"packssdw %%mm5, %%mm5 \n\t"
"psubw %%mm5, %%mm7 \n\t"
"pxor %%mm4, %%mm4 \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %3), %%mm0 \n\t"
"movq 8(%0, %3), %%mm1 \n\t"
@@ -222,7 +222,7 @@
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -285,7 +285,7 @@
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -357,7 +357,7 @@
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
@@ -418,7 +418,7 @@
"packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t"
Index: libavcodec/i386/mpegvideo_mmx_template.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c,v
retrieving revision 1.29
diff -u -r1.29 mpegvideo_mmx_template.c
--- libavcodec/i386/mpegvideo_mmx_template.c 12 Jan 2006 22:43:18 -0000 1.29
+++ libavcodec/i386/mpegvideo_mmx_template.c 26 Jan 2006 22:02:55 -0000
@@ -111,7 +111,7 @@
"pxor %%mm6, %%mm6 \n\t"
"psubw (%3), %%mm6 \n\t" // -bias[0]
"mov $-128, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
@@ -155,7 +155,7 @@
"pxor %%mm7, %%mm7 \n\t" // 0
"pxor %%mm4, %%mm4 \n\t" // 0
"mov $-128, %%"REG_a" \n\t"
- ".balign 16 \n\t"
+ ".p2align 4 \n\t"
"1: \n\t"
"pxor %%mm1, %%mm1 \n\t" // 0
"movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
-------------- next part --------------
Index: libavcodec/i386/idct_mmx_xvid.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/idct_mmx_xvid.c,v
retrieving revision 1.4
diff -u -r1.4 idct_mmx_xvid.c
--- libavcodec/i386/idct_mmx_xvid.c 12 Jan 2006 22:43:18 -0000 1.4
+++ libavcodec/i386/idct_mmx_xvid.c 26 Jan 2006 22:02:55 -0000
@@ -295,17 +295,17 @@
"movq 8+" #A1 ",%%mm1 \n\t"/* 1 ; x7 x6 x5 x4*/\
"movq %%mm0,%%mm2 \n\t"/* 2 ; x3 x2 x1 x0*/\
"movq " #A3 ",%%mm3 \n\t"/* 3 ; w05 w04 w01 w00*/\
- "pshufw $0b10001000,%%mm0,%%mm0 \n\t"/* x2 x0 x2 x0*/\
+ "pshufw $0x88,%%mm0,%%mm0 \n\t"/* 10001000 ; x2 x0 x2 x0*/\
"movq 8+" #A3 ",%%mm4 \n\t"/* 4 ; w07 w06 w03 w02*/\
"movq %%mm1,%%mm5 \n\t"/* 5 ; x7 x6 x5 x4*/\
"pmaddwd %%mm0,%%mm3 \n\t"/* x2*w05+x0*w04 x2*w01+x0*w00*/\
"movq 32+" #A3 ",%%mm6 \n\t"/* 6 ; w21 w20 w17 w16*/\
- "pshufw $0b10001000,%%mm1,%%mm1 \n\t"/* x6 x4 x6 x4*/\
+ "pshufw $0x88,%%mm1,%%mm1 \n\t"/* 10001000 ; x6 x4 x6 x4*/\
"pmaddwd %%mm1,%%mm4 \n\t"/* x6*w07+x4*w06 x6*w03+x4*w02*/\
"movq 40+" #A3 ",%%mm7 \n\t"/* 7 ; w23 w22 w19 w18*/\
- "pshufw $0b11011101,%%mm2,%%mm2 \n\t"/* x3 x1 x3 x1*/\
+ "pshufw $0xdd,%%mm2,%%mm2 \n\t"/* 11011101 ; x3 x1 x3 x1*/\
"pmaddwd %%mm2,%%mm6 \n\t"/* x3*w21+x1*w20 x3*w17+x1*w16*/\
- "pshufw $0b11011101,%%mm5,%%mm5 \n\t"/* x7 x5 x7 x5*/\
+ "pshufw $0xdd,%%mm5,%%mm5 \n\t"/* 11011101 ; x7 x5 x7 x5*/\
"pmaddwd %%mm5,%%mm7 \n\t"/* x7*w23+x5*w22 x7*w19+x5*w18*/\
"paddd " #A4 ",%%mm3 \n\t"/* +%4*/\
"pmaddwd 16+" #A3 ",%%mm0 \n\t"/* x2*w13+x0*w12 x2*w09+x0*w08*/\
@@ -330,7 +330,7 @@
"packssdw %%mm0,%%mm3 \n\t"/* 0 ; y3 y2 y1 y0*/\
"packssdw %%mm4,%%mm7 \n\t"/* 4 ; y6 y7 y4 y5*/\
"movq %%mm3, " #A2 " \n\t"/* 3 ; save y3 y2 y1 y0*/\
- "pshufw $0b10110001,%%mm7,%%mm7 \n\t"/* y7 y6 y5 y4*/\
+ "pshufw $0xb1,%%mm7,%%mm7 \n\t"/* 10110001 ; y7 y6 y5 y4*/\
"movq %%mm7,8 +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\
More information about the ffmpeg-devel
mailing list