[Ffmpeg-cvslog] CVS: ffmpeg/libavcodec/i386 dsputil_mmx.c, 1.101, 1.102 mpegvideo_mmx_template.c, 1.25, 1.26
Michael Niedermayer CVS
michael
Wed Sep 21 23:17:11 CEST 2005
Update of /cvsroot/ffmpeg/ffmpeg/libavcodec/i386
In directory mail:/var2/tmp/cvs-serv17066
Modified Files:
dsputil_mmx.c mpegvideo_mmx_template.c
Log Message:
replace a few mov + psrlq with pshufw, there are more cases which could benefit from this but they would require us to duplicate some functions ...
the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)
Index: dsputil_mmx.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
retrieving revision 1.101
retrieving revision 1.102
diff -u -d -r1.101 -r1.102
--- dsputil_mmx.c 10 Sep 2005 19:30:40 -0000 1.101
+++ dsputil_mmx.c 21 Sep 2005 21:17:09 -0000 1.102
@@ -1621,11 +1621,9 @@
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
- "movq %%mm0, %%mm1 \n\t"
- "psrlq $32, %%mm0 \n\t"
+ "pshufw $0x0E, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlq $16, %%mm0 \n\t"
+ "pshufw $0x01, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
Index: mpegvideo_mmx_template.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -d -r1.25 -r1.26
--- mpegvideo_mmx_template.c 28 Oct 2004 10:12:57 -0000 1.25
+++ mpegvideo_mmx_template.c 21 Sep 2005 21:17:09 -0000 1.26
@@ -22,7 +22,11 @@
#ifdef HAVE_MMX2
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
-
+#define PMAX(a,b) \
+ "pshufw $0x0E," #a ", " #b " \n\t"\
+ PMAXW(b, a)\
+ "pshufw $0x01," #a ", " #b " \n\t"\
+ PMAXW(b, a)
#else
#define SPREADW(a) \
"punpcklwd " #a ", " #a " \n\t"\
@@ -30,6 +34,14 @@
#define PMAXW(a,b) \
"psubusw " #a ", " #b " \n\t"\
"paddw " #a ", " #b " \n\t"
+#define PMAX(a,b) \
+ "movq " #a ", " #b " \n\t"\
+ "psrlq $32, " #a " \n\t"\
+ PMAXW(b, a)\
+ "movq " #a ", " #b " \n\t"\
+ "psrlq $16, " #a " \n\t"\
+ PMAXW(b, a)
+
#endif
static int RENAME(dct_quantize)(MpegEncContext *s,
@@ -119,12 +131,7 @@
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $32, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $16, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
+ PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
@@ -170,12 +177,7 @@
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $32, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $16, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
+ PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
More information about the ffmpeg-cvslog
mailing list