[FFmpeg-cvslog] x86: Use consistent 3dnowext function and macro name suffixes
Diego Biurrun
git at videolan.org
Fri Aug 3 23:30:31 CEST 2012
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Wed Aug 1 15:31:43 2012 +0200| [ca844b7be9c69c91113094ef21d720f1ca80db60] | committer: Diego Biurrun
x86: Use consistent 3dnowext function and macro name suffixes
Currently there is a wild mix of 3dn2/3dnow2/3dnowext suffixes. Switching to
"3dnowext", which is the more common name for the CPU flag (as reported,
e.g., by the Linux kernel), unifies this.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca844b7be9c69c91113094ef21d720f1ca80db60
---
libavcodec/x86/dsputil_mmx.c | 14 ++++++------
libavcodec/x86/fft.c | 6 ++---
libavcodec/x86/fft.h | 6 ++---
libavcodec/x86/fft_mmx.asm | 46 +++++++++++++++++++--------------------
libavcodec/x86/fmtconvert.asm | 6 ++---
libavcodec/x86/fmtconvert_mmx.c | 10 +++++----
libavutil/x86/x86inc.asm | 2 +-
7 files changed, 46 insertions(+), 44 deletions(-)
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 827705c..d26f612 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2358,9 +2358,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2],
}
#if HAVE_6REGS
-static void vector_fmul_window_3dnow2(float *dst, const float *src0,
- const float *src1, const float *win,
- int len)
+static void vector_fmul_window_3dnowext(float *dst, const float *src0,
+ const float *src1, const float *win,
+ int len)
{
x86_reg i = -len * 4;
x86_reg j = len * 4 - 8;
@@ -2809,11 +2809,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
#endif
}
-static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
- int mm_flags)
+static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx,
+ int mm_flags)
{
#if HAVE_6REGS && HAVE_INLINE_ASM
- c->vector_fmul_window = vector_fmul_window_3dnow2;
+ c->vector_fmul_window = vector_fmul_window_3dnowext;
#endif
}
@@ -3051,7 +3051,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
dsputil_init_3dnow(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT)
- dsputil_init_3dnow2(c, avctx, mm_flags);
+ dsputil_init_3dnowext(c, avctx, mm_flags);
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE)
dsputil_init_sse(c, avctx, mm_flags);
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index f1c1c9d..fcde3fa 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
}
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
/* 3DNowEx for K7 */
- s->imdct_calc = ff_imdct_calc_3dnow2;
- s->imdct_half = ff_imdct_half_3dnow2;
- s->fft_calc = ff_fft_calc_3dnow2;
+ s->imdct_calc = ff_imdct_calc_3dnowext;
+ s->imdct_half = ff_imdct_half_3dnowext;
+ s->fft_calc = ff_fft_calc_3dnowext;
}
#endif
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index 1cefe7a..6e80b95 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z);
-void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z);
void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index ac53296..7c0e9de 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -93,14 +93,14 @@ cextern cos_ %+ i
SECTION_TEXT
-%macro T2_3DN 4 ; z0, z1, mem0, mem1
+%macro T2_3DNOW 4 ; z0, z1, mem0, mem1
mova %1, %3
mova %2, %1
pfadd %1, %4
pfsub %2, %4
%endmacro
-%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1
+%macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1
mova %5, %3
pfsub %3, %4
pfadd %5, %4 ; {t6,t5}
@@ -444,13 +444,13 @@ fft16_sse:
ret
-%macro FFT48_3DN 0
+%macro FFT48_3DNOW 0
align 16
fft4 %+ SUFFIX:
- T2_3DN m0, m1, Z(0), Z(1)
+ T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2)
mova m3, Z(3)
- T4_3DN m0, m1, m2, m3, m4, m5
+ T4_3DNOW m0, m1, m2, m3, m4, m5
PUNPCK m0, m1, m4
PUNPCK m2, m3, m5
mova Z(0), m0
@@ -461,14 +461,14 @@ fft4 %+ SUFFIX:
align 16
fft8 %+ SUFFIX:
- T2_3DN m0, m1, Z(0), Z(1)
+ T2_3DNOW m0, m1, Z(0), Z(1)
mova m2, Z(2)
mova m3, Z(3)
- T4_3DN m0, m1, m2, m3, m4, m5
+ T4_3DNOW m0, m1, m2, m3, m4, m5
mova Z(0), m0
mova Z(2), m2
- T2_3DN m4, m5, Z(4), Z(5)
- T2_3DN m6, m7, Z2(6), Z2(7)
+ T2_3DNOW m4, m5, Z(4), Z(5)
+ T2_3DNOW m6, m7, Z2(6), Z2(7)
PSWAPD m0, m5
PSWAPD m2, m7
pxor m0, [ps_m1p1]
@@ -477,12 +477,12 @@ fft8 %+ SUFFIX:
pfadd m7, m2
pfmul m5, [ps_root2]
pfmul m7, [ps_root2]
- T4_3DN m1, m3, m5, m7, m0, m2
+ T4_3DNOW m1, m3, m5, m7, m0, m2
mova Z(5), m5
mova Z2(7), m7
mova m0, Z(0)
mova m2, Z(2)
- T4_3DN m0, m2, m4, m6, m5, m7
+ T4_3DNOW m0, m2, m4, m6, m5, m7
PUNPCK m0, m1, m5
PUNPCK m2, m3, m7
mova Z(0), m0
@@ -500,7 +500,7 @@ fft8 %+ SUFFIX:
%if ARCH_X86_32
%macro PSWAPD 2
-%if cpuflag(3dnow2)
+%if cpuflag(3dnowext)
pswapd %1, %2
%elifidn %1, %2
movd [r0+12], %1
@@ -512,11 +512,11 @@ fft8 %+ SUFFIX:
%endif
%endmacro
-INIT_MMX 3dnow2
-FFT48_3DN
+INIT_MMX 3dnowext
+FFT48_3DNOW
INIT_MMX 3dnow
-FFT48_3DN
+FFT48_3DNOW
%endif
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
@@ -633,7 +633,7 @@ cglobal fft_calc, 2,5,8
%if ARCH_X86_32
INIT_MMX 3dnow
FFT_CALC_FUNC
-INIT_MMX 3dnow2
+INIT_MMX 3dnowext
FFT_CALC_FUNC
%endif
INIT_XMM sse
@@ -727,7 +727,7 @@ cglobal imdct_calc, 3,5,3
%if ARCH_X86_32
INIT_MMX 3dnow
IMDCT_CALC_FUNC
-INIT_MMX 3dnow2
+INIT_MMX 3dnowext
IMDCT_CALC_FUNC
%endif
@@ -743,8 +743,8 @@ INIT_MMX 3dnow
%define unpckhps punpckhdq
DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q]
DECL_PASS pass_interleave_3dnow, PASS_BIG 0
-%define pass_3dnow2 pass_3dnow
-%define pass_interleave_3dnow2 pass_interleave_3dnow
+%define pass_3dnowext pass_3dnow
+%define pass_interleave_3dnowext pass_interleave_3dnow
%endif
%ifdef PIC
@@ -813,7 +813,7 @@ DECL_FFT 5, _interleave
INIT_MMX 3dnow
DECL_FFT 4
DECL_FFT 4, _interleave
-INIT_MMX 3dnow2
+INIT_MMX 3dnowext
DECL_FFT 4
DECL_FFT 4, _interleave
%endif
@@ -845,7 +845,7 @@ INIT_XMM sse
PSWAPD m5, m3
pfmul m2, m3
pfmul m6, m5
-%if cpuflag(3dnow2)
+%if cpuflag(3dnowext)
pfpnacc m0, m4
pfpnacc m2, m6
%else
@@ -1018,7 +1018,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
xor r4, r4
sub r4, r3
%endif
-%if notcpuflag(3dnow2) && mmsize == 8
+%if notcpuflag(3dnowext) && mmsize == 8
movd m7, [ps_m1m1m1m1]
%endif
.pre:
@@ -1102,7 +1102,7 @@ DECL_IMDCT POSROTATESHUF
INIT_MMX 3dnow
DECL_IMDCT POSROTATESHUF_3DNOW
-INIT_MMX 3dnow2
+INIT_MMX 3dnowext
DECL_IMDCT POSROTATESHUF_3DNOW
%endif
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 4916e7a..0fd14fe 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
%macro PSWAPD_SSE 2
pshufw %1, %2, 0x4e
%endmacro
-%macro PSWAPD_3DN1 2
+%macro PSWAPD_3DNOW 2
movq %1, %2
psrlq %1, 32
punpckldq %1, %2
@@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse
%define cvtps2pi pf2id
-%define pswapd PSWAPD_3DN1
+%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 3dnow
%undef pswapd
-FLOAT_TO_INT16_INTERLEAVE6 3dn2
+FLOAT_TO_INT16_INTERLEAVE6 3dnowext
%undef cvtps2pi
;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index aaf634d..fbdc526 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
@@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow)
FLOAT_TO_INT16_INTERLEAVE(sse)
FLOAT_TO_INT16_INTERLEAVE(sse2)
-static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
+static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
+ long len, int channels)
+{
if(channels==6)
- ff_float_to_int16_interleave6_3dn2(dst, src, len);
+ ff_float_to_int16_interleave6_3dnowext(dst, src, len);
else
float_to_int16_interleave_3dnow(dst, src, len, channels);
}
@@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
}
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
+ c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
}
}
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 7a75951..03e6c07 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -557,7 +557,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_mmx (1<<0)
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
-%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
+%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
%assign cpuflags_sse (1<<4) | cpuflags_mmx2
%assign cpuflags_sse2 (1<<5) | cpuflags_sse
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
More information about the ffmpeg-cvslog
mailing list