[FFmpeg-cvslog] vp9/x86: make filter_48/84/88_h work on 32-bit.
Ronald S. Bultje
git at videolan.org
Sat Dec 27 23:26:35 CET 2014
ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Fri Dec 26 15:15:50 2014 -0500| [b26bc3520f9ae0f025d2b3787a3fa33febae24af] | committer: Ronald S. Bultje
vp9/x86: make filter_48/84/88_h work on 32-bit.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b26bc3520f9ae0f025d2b3787a3fa33febae24af
---
libavcodec/x86/vp9dsp_init.c | 12 ++------
libavcodec/x86/vp9lpf.asm | 62 ++++++++++++++++++++++++++++++------------
2 files changed, 48 insertions(+), 26 deletions(-)
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 37b5e0f..7bb31ff 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -348,17 +348,11 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
- if (ARCH_X86_64) { \
- dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
- } \
+ dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
- if (ARCH_X86_64) { \
- dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
- } \
+ dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
- if (ARCH_X86_64) { \
- dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
- } \
+ dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
} while (0)
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index deec530..838b785 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -937,9 +937,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
mova m3, [P0]
mova m4, [Q0]
mova m5, [Q1]
+%if ARCH_X86_64
mova m6, [Q2]
+%endif
mova m7, [Q3]
DEFINE_REAL_P7_TO_Q7
+%if ARCH_X86_64
SBUTTERFLY bw, 0, 1, 8
SBUTTERFLY bw, 2, 3, 8
SBUTTERFLY bw, 4, 5, 8
@@ -952,22 +955,47 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
SBUTTERFLY dq, 1, 5, 8
SBUTTERFLY dq, 2, 6, 8
SBUTTERFLY dq, 3, 7, 8
- movh [P7], m0
- movhps [P6], m0
- movh [Q0], m1
- movhps [Q1], m1
- movh [P3], m2
- movhps [P2], m2
- movh [Q4], m3
- movhps [Q5], m3
- movh [P5], m4
- movhps [P4], m4
- movh [Q2], m5
- movhps [Q3], m5
- movh [P1], m6
- movhps [P0], m6
- movh [Q6], m7
- movhps [Q7], m7
+%else
+ SBUTTERFLY bw, 0, 1, 6
+ mova [rsp+64], m1
+ mova m6, [rsp+96]
+ SBUTTERFLY bw, 2, 3, 1
+ SBUTTERFLY bw, 4, 5, 1
+ SBUTTERFLY bw, 6, 7, 1
+ SBUTTERFLY wd, 0, 2, 1
+ mova [rsp+96], m2
+ mova m1, [rsp+64]
+ SBUTTERFLY wd, 1, 3, 2
+ SBUTTERFLY wd, 4, 6, 2
+ SBUTTERFLY wd, 5, 7, 2
+ SBUTTERFLY dq, 0, 4, 2
+ SBUTTERFLY dq, 1, 5, 2
+ movh [Q0], m1
+ movhps [Q1], m1
+ mova m2, [rsp+96]
+ SBUTTERFLY dq, 2, 6, 1
+ SBUTTERFLY dq, 3, 7, 1
+%endif
+ SWAP 3, 6
+ SWAP 1, 4
+ movh [P7], m0
+ movhps [P6], m0
+ movh [P5], m1
+ movhps [P4], m1
+ movh [P3], m2
+ movhps [P2], m2
+ movh [P1], m3
+ movhps [P0], m3
+%if ARCH_X86_64
+ movh [Q0], m4
+ movhps [Q1], m4
+%endif
+ movh [Q2], m5
+ movhps [Q3], m5
+ movh [Q4], m6
+ movhps [Q5], m6
+ movh [Q6], m7
+ movhps [Q7], m7
%endif
%endif
@@ -977,7 +1005,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride,
%macro LPF_16_VH 5
INIT_XMM %5
LOOPFILTER v, %1, %2, 0, %4
-%if ARCH_X86_64 || %1 == 44
+%if ARCH_X86_64 || %1 != 16
LOOPFILTER h, %1, %2, %3, %4
%endif
%endmacro
More information about the ffmpeg-cvslog mailing list