[FFmpeg-cvslog] x86/hevc_add_res: merge last remaining changes from 3d6535983282bea542dac2e568ae50da5796be34
James Almer
git at videolan.org
Sat Apr 1 02:50:36 EEST 2017
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Fri Mar 31 20:42:16 2017 -0300| [6171f178e70ebe75e5964531f47ccc32455d5557] | committer: James Almer
x86/hevc_add_res: merge last remaining changes from 3d6535983282bea542dac2e568ae50da5796be34
See https://lists.libav.org/pipermail/libav-devel/2016-October/079829.html
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6171f178e70ebe75e5964531f47ccc32455d5557
---
doc/libav-merge.txt | 1 -
libavcodec/x86/hevc_add_res.asm | 46 ++++++++++++++---------------------------
2 files changed, 15 insertions(+), 32 deletions(-)
diff --git a/doc/libav-merge.txt b/doc/libav-merge.txt
index 0cbd9f4..30518c0 100644
--- a/doc/libav-merge.txt
+++ b/doc/libav-merge.txt
@@ -97,7 +97,6 @@ Stuff that didn't reach the codebase:
- VAAPI VP8 decode hwaccel (currently under review: http://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/thread.html#207348)
- Removal of the custom atomic API (5cc0057f49, see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209003.html)
- Use the new bitstream filter for extracting extradata (8e2ea69135 and 096a8effa3, see https://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209068.html)
-- ADD_RES_MMX_4_8 in libavcodec/x86/hevc_add_res.asm probably needs updating (see 589880710)
- Read aac_adtstoasc extradata updates from packet side data on Matroska once mov and the bsf in question are fixed (See 13a211e632 and 5ef1959080)
Collateral damage that needs work locally:
diff --git a/libavcodec/x86/hevc_add_res.asm b/libavcodec/x86/hevc_add_res.asm
index d97e4ab..36d4d8e 100644
--- a/libavcodec/x86/hevc_add_res.asm
+++ b/libavcodec/x86/hevc_add_res.asm
@@ -28,25 +28,23 @@ cextern pw_1023
; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
%macro ADD_RES_MMX_4_8 0
- mova m2, [r1]
- mova m4, [r1+8]
+ mova m0, [r1]
+ mova m2, [r1+8]
+ pxor m1, m1
pxor m3, m3
+ psubw m1, m0
psubw m3, m2
- packuswb m2, m2
- packuswb m3, m3
- pxor m5, m5
- psubw m5, m4
- packuswb m4, m4
- packuswb m5, m5
-
- movh m0, [r0]
- movh m1, [r0+r2]
+ packuswb m0, m2
+ packuswb m1, m3
+
+ movd m2, [r0]
+ movd m3, [r0+r2]
+ punpckldq m2, m3
paddusb m0, m2
- paddusb m1, m4
- psubusb m0, m3
- psubusb m1, m5
- movh [r0], m0
- movh [r0+r2], m1
+ psubusb m0, m1
+ movd [r0], m0
+ psrlq m0, 32
+ movd [r0+r2], m0
%endmacro
@@ -95,15 +93,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
vinserti128 m2, m2, [r1+%1+32], 1
vinserti128 m6, m6, [r1+%1+48], 1
%endif
-%if cpuflag(avx)
psubw m1, m0, m2
psubw m5, m0, m6
-%else
- mova m1, m0
- mova m5, m0
- psubw m1, m2
- psubw m5, m6
-%endif
packuswb m2, m6
packuswb m1, m5
@@ -113,15 +104,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
vinserti128 m4, m4, [r1+%1+96 ], 1
vinserti128 m6, m6, [r1+%1+112], 1
%endif
-%if cpuflag(avx)
psubw m3, m0, m4
psubw m5, m0, m6
-%else
- mova m3, m0
- mova m5, m0
- psubw m3, m4
- psubw m5, m6
-%endif
packuswb m4, m6
packuswb m3, m5
@@ -192,7 +176,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7
dec r4d
jg .loop
RET
-%endif
+%endif ;HAVE_AVX2_EXTERNAL
%macro ADD_RES_SSE_8_10 4
mova m0, [%4]
More information about the ffmpeg-cvslog mailing list