[FFmpeg-cvslog] x86/hevc_add_res: merge last remaining changes from 3d6535983282bea542dac2e568ae50da5796be34

James Almer git at videolan.org
Sat Apr 1 02:50:36 EEST 2017


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Fri Mar 31 20:42:16 2017 -0300| [6171f178e70ebe75e5964531f47ccc32455d5557] | committer: James Almer

x86/hevc_add_res: merge last remaining changes from 3d6535983282bea542dac2e568ae50da5796be34

See https://lists.libav.org/pipermail/libav-devel/2016-October/079829.html

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6171f178e70ebe75e5964531f47ccc32455d5557
---

 doc/libav-merge.txt             |  1 -
 libavcodec/x86/hevc_add_res.asm | 46 ++++++++++++++---------------------------
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/doc/libav-merge.txt b/doc/libav-merge.txt
index 0cbd9f4..30518c0 100644
--- a/doc/libav-merge.txt
+++ b/doc/libav-merge.txt
@@ -97,7 +97,6 @@ Stuff that didn't reach the codebase:
 - VAAPI VP8 decode hwaccel (currently under review: http://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/thread.html#207348)
 - Removal of the custom atomic API (5cc0057f49, see http://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209003.html)
 - Use the new bitstream filter for extracting extradata (8e2ea69135 and 096a8effa3, see https://ffmpeg.org/pipermail/ffmpeg-devel/2017-March/209068.html)
-- ADD_RES_MMX_4_8 in libavcodec/x86/hevc_add_res.asm probably needs updating (see 589880710)
 - Read aac_adtstoasc extradata updates from packet side data on Matroska once mov and the bsf in question are fixed (See 13a211e632 and 5ef1959080)
 
 Collateral damage that needs work locally:
diff --git a/libavcodec/x86/hevc_add_res.asm b/libavcodec/x86/hevc_add_res.asm
index d97e4ab..36d4d8e 100644
--- a/libavcodec/x86/hevc_add_res.asm
+++ b/libavcodec/x86/hevc_add_res.asm
@@ -28,25 +28,23 @@ cextern pw_1023
 
 ; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
 %macro ADD_RES_MMX_4_8 0
-    mova              m2, [r1]
-    mova              m4, [r1+8]
+    mova              m0, [r1]
+    mova              m2, [r1+8]
+    pxor              m1, m1
     pxor              m3, m3
+    psubw             m1, m0
     psubw             m3, m2
-    packuswb          m2, m2
-    packuswb          m3, m3
-    pxor              m5, m5
-    psubw             m5, m4
-    packuswb          m4, m4
-    packuswb          m5, m5
-
-    movh              m0, [r0]
-    movh              m1, [r0+r2]
+    packuswb          m0, m2
+    packuswb          m1, m3
+
+    movd              m2, [r0]
+    movd              m3, [r0+r2]
+    punpckldq         m2, m3
     paddusb           m0, m2
-    paddusb           m1, m4
-    psubusb           m0, m3
-    psubusb           m1, m5
-    movh            [r0], m0
-    movh         [r0+r2], m1
+    psubusb           m0, m1
+    movd            [r0], m0
+    psrlq             m0, 32
+    movd         [r0+r2], m0
 %endmacro
 
 
@@ -95,15 +93,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
     vinserti128       m2, m2, [r1+%1+32], 1
     vinserti128       m6, m6, [r1+%1+48], 1
 %endif
-%if cpuflag(avx)
     psubw             m1, m0, m2
     psubw             m5, m0, m6
-%else
-    mova              m1, m0
-    mova              m5, m0
-    psubw             m1, m2
-    psubw             m5, m6
-%endif
     packuswb          m2, m6
     packuswb          m1, m5
 
@@ -113,15 +104,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
     vinserti128       m4, m4, [r1+%1+96 ], 1
     vinserti128       m6, m6, [r1+%1+112], 1
 %endif
-%if cpuflag(avx)
     psubw             m3, m0, m4
     psubw             m5, m0, m6
-%else
-    mova              m3, m0
-    mova              m5, m0
-    psubw             m3, m4
-    psubw             m5, m6
-%endif
     packuswb          m4, m6
     packuswb          m3, m5
 
@@ -192,7 +176,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7
     dec                 r4d
     jg .loop
     RET
-%endif
+%endif ;HAVE_AVX2_EXTERNAL
 
 %macro ADD_RES_SSE_8_10 4
     mova              m0, [%4]



More information about the ffmpeg-cvslog mailing list