[FFmpeg-devel] [PATCH 6/7] x86: lavc: share more constants through defines

Christophe Gisquet christophe.gisquet at gmail.com
Thu Feb 5 20:20:44 CET 2015


---
 libavcodec/x86/constants.c              | 22 ++++++++++++++++------
 libavcodec/x86/constants.h              | 16 +++++++++-------
 libavcodec/x86/h264_deblock_10bit.asm   |  6 ++----
 libavcodec/x86/h264_idct_10bit.asm      |  4 +++-
 libavcodec/x86/h264_intrapred_10bit.asm |  3 ++-
 libavcodec/x86/h264_qpel_10bit.asm      |  4 ++--
 libavcodec/x86/h264_weight_10bit.asm    |  3 ++-
 libavcodec/x86/hevc_deblock.asm         |  3 ++-
 libavcodec/x86/hevc_mc.asm              | 21 ++++++++++++++-------
 libavcodec/x86/hevc_res_add.asm         |  6 +++---
 libavcodec/x86/v210enc.asm              |  9 ++++++---
 libavcodec/x86/vp3dsp.asm               |  2 +-
 12 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index ddd009b..553dd49 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -35,21 +35,30 @@ DECLARE_ALIGNED(8,  const uint64_t, ff_pw_15)   =   0x000F000F000F000FULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_16)   = { 0x0010001000100010ULL, 0x0010001000100010ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_17)   = { 0x0011001100110011ULL, 0x0011001100110011ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_18)   = { 0x0012001200120012ULL, 0x0012001200120012ULL };
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_20)   =   0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_20)   = { 0x0014001400140014ULL, 0x0014001400140014ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_32)   = { 0x0020002000200020ULL, 0x0020002000200020ULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_42)   =   0x002A002A002A002AULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_53)   =   0x0035003500350035ULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64)   = { 0x0040004000400040ULL, 0x0040004000400040ULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_96)   =   0x0060006000600060ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128)  =   0x0080008000800080ULL;
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_255)  = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_255)  = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
+                                                    0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_256)  = { 0x0100010001000100ULL, 0x0100010001000100ULL,
                                                     0x0100010001000100ULL, 0x0100010001000100ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 0x0200020002000200ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 0x0200020002000200ULL,
+                                                    0x0200020002000200ULL, 0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
+                                                    0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL,
+                                                    0x0400040004000400ULL, 0x0400040004000400ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
+                                                    0x0800080008000800ULL, 0x0800080008000800ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
+                                                    0x1000100010001000ULL, 0x1000100010001000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
+                                                    0x2000200020002000ULL, 0x2000200020002000ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
                                                     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
 
@@ -63,6 +72,7 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x030
                                                     0x0303030303030303ULL, 0x0303030303030303ULL };
 DECLARE_ALIGNED(32, const xmm_reg,  ff_pb_15)   = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
 
 DECLARE_ALIGNED(16, const xmm_reg,  ff_ps_neg)  = { 0x8000000080000000ULL, 0x8000000080000000ULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 0b3c874..33dbb65 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -35,18 +35,20 @@ extern const xmm_reg  ff_pw_9;
 extern const uint64_t ff_pw_15;
 extern const xmm_reg  ff_pw_16;
 extern const xmm_reg  ff_pw_18;
-extern const uint64_t ff_pw_20;
+extern const xmm_reg  ff_pw_20;
 extern const xmm_reg  ff_pw_32;
 extern const uint64_t ff_pw_42;
 extern const uint64_t ff_pw_53;
 extern const xmm_reg  ff_pw_64;
 extern const uint64_t ff_pw_96;
 extern const uint64_t ff_pw_128;
-extern const xmm_reg  ff_pw_255;
-extern const xmm_reg  ff_pw_512;
-extern const xmm_reg  ff_pw_1024;
-extern const xmm_reg  ff_pw_2048;
-extern const xmm_reg  ff_pw_8192;
+extern const ymm_reg  ff_pw_255;
+extern const ymm_reg  ff_pw_512;
+extern const ymm_reg  ff_pw_1023;
+extern const ymm_reg  ff_pw_1024;
+extern const ymm_reg  ff_pw_2048;
+extern const ymm_reg  ff_pw_4096;
+extern const ymm_reg  ff_pw_8192;
 extern const ymm_reg  ff_pw_m1;
 
 extern const ymm_reg  ff_pb_0;
@@ -54,7 +56,7 @@ extern const ymm_reg  ff_pb_1;
 extern const ymm_reg  ff_pb_2;
 extern const ymm_reg  ff_pb_3;
 extern const xmm_reg  ff_pb_80;
-extern const xmm_reg  ff_pb_F8;
+extern const xmm_reg  ff_pb_FE;
 extern const uint64_t ff_pb_FC;
 
 extern const xmm_reg  ff_ps_neg;
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index d8ace17..ebf8a3f 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -26,15 +26,13 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
-
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-
 SECTION .text
 
 cextern pw_2
 cextern pw_3
 cextern pw_4
+cextern pw_1023
+%define pw_pixel_max pw_1023
 
 ; out: %4 = |%1-%2|-%3
 ; clobbers: %5
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 5c3acb1..cc115b0 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -26,11 +26,13 @@
 
 SECTION_RODATA
 
-pw_pixel_max: times 8 dw ((1 << 10)-1)
 pd_32:        times 4 dd 32
 
 SECTION .text
 
+cextern pw_1023
+%define pw_pixel_max pw_1023
+
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index b60a210..9aeb702 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -26,6 +26,8 @@
 
 SECTION_RODATA
 
+cextern pw_1023
+%define pw_pixel_max pw_1023
 cextern pw_512
 cextern pw_16
 cextern pw_8
@@ -35,7 +37,6 @@ cextern pw_1
 
 pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
 pw_m3:        times 8 dw -3
-pw_pixel_max: times 8 dw ((1 << 10)-1)
 pd_17:        times 4 dd 17
 pd_16:        times 4 dd 16
 
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index e7ce1b8..757c425 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -26,12 +26,12 @@
 
 SECTION_RODATA 32
 
+cextern pw_1023
+%define pw_pixel_max pw_1023
 cextern pw_16
 cextern pw_1
 cextern pb_0
 
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-
 pad10: times 8 dw 10*1023
 pad20: times 8 dw 20*1023
 pad30: times 8 dw 30*1023
diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm
index 5d94962..f924e55 100644
--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -26,11 +26,12 @@
 
 SECTION_RODATA 32
 
-pw_pixel_max: times 8 dw ((1 << 10)-1)
 sq_1: dq 1
       dq 0
 
 cextern pw_1
+cextern pw_1023
+%define pw_pixel_max pw_1023
 
 SECTION .text
 
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index f92cb2c..48a5975 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -26,8 +26,9 @@
 
 SECTION_RODATA
 
+cextern pw_1023
+%define pw_pixel_max_10 pw_1023
 pw_pixel_max_12: times 8 dw ((1 << 12)-1)
-pw_pixel_max_10: times 8 dw ((1 << 10)-1)
 pw_m2:           times 8 dw -2
 pd_1 :           times 4 dd  1
 
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 2b016f6..07b0f77 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -21,14 +21,21 @@
 %include "libavutil/x86/x86util.asm"
 
 SECTION_RODATA 32
-pw_8:                   times 16 dw  (1 <<  9)
-pw_10:                  times 16 dw  (1 << 11)
-pw_12:                  times 16 dw  (1 << 13)
+cextern pw_255
+cextern pw_512
+cextern pw_2048
+cextern pw_8192
+cextern pw_1023
+cextern pw_1024
+cextern pw_4096
+%define pw_8 pw_512
+%define pw_10 pw_2048
+%define pw_12 pw_8192
+%define pw_bi_10 pw_1024
+%define pw_bi_12 pw_4096
+%define max_pixels_8 pw_255
+%define max_pixels_10 pw_1023
 pw_bi_8:                times 16 dw  (1 <<  8)
-pw_bi_10:               times 16 dw  (1 << 10)
-pw_bi_12:               times 16 dw  (1 << 12)
-max_pixels_8:           times 16 dw ((1 <<  8)-1)
-max_pixels_10:          times 16 dw ((1 << 10)-1)
 max_pixels_12:          times 16 dw ((1 << 12)-1)
 cextern pd_1
 cextern pb_0
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
index 24b88ee..dc3e88a 100644
--- a/libavcodec/x86/hevc_res_add.asm
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -20,11 +20,11 @@
 ; */
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA 32
-max_pixels_10:          times 16  dw ((1 << 10)-1)
+SECTION .text
 
+cextern pw_1023
+%define max_pixels_10 pw_1023
 
-SECTION .text
 
 ;the tr_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
 %macro TR_ADD_MMX_4_8 0
diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm
index 3245de3..751675f 100644
--- a/libavcodec/x86/v210enc.asm
+++ b/libavcodec/x86/v210enc.asm
@@ -23,7 +23,8 @@
 
 SECTION_RODATA
 
-v210_enc_min_10: times 8 dw 0x4
+cextern pw_4
+%define v210_enc_min_10 pw_4
 v210_enc_max_10: times 8 dw 0x3fb
 
 v210_enc_luma_mult_10: dw 4,1,16,4,1,16,0,0
@@ -32,8 +33,10 @@ v210_enc_luma_shuf_10: db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11
 v210_enc_chroma_mult_10: dw 1,4,16,0,16,1,4,0
 v210_enc_chroma_shuf_10: db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1
 
-v210_enc_min_8: times 16 db 0x1
-v210_enc_max_8: times 16 db 0xfe
+cextern pb_1
+%define v210_enc_min_8 pb_1
+cextern pb_FE
+%define v210_enc_max_8 pb_FE
 
 v210_enc_luma_shuf_8: db 6,-1,7,-1,8,-1,9,-1,10,-1,11,-1,-1,-1,-1,-1
 v210_enc_luma_mult_8: dw 16,4,64,16,4,64,0,0
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 7ad4c31..ee5a6bf 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -36,11 +36,11 @@ vp3_idct_data: times 8 dw 64277
 pb_7:  times 8 db 0x07
 pb_1F: times 8 db 0x1f
 pb_81: times 8 db 0x81
-pb_FE: times 8 db 0xFE
 
 cextern pb_1
 cextern pb_3
 cextern pb_80
+cextern pb_FE
 
 cextern pw_8
 
-- 
1.9.2.msysgit.0



More information about the ffmpeg-devel mailing list