[FFmpeg-cvslog] x86: lavc: share more constant through defines
Christophe Gisquet
git at videolan.org
Sat Feb 7 17:54:11 CET 2015
ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Wed Feb 4 11:41:45 2015 +0100| [ed450d4acfc9fea0dae2df2b0a543ec0d602d31a] | committer: Michael Niedermayer
x86: lavc: share more constant through defines
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed450d4acfc9fea0dae2df2b0a543ec0d602d31a
---
libavcodec/x86/constants.c | 22 ++++++++++++++++------
libavcodec/x86/constants.h | 16 +++++++++-------
libavcodec/x86/h264_deblock_10bit.asm | 6 ++----
libavcodec/x86/h264_idct_10bit.asm | 4 +++-
libavcodec/x86/h264_intrapred_10bit.asm | 3 ++-
libavcodec/x86/h264_qpel_10bit.asm | 4 ++--
libavcodec/x86/h264_weight_10bit.asm | 3 ++-
libavcodec/x86/hevc_deblock.asm | 3 ++-
libavcodec/x86/hevc_mc.asm | 21 ++++++++++++++-------
libavcodec/x86/hevc_res_add.asm | 6 +++---
libavcodec/x86/v210enc.asm | 9 ++++++---
libavcodec/x86/vp3dsp.asm | 2 +-
12 files changed, 62 insertions(+), 37 deletions(-)
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index ddd009b..553dd49 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -35,21 +35,30 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = 0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
+ 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL,
0x0100010001000100ULL, 0x0100010001000100ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL,
+ 0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
+ 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL,
+ 0x0400040004000400ULL, 0x0400040004000400ULL};
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
+ 0x0800080008000800ULL, 0x0800080008000800ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
+ 0x1000100010001000ULL, 0x1000100010001000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
+ 0x2000200020002000ULL, 0x2000200020002000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
@@ -63,6 +72,7 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x030
0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 0b3c874..33dbb65 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -35,18 +35,20 @@ extern const xmm_reg ff_pw_9;
extern const uint64_t ff_pw_15;
extern const xmm_reg ff_pw_16;
extern const xmm_reg ff_pw_18;
-extern const uint64_t ff_pw_20;
+extern const xmm_reg ff_pw_20;
extern const xmm_reg ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_53;
extern const xmm_reg ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
-extern const xmm_reg ff_pw_255;
-extern const xmm_reg ff_pw_512;
-extern const xmm_reg ff_pw_1024;
-extern const xmm_reg ff_pw_2048;
-extern const xmm_reg ff_pw_8192;
+extern const ymm_reg ff_pw_255;
+extern const ymm_reg ff_pw_512;
+extern const ymm_reg ff_pw_1023;
+extern const ymm_reg ff_pw_1024;
+extern const ymm_reg ff_pw_2048;
+extern const ymm_reg ff_pw_4096;
+extern const ymm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
extern const ymm_reg ff_pb_0;
@@ -54,7 +56,7 @@ extern const ymm_reg ff_pb_1;
extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3;
extern const xmm_reg ff_pb_80;
-extern const xmm_reg ff_pb_F8;
+extern const xmm_reg ff_pb_FE;
extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg;
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index d8ace17..ebf8a3f 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -26,15 +26,13 @@
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
-
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-
SECTION .text
cextern pw_2
cextern pw_3
cextern pw_4
+cextern pw_1023
+%define pw_pixel_max pw_1023
; out: %4 = |%1-%2|-%3
; clobbers: %5
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 5c3acb1..cc115b0 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -26,11 +26,13 @@
SECTION_RODATA
-pw_pixel_max: times 8 dw ((1 << 10)-1)
pd_32: times 4 dd 32
SECTION .text
+cextern pw_1023
+%define pw_pixel_max pw_1023
+
;-----------------------------------------------------------------------------
; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index b60a210..9aeb702 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -26,6 +26,8 @@
SECTION_RODATA
+cextern pw_1023
+%define pw_pixel_max pw_1023
cextern pw_512
cextern pw_16
cextern pw_8
@@ -35,7 +37,6 @@ cextern pw_1
pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
pw_m3: times 8 dw -3
-pw_pixel_max: times 8 dw ((1 << 10)-1)
pd_17: times 4 dd 17
pd_16: times 4 dd 16
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index e7ce1b8..757c425 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -26,12 +26,12 @@
SECTION_RODATA 32
+cextern pw_1023
+%define pw_pixel_max pw_1023
cextern pw_16
cextern pw_1
cextern pb_0
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-
pad10: times 8 dw 10*1023
pad20: times 8 dw 20*1023
pad30: times 8 dw 30*1023
diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm
index 5d94962..f924e55 100644
--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -26,11 +26,12 @@
SECTION_RODATA 32
-pw_pixel_max: times 8 dw ((1 << 10)-1)
sq_1: dq 1
dq 0
cextern pw_1
+cextern pw_1023
+%define pw_pixel_max pw_1023
SECTION .text
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index f92cb2c..48a5975 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -26,8 +26,9 @@
SECTION_RODATA
+cextern pw_1023
+%define pw_pixel_max_10 pw_1023
pw_pixel_max_12: times 8 dw ((1 << 12)-1)
-pw_pixel_max_10: times 8 dw ((1 << 10)-1)
pw_m2: times 8 dw -2
pd_1 : times 4 dd 1
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 2b016f6..b16f3b4 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -21,14 +21,21 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA 32
-pw_8: times 16 dw (1 << 9)
-pw_10: times 16 dw (1 << 11)
-pw_12: times 16 dw (1 << 13)
+cextern pw_255
+cextern pw_512
+cextern pw_2048
+cextern pw_8192
+cextern pw_1023
+cextern pw_1024
+cextern pw_4096
+%define pw_8 pw_512
+%define pw_10 pw_2048
+%define pw_12 pw_8192
+%define pw_bi_10 pw_1024
+%define pw_bi_12 pw_4096
+%define max_pixels_8 pw_255
+%define max_pixels_10 pw_1023
pw_bi_8: times 16 dw (1 << 8)
-pw_bi_10: times 16 dw (1 << 10)
-pw_bi_12: times 16 dw (1 << 12)
-max_pixels_8: times 16 dw ((1 << 8)-1)
-max_pixels_10: times 16 dw ((1 << 10)-1)
max_pixels_12: times 16 dw ((1 << 12)-1)
cextern pd_1
cextern pb_0
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm
index 24b88ee..dc3e88a 100644
--- a/libavcodec/x86/hevc_res_add.asm
+++ b/libavcodec/x86/hevc_res_add.asm
@@ -20,11 +20,11 @@
; */
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA 32
-max_pixels_10: times 16 dw ((1 << 10)-1)
+SECTION .text
+cextern pw_1023
+%define max_pixels_10 pw_1023
-SECTION .text
;the tr_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
%macro TR_ADD_MMX_4_8 0
diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm
index 3245de3..751675f 100644
--- a/libavcodec/x86/v210enc.asm
+++ b/libavcodec/x86/v210enc.asm
@@ -23,7 +23,8 @@
SECTION_RODATA
-v210_enc_min_10: times 8 dw 0x4
+cextern pw_4
+%define v210_enc_min_10 pw_4
v210_enc_max_10: times 8 dw 0x3fb
v210_enc_luma_mult_10: dw 4,1,16,4,1,16,0,0
@@ -32,8 +33,10 @@ v210_enc_luma_shuf_10: db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11
v210_enc_chroma_mult_10: dw 1,4,16,0,16,1,4,0
v210_enc_chroma_shuf_10: db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1
-v210_enc_min_8: times 16 db 0x1
-v210_enc_max_8: times 16 db 0xfe
+cextern pb_1
+%define v210_enc_min_8 pb_1
+cextern pb_FE
+%define v210_enc_max_8 pb_FE
v210_enc_luma_shuf_8: db 6,-1,7,-1,8,-1,9,-1,10,-1,11,-1,-1,-1,-1,-1
v210_enc_luma_mult_8: dw 16,4,64,16,4,64,0,0
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 7ad4c31..ee5a6bf 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -36,11 +36,11 @@ vp3_idct_data: times 8 dw 64277
pb_7: times 8 db 0x07
pb_1F: times 8 db 0x1f
pb_81: times 8 db 0x81
-pb_FE: times 8 db 0xFE
cextern pb_1
cextern pb_3
cextern pb_80
+cextern pb_FE
cextern pw_8
More information about the ffmpeg-cvslog
mailing list