[FFmpeg-devel] [PATCH 5/7] x86: lavc: share more constants

Christophe Gisquet christophe.gisquet at gmail.com
Thu Feb 5 20:20:43 CET 2015


---
 libavcodec/x86/ac3dsp.asm          |  2 +-
 libavcodec/x86/constants.c         |  9 ++++++++-
 libavcodec/x86/constants.h         |  4 ++++
 libavcodec/x86/h264_qpel_10bit.asm |  2 +-
 libavcodec/x86/hevc_mc.asm         | 14 +++++++-------
 libavcodec/x86/hevc_sao.asm        |  2 +-
 libavcodec/x86/huffyuvdsp.asm      |  4 ++--
 libavcodec/x86/vp9intrapred.asm    |  4 ++--
 8 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index b244416..675ade3 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 
 ; used in ff_ac3_extract_exponents()
-pd_1:   times 4 dd 1
+cextern pd_1
 pd_151: times 4 dd 151
 
 ; used in ff_apply_window_int16()
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index a7cb75d..ddd009b 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8192) = { 0x2000200020002000ULL, 0x200
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_m1)   = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
                                                     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
 
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL,
+                                                    0x0000000000000000ULL, 0x0000000000000000ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_1)    = { 0x0101010101010101ULL, 0x0101010101010101ULL,
                                                     0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_2)    = { 0x0202020202020202ULL, 0x0202020202020202ULL,
+                                                    0x0202020202020202ULL, 0x0202020202020202ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL,
                                                     0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_15)   = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
 
 DECLARE_ALIGNED(16, const xmm_reg,  ff_ps_neg)  = { 0x8000000080000000ULL, 0x8000000080000000ULL };
+
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_1)    = { 0x0000000100000001ULL, 0x0000000100000001ULL,
+                                                    0x0000000100000001ULL, 0x0000000100000001ULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 094dd42..0b3c874 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -49,7 +49,9 @@ extern const xmm_reg  ff_pw_2048;
 extern const xmm_reg  ff_pw_8192;
 extern const ymm_reg  ff_pw_m1;
 
+extern const ymm_reg  ff_pb_0;
 extern const ymm_reg  ff_pb_1;
+extern const ymm_reg  ff_pb_2;
 extern const ymm_reg  ff_pb_3;
 extern const xmm_reg  ff_pb_80;
 extern const xmm_reg  ff_pb_F8;
@@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC;
 
 extern const xmm_reg  ff_ps_neg;
 
+extern const ymm_reg  ff_pd_1;
+
 #endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index d65660d..e7ce1b8 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -28,7 +28,7 @@ SECTION_RODATA 32
 
 cextern pw_16
 cextern pw_1
-pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly
+cextern pb_0
 
 pw_pixel_max: times 8 dw ((1 << 10)-1)
 
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 04d00ce..2b016f6 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -30,8 +30,8 @@ pw_bi_12:               times 16 dw  (1 << 12)
 max_pixels_8:           times 16 dw ((1 <<  8)-1)
 max_pixels_10:          times 16 dw ((1 << 10)-1)
 max_pixels_12:          times 16 dw ((1 << 12)-1)
-zero:                   times 8  dd 0
-one_per_32:             times 8  dd 1
+cextern pd_1
+cextern pb_0
 
 SECTION_TEXT 32
 %macro EPEL_TABLE 4
@@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2
 %if %2 == 8
     packuswb          %3, %4
 %else
-    CLIPW             %3, [zero], [max_pixels_%2]
+    CLIPW             %3, [pb_0], [max_pixels_%2]
 %if (%1 > 8 && notcpuflag(avx)) || %1 > 16
-    CLIPW             %4, [zero], [max_pixels_%2]
+    CLIPW             %4, [pb_0], [max_pixels_%2]
 %endif
 %endif
 %endmacro
@@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
     punpcklwd        m2, m2
 %endif
     dec           SHIFT
-    movdqu           m5, [one_per_32]
+    movdqu           m5, [pd_1]
     movd             m6, SHIFT
     pshufd           m2, m2, 0
     mov           SHIFT, oxm
@@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
 %if %2 == 8
     packuswb          m0, m0
 %else
-    CLIPW             m0, [zero], [max_pixels_%2]
+    CLIPW             m0, [pb_0], [max_pixels_%2]
 %endif
     PEL_%2STORE%1   dstq, m0, m1
     add             dstq, dststrideq             ; dst += dststride
@@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2,
 %if %2 == 8
     packuswb          m0, m0
 %else
-     CLIPW            m0, [zero], [max_pixels_%2]
+     CLIPW            m0, [pb_0], [max_pixels_%2]
 %endif
     PEL_%2STORE%1   dstq, m0, m1
     add             dstq, dststrideq             ; dst += dststride
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 6058967..5136121 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -27,7 +27,6 @@ SECTION_RODATA 32
 
 pw_mask10: times 16 dw 0x03FF
 pw_mask12: times 16 dw 0x0FFF
-pb_2:      times 32 db 2
 pw_m2:     times 16 dw -2
 pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 pb_eo:                   db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
@@ -35,6 +34,7 @@ cextern pw_m1
 cextern pw_1
 cextern pw_2
 cextern pb_1
+cextern pb_2
 
 SECTION_TEXT
 
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index cc48556..85ee56d 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -23,7 +23,7 @@
 %include "libavutil/x86/x86util.asm"
 
 SECTION_RODATA
-pb_f: times 16 db 15
+cextern pb_15
 pb_zzzzzzzz77777777: times 8 db -1
 pb_7: times 8 db 7
 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
@@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
 
 INIT_XMM sse4
 cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
-    mova    m5, [pb_f]
+    mova    m5, [pb_15]
     mova    m6, [pb_zzzzzzzz77777777]
     mova    m4, [pb_zzzz3333zzzzbbbb]
     mova    m3, [pb_zz11zz55zz99zzdd]
diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm
index 169676f..08b3ae8 100644
--- a/libavcodec/x86/vp9intrapred.asm
+++ b/libavcodec/x86/vp9intrapred.asm
@@ -64,8 +64,7 @@ pb_6xm1_BDF_0to6: times 6 db -1
                   db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
 pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
 
-pb_2:  times 32 db 2
-pb_15: times 16 db 15
+cextern pb_15
 pb_15x0_1xm1: times 15 db 0
               db -1
 pb_0to2_5x3: db 0, 1, 2
@@ -76,6 +75,7 @@ pb_6x0_2xm1: times 6 db 0
              times 2 db -1
 
 cextern pb_1
+cextern pb_2
 cextern pb_3
 cextern pw_2
 cextern pw_4
-- 
1.9.2.msysgit.0



More information about the ffmpeg-devel mailing list