[FFmpeg-cvslog] avcodec/x86/hevc/dsp_init: Make ff_hevc_put_bi_[eq]?pel* funcs static

Andreas Rheinhardt git at videolan.org
Wed Apr 9 15:03:04 EEST 2025


ffmpeg | branch: master | Andreas Rheinhardt <andreas.rheinhardt at outlook.com> | Thu Apr  3 18:44:47 2025 +0200| [6875f495d916b9c7a8d58a1bc84d2cd25c186d80] | committer: Andreas Rheinhardt

avcodec/x86/hevc/dsp_init: Make ff_hevc_put_bi_[eq]?pel* funcs static

Given that there are actually ASM functions of this type,
one can't simply remove the ff_ prefix from the definitions
and declare them as static. Yet one can do nearly that
if one keeps the ff_ prefix and removes the declarations
for the (now static) functions defined in dsp_init.c
from hevcdsp.h and if one defines the functions in the correct
order (smaller width first) so that no forward declarations
are necessary (which was already true).

The new declarations avoid nested macros to simplify things.
It nevertheless turned out to be beneficial line-wise.

(It would be possible to avoid most of these declarations:
It is legal to repeat a function declaration without static
if the first declaration declared a function as static.
So if the macros simply declared all the functions that
they call, one could avoid declarations for the functions
that are called. While this is legal C, it unfortunately
clashes with GCC's -Wredundant-decls (which configure enables)
and it is also ugly, as these macro definitions would
provide declarations used in ff_hevc_dsp_init_x86().)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6875f495d916b9c7a8d58a1bc84d2cd25c186d80
---

 libavcodec/x86/hevc/dsp_init.c |   8 +-
 libavcodec/x86/hevcdsp.h       | 224 +++++++++++++++++------------------------
 2 files changed, 95 insertions(+), 137 deletions(-)

diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index f8057dee9f..dc4157b071 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -217,7 +217,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst
     }                                                                                                           \
 }
 #define mc_rep_bi_func(name, bitd, step, W, opt) \
-void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src,         \
+static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src,  \
                                                ptrdiff_t _srcstride, const int16_t *_src2,                      \
                                                int height, intptr_t mx, intptr_t my, int width)                 \
 {                                                                                                               \
@@ -257,7 +257,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts
                                                    height, mx, my, width);                                      \
 }
 #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
-void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,           \
+static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,    \
                                                ptrdiff_t _srcstride, const int16_t *src2,                       \
                                                int height, intptr_t mx, intptr_t my, int width)                 \
 {                                                                                                               \
@@ -284,7 +284,7 @@ static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src
 }
 
 #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)                                    \
-void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,         \
+static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,  \
                                                ptrdiff_t _srcstride, const int16_t *src2,                     \
                                                int height, intptr_t mx, intptr_t my, int width)               \
 {                                                                                                             \
@@ -320,7 +320,7 @@ static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src,
 }
 
 #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                             \
-void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,          \
+static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src,   \
                                               ptrdiff_t _srcstride, const int16_t *src2,                      \
                                               int height, intptr_t mx, intptr_t my, int width)                \
 {                                                                                                             \
diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h
index b89a67f2e9..b18d9449d7 100644
--- a/libavcodec/x86/hevcdsp.h
+++ b/libavcodec/x86/hevcdsp.h
@@ -37,35 +37,17 @@ dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt;
 dst ## _bi_w[idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt
 
 
-#define PEL_PROTOTYPE(name, D, opt) \
-void ff_hevc_put_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);      \
+typedef void bi_pel_func(uint8_t *_dst, ptrdiff_t _dststride,
+                         const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+                         int height, intptr_t mx, intptr_t my, int width);
 
+#define BI_PEL_PROTOTYPE(name, W, D, opt) \
+bi_pel_func ff_hevc_put_bi_ ## name ## W ## _ ## D ## _##opt
 
 ///////////////////////////////////////////////////////////////////////////////
 // MC functions
 ///////////////////////////////////////////////////////////////////////////////
 
-#define EPEL_PROTOTYPES(fname, bitd, opt) \
-        PEL_PROTOTYPE(fname##4,  bitd, opt); \
-        PEL_PROTOTYPE(fname##6,  bitd, opt); \
-        PEL_PROTOTYPE(fname##8,  bitd, opt); \
-        PEL_PROTOTYPE(fname##12, bitd, opt); \
-        PEL_PROTOTYPE(fname##16, bitd, opt); \
-        PEL_PROTOTYPE(fname##24, bitd, opt); \
-        PEL_PROTOTYPE(fname##32, bitd, opt); \
-        PEL_PROTOTYPE(fname##48, bitd, opt); \
-        PEL_PROTOTYPE(fname##64, bitd, opt)
-
-#define QPEL_PROTOTYPES(fname, bitd, opt) \
-        PEL_PROTOTYPE(fname##4,  bitd, opt); \
-        PEL_PROTOTYPE(fname##8,  bitd, opt); \
-        PEL_PROTOTYPE(fname##12, bitd, opt); \
-        PEL_PROTOTYPE(fname##16, bitd, opt); \
-        PEL_PROTOTYPE(fname##24, bitd, opt); \
-        PEL_PROTOTYPE(fname##32, bitd, opt); \
-        PEL_PROTOTYPE(fname##48, bitd, opt); \
-        PEL_PROTOTYPE(fname##64, bitd, opt)
-
 #define WEIGHTING_PROTOTYPE(width, bitd, opt) \
 void ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom,  int _wx, int _ox);      \
 void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)
@@ -83,125 +65,101 @@ void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride,
 
 
 ///////////////////////////////////////////////////////////////////////////////
-// QPEL_PIXELS EPEL_PIXELS
+// EPEL_PIXELS
 ///////////////////////////////////////////////////////////////////////////////
-EPEL_PROTOTYPES(pel_pixels ,  8, sse4);
-EPEL_PROTOTYPES(pel_pixels , 10, sse4);
-EPEL_PROTOTYPES(pel_pixels , 12, sse4);
-
-void ff_hevc_put_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-
-void ff_hevc_put_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
-void ff_hevc_put_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width);
+
+BI_PEL_PROTOTYPE(pel_pixels,  4,  8, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  4, 10, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  4, 12, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  6,  8, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  6, 10, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  6, 12, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  8,  8, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  8, 10, sse4);
+BI_PEL_PROTOTYPE(pel_pixels,  8, 12, sse4);
+BI_PEL_PROTOTYPE(pel_pixels, 12,  8, sse4);
+BI_PEL_PROTOTYPE(pel_pixels, 16,  8, sse4);
+BI_PEL_PROTOTYPE(pel_pixels, 16, 10, avx2);
+BI_PEL_PROTOTYPE(pel_pixels, 32,  8, avx2);
 
 ///////////////////////////////////////////////////////////////////////////////
 // EPEL
 ///////////////////////////////////////////////////////////////////////////////
-EPEL_PROTOTYPES(epel_h ,  8, sse4);
-EPEL_PROTOTYPES(epel_h , 10, sse4);
-EPEL_PROTOTYPES(epel_h , 12, sse4);
-
-EPEL_PROTOTYPES(epel_v ,  8, sse4);
-EPEL_PROTOTYPES(epel_v , 10, sse4);
-EPEL_PROTOTYPES(epel_v , 12, sse4);
-
-EPEL_PROTOTYPES(epel_hv ,  8, sse4);
-EPEL_PROTOTYPES(epel_hv , 10, sse4);
-EPEL_PROTOTYPES(epel_hv , 12, sse4);
-
-PEL_PROTOTYPE(epel_h16, 8, avx2);
-PEL_PROTOTYPE(epel_h24, 8, avx2);
-PEL_PROTOTYPE(epel_h32, 8, avx2);
-PEL_PROTOTYPE(epel_h48, 8, avx2);
-PEL_PROTOTYPE(epel_h64, 8, avx2);
-
-PEL_PROTOTYPE(epel_h16,10, avx2);
-PEL_PROTOTYPE(epel_h24,10, avx2);
-PEL_PROTOTYPE(epel_h32,10, avx2);
-PEL_PROTOTYPE(epel_h48,10, avx2);
-PEL_PROTOTYPE(epel_h64,10, avx2);
-
-PEL_PROTOTYPE(epel_v16, 8, avx2);
-PEL_PROTOTYPE(epel_v24, 8, avx2);
-PEL_PROTOTYPE(epel_v32, 8, avx2);
-PEL_PROTOTYPE(epel_v48, 8, avx2);
-PEL_PROTOTYPE(epel_v64, 8, avx2);
-
-PEL_PROTOTYPE(epel_v16,10, avx2);
-PEL_PROTOTYPE(epel_v24,10, avx2);
-PEL_PROTOTYPE(epel_v32,10, avx2);
-PEL_PROTOTYPE(epel_v48,10, avx2);
-PEL_PROTOTYPE(epel_v64,10, avx2);
-
-PEL_PROTOTYPE(epel_hv16, 8, avx2);
-PEL_PROTOTYPE(epel_hv24, 8, avx2);
-PEL_PROTOTYPE(epel_hv32, 8, avx2);
-PEL_PROTOTYPE(epel_hv48, 8, avx2);
-PEL_PROTOTYPE(epel_hv64, 8, avx2);
-
-PEL_PROTOTYPE(epel_hv16,10, avx2);
-PEL_PROTOTYPE(epel_hv24,10, avx2);
-PEL_PROTOTYPE(epel_hv32,10, avx2);
-PEL_PROTOTYPE(epel_hv48,10, avx2);
-PEL_PROTOTYPE(epel_hv64,10, avx2);
+
+BI_PEL_PROTOTYPE(epel_h,   4,  8, sse4);
+BI_PEL_PROTOTYPE(epel_h,   4, 10, sse4);
+BI_PEL_PROTOTYPE(epel_h,   4, 12, sse4);
+BI_PEL_PROTOTYPE(epel_h,   6,  8, sse4);
+BI_PEL_PROTOTYPE(epel_h,   6, 10, sse4);
+BI_PEL_PROTOTYPE(epel_h,   6, 12, sse4);
+BI_PEL_PROTOTYPE(epel_h,   8,  8, sse4);
+BI_PEL_PROTOTYPE(epel_h,   8, 10, sse4);
+BI_PEL_PROTOTYPE(epel_h,   8, 12, sse4);
+BI_PEL_PROTOTYPE(epel_h,  12,  8, sse4);
+BI_PEL_PROTOTYPE(epel_h,  16,  8, sse4);
+BI_PEL_PROTOTYPE(epel_h,  16, 10, avx2);
+BI_PEL_PROTOTYPE(epel_h,  32,  8, avx2);
+
+BI_PEL_PROTOTYPE(epel_hv,  4,  8, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  4, 10, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  4, 12, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  6,  8, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  6, 10, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  6, 12, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  8,  8, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  8, 10, sse4);
+BI_PEL_PROTOTYPE(epel_hv,  8, 12, sse4);
+BI_PEL_PROTOTYPE(epel_hv, 16,  8, sse4);
+BI_PEL_PROTOTYPE(epel_hv, 16, 10, avx2);
+BI_PEL_PROTOTYPE(epel_hv, 32,  8, avx2);
+
+BI_PEL_PROTOTYPE(epel_v,   4,  8, sse4);
+BI_PEL_PROTOTYPE(epel_v,   4, 10, sse4);
+BI_PEL_PROTOTYPE(epel_v,   4, 12, sse4);
+BI_PEL_PROTOTYPE(epel_v,   6,  8, sse4);
+BI_PEL_PROTOTYPE(epel_v,   6, 10, sse4);
+BI_PEL_PROTOTYPE(epel_v,   6, 12, sse4);
+BI_PEL_PROTOTYPE(epel_v,   8,  8, sse4);
+BI_PEL_PROTOTYPE(epel_v,   8, 10, sse4);
+BI_PEL_PROTOTYPE(epel_v,   8, 12, sse4);
+BI_PEL_PROTOTYPE(epel_v,  12,  8, sse4);
+BI_PEL_PROTOTYPE(epel_v,  16,  8, sse4);
+BI_PEL_PROTOTYPE(epel_v,  16, 10, avx2);
+BI_PEL_PROTOTYPE(epel_v,  32,  8, avx2);
 
 ///////////////////////////////////////////////////////////////////////////////
 // QPEL
 ///////////////////////////////////////////////////////////////////////////////
-QPEL_PROTOTYPES(qpel_h ,  8, sse4);
-QPEL_PROTOTYPES(qpel_h , 10, sse4);
-QPEL_PROTOTYPES(qpel_h , 12, sse4);
-
-QPEL_PROTOTYPES(qpel_v,  8, sse4);
-QPEL_PROTOTYPES(qpel_v, 10, sse4);
-QPEL_PROTOTYPES(qpel_v, 12, sse4);
-
-QPEL_PROTOTYPES(qpel_hv,  8, sse4);
-QPEL_PROTOTYPES(qpel_hv, 10, sse4);
-QPEL_PROTOTYPES(qpel_hv, 12, sse4);
-
-PEL_PROTOTYPE(qpel_h16, 8, avx2);
-PEL_PROTOTYPE(qpel_h24, 8, avx2);
-PEL_PROTOTYPE(qpel_h32, 8, avx2);
-PEL_PROTOTYPE(qpel_h48, 8, avx2);
-PEL_PROTOTYPE(qpel_h64, 8, avx2);
-
-PEL_PROTOTYPE(qpel_h16,10, avx2);
-PEL_PROTOTYPE(qpel_h24,10, avx2);
-PEL_PROTOTYPE(qpel_h32,10, avx2);
-PEL_PROTOTYPE(qpel_h48,10, avx2);
-PEL_PROTOTYPE(qpel_h64,10, avx2);
-
-PEL_PROTOTYPE(qpel_v16, 8, avx2);
-PEL_PROTOTYPE(qpel_v24, 8, avx2);
-PEL_PROTOTYPE(qpel_v32, 8, avx2);
-PEL_PROTOTYPE(qpel_v48, 8, avx2);
-PEL_PROTOTYPE(qpel_v64, 8, avx2);
-
-PEL_PROTOTYPE(qpel_v16,10, avx2);
-PEL_PROTOTYPE(qpel_v24,10, avx2);
-PEL_PROTOTYPE(qpel_v32,10, avx2);
-PEL_PROTOTYPE(qpel_v48,10, avx2);
-PEL_PROTOTYPE(qpel_v64,10, avx2);
-
-PEL_PROTOTYPE(qpel_hv16, 8, avx2);
-PEL_PROTOTYPE(qpel_hv24, 8, avx2);
-PEL_PROTOTYPE(qpel_hv32, 8, avx2);
-PEL_PROTOTYPE(qpel_hv48, 8, avx2);
-PEL_PROTOTYPE(qpel_hv64, 8, avx2);
-
-PEL_PROTOTYPE(qpel_hv16,10, avx2);
-PEL_PROTOTYPE(qpel_hv24,10, avx2);
-PEL_PROTOTYPE(qpel_hv32,10, avx2);
-PEL_PROTOTYPE(qpel_hv48,10, avx2);
-PEL_PROTOTYPE(qpel_hv64,10, avx2);
+
+BI_PEL_PROTOTYPE(qpel_h,   4,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_h,   4, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_h,   4, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_h,   8,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_h,   8, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_h,   8, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_h,  12,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_h,  16,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_h,  16, 10, avx2);
+BI_PEL_PROTOTYPE(qpel_h,  32,  8, avx2);
+
+BI_PEL_PROTOTYPE(qpel_hv,  4,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_hv,  4, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_hv,  4, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_hv,  8,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_hv,  8, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_hv,  8, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_hv, 16, 10, avx2);
+
+BI_PEL_PROTOTYPE(qpel_v,   4,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_v,   4, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_v,   4, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_v,   8,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_v,   8, 10, sse4);
+BI_PEL_PROTOTYPE(qpel_v,   8, 12, sse4);
+BI_PEL_PROTOTYPE(qpel_v,  12,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_v,  16,  8, sse4);
+BI_PEL_PROTOTYPE(qpel_v,  16, 10, avx2);
+BI_PEL_PROTOTYPE(qpel_v,  32,  8, avx2);
 
 WEIGHTING_PROTOTYPES(8, sse4);
 WEIGHTING_PROTOTYPES(10, sse4);



More information about the ffmpeg-cvslog mailing list