[FFmpeg-cvslog] hevcdsp: split the pred functions by width

Anton Khirnov git at videolan.org
Tue Dec 15 10:47:13 CET 2015


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Fri Jul 24 18:56:54 2015 +0200| [688417399c69aadd4c287bdb0dec82ef8799011c] | committer: Anton Khirnov

hevcdsp: split the pred functions by width

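This should allow for more efficient SIMD, since each function now handles
a single fixed block width instead of taking the width as a runtime argument.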

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=688417399c69aadd4c287bdb0dec82ef8799011c
---

 libavcodec/hevc.c             |  118 ++++++++++++++++++++---------------------
 libavcodec/hevcdsp.c          |   33 ++++++++++--
 libavcodec/hevcdsp.h          |   36 ++++++++-----
 libavcodec/hevcdsp_template.c |   81 +++++++++++++++++++++-------
 4 files changed, 174 insertions(+), 94 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 63d3bc7..699e680 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -1725,32 +1725,32 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                     s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                     dst0, s->frame->linesize[0], tmp,
-                                     tmpstride, nPbW, nPbH);
+            s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
+                                               s->sh.luma_weight_l0[current_mv.ref_idx[0]],
+                                               s->sh.luma_offset_l0[current_mv.ref_idx[0]],
+                                               dst0, s->frame->linesize[0], tmp,
+                                               tmpstride, nPbH);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
         }
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride,
-                                     nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride,
-                                     nPbW / 2, nPbH / 2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
+                                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
+                                                      dst1, s->frame->linesize[1], tmp, tmpstride,
+                                                      nPbH / 2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
+                                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
+                                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
+                                                      nPbH / 2);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
         }
     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
@@ -1761,13 +1761,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                      s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                      s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                      dst0, s->frame->linesize[0], tmp, tmpstride,
-                                      nPbW, nPbH);
+            s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
+                                               s->sh.luma_weight_l1[current_mv.ref_idx[1]],
+                                               s->sh.luma_offset_l1[current_mv.ref_idx[1]],
+                                               dst0, s->frame->linesize[0], tmp, tmpstride,
+                                               nPbH);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
@@ -1775,17 +1775,17 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
+                                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
+                                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
+                                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
+                                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
         }
     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
@@ -1800,16 +1800,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
-                                         s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                         s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                         dst0, s->frame->linesize[0],
-                                         tmp, tmp2, tmpstride, nPbW, nPbH);
+            s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
+                                                   s->sh.luma_weight_l0[current_mv.ref_idx[0]],
+                                                   s->sh.luma_weight_l1[current_mv.ref_idx[1]],
+                                                   s->sh.luma_offset_l0[current_mv.ref_idx[0]],
+                                                   s->sh.luma_offset_l1[current_mv.ref_idx[1]],
+                                                   dst0, s->frame->linesize[0],
+                                                   tmp, tmp2, tmpstride, nPbH);
         } else {
-            s->hevcdsp.put_unweighted_pred_avg(dst0, s->frame->linesize[0],
-                                               tmp, tmp2, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
+                                                         tmp, tmp2, tmpstride, nPbH);
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
@@ -1819,23 +1819,23 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 
         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                         dst1, s->frame->linesize[1], tmp, tmp3,
-                                         tmpstride, nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                         dst2, s->frame->linesize[2], tmp2, tmp4,
-                                         tmpstride, nPbW / 2, nPbH / 2);
+            s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
+                                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
+                                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
+                                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
+                                                          dst1, s->frame->linesize[1], tmp, tmp3,
+                                                          tmpstride, nPbH / 2);
+            s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
+                                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
+                                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
+                                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
+                                                          dst2, s->frame->linesize[2], tmp2, tmp4,
+                                                          tmpstride, nPbH / 2);
         } else {
-            s->hevcdsp.put_unweighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
         }
     }
 }
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 7f42399..67c2705 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -128,6 +128,18 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->put_hevc_epel[1][0][i] = FUNC(put_hevc_epel_v_      ## width, depth);  \
     hevcdsp->put_hevc_epel[1][1][i] = FUNC(put_hevc_epel_hv_     ## width, depth);  \
 
+#define PRED_FUNC(i, width, depth)                                                        \
+    hevcdsp->put_unweighted_pred[i]     = FUNC(put_unweighted_pred_ ## width, depth);     \
+    hevcdsp->put_unweighted_pred_avg[i] = FUNC(put_unweighted_pred_avg_ ## width, depth); \
+    hevcdsp->weighted_pred[i]           = FUNC(put_weighted_pred_ ## width, depth);       \
+    hevcdsp->weighted_pred_avg[i]       = FUNC(put_weighted_pred_avg_ ## width, depth);   \
+
+#define PRED_FUNC_CHROMA(i, width, depth)                                                        \
+    hevcdsp->put_unweighted_pred_chroma[i]     = FUNC(put_unweighted_pred_ ## width, depth);     \
+    hevcdsp->put_unweighted_pred_avg_chroma[i] = FUNC(put_unweighted_pred_avg_ ## width, depth); \
+    hevcdsp->weighted_pred_chroma[i]           = FUNC(put_weighted_pred_ ## width, depth);       \
+    hevcdsp->weighted_pred_avg_chroma[i]       = FUNC(put_weighted_pred_avg_ ## width, depth);   \
+
 #define HEVC_DSP(depth)                                                     \
     hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
     hevcdsp->transquant_bypass[0]   = FUNC(transquant_bypass4x4, depth);    \
@@ -169,11 +181,22 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     EPEL_FUNC(6, 24, depth);                                                \
     EPEL_FUNC(7, 32, depth);                                                \
                                                                             \
-    hevcdsp->put_unweighted_pred   = FUNC(put_unweighted_pred, depth);      \
-    hevcdsp->put_unweighted_pred_avg = FUNC(put_unweighted_pred_avg, depth);    \
-                                                                            \
-    hevcdsp->weighted_pred         = FUNC(weighted_pred, depth);            \
-    hevcdsp->weighted_pred_avg     = FUNC(weighted_pred_avg, depth);        \
+    PRED_FUNC(0, 4,  depth);                                                \
+    PRED_FUNC(1, 8,  depth);                                                \
+    PRED_FUNC(2, 12, depth);                                                \
+    PRED_FUNC(3, 16, depth);                                                \
+    PRED_FUNC(4, 24, depth);                                                \
+    PRED_FUNC(5, 32, depth);                                                \
+    PRED_FUNC(6, 48, depth);                                                \
+    PRED_FUNC(7, 64, depth);                                                \
+    PRED_FUNC_CHROMA(0, 2,  depth);                                         \
+    PRED_FUNC_CHROMA(1, 4,  depth);                                         \
+    PRED_FUNC_CHROMA(2, 6, depth);                                          \
+    PRED_FUNC_CHROMA(3, 8, depth);                                          \
+    PRED_FUNC_CHROMA(4, 12, depth);                                         \
+    PRED_FUNC_CHROMA(5, 16, depth);                                         \
+    PRED_FUNC_CHROMA(6, 24, depth);                                         \
+    PRED_FUNC_CHROMA(7, 32, depth);                                         \
                                                                             \
     hevcdsp->hevc_h_loop_filter_luma     = FUNC(hevc_h_loop_filter_luma, depth);   \
     hevcdsp->hevc_v_loop_filter_luma     = FUNC(hevc_v_loop_filter_luma, depth);   \
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index e906c5e..59dd9b2 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -65,18 +65,30 @@ typedef struct HEVCDSPContext {
                                    ptrdiff_t srcstride, int height,
                                    int mx, int my, int16_t *mcbuffer);
 
-    void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
-                                ptrdiff_t srcstride, int width, int height);
-    void (*put_unweighted_pred_avg)(uint8_t *dst, ptrdiff_t dststride,
-                                    int16_t *src1, int16_t *src2,
-                                    ptrdiff_t srcstride, int width, int height);
-    void (*weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
-                          uint8_t *dst, ptrdiff_t dststride, int16_t *src,
-                          ptrdiff_t srcstride, int width, int height);
-    void (*weighted_pred_avg)(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
-                              int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
-                              ptrdiff_t dststride, int16_t *src1, int16_t *src2,
-                              ptrdiff_t srcstride, int width, int height);
+    void (*put_unweighted_pred[8])(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
+                                   ptrdiff_t srcstride, int height);
+    void (*put_unweighted_pred_chroma[8])(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
+                                          ptrdiff_t srcstride, int height);
+    void (*put_unweighted_pred_avg[8])(uint8_t *dst, ptrdiff_t dststride,
+                                       int16_t *src1, int16_t *src2,
+                                       ptrdiff_t srcstride, int height);
+    void (*put_unweighted_pred_avg_chroma[8])(uint8_t *dst, ptrdiff_t dststride,
+                                              int16_t *src1, int16_t *src2,
+                                              ptrdiff_t srcstride, int height);
+    void (*weighted_pred[8])(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
+                             uint8_t *dst, ptrdiff_t dststride, int16_t *src,
+                             ptrdiff_t srcstride, int height);
+    void (*weighted_pred_chroma[8])(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
+                                    uint8_t *dst, ptrdiff_t dststride, int16_t *src,
+                                    ptrdiff_t srcstride, int height);
+    void (*weighted_pred_avg[8])(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
+                                 int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
+                                 ptrdiff_t dststride, int16_t *src1, int16_t *src2,
+                                 ptrdiff_t srcstride, int height);
+    void (*weighted_pred_avg_chroma[8])(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
+                                        int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
+                                        ptrdiff_t dststride, int16_t *src1, int16_t *src2,
+                                        ptrdiff_t srcstride, int height);
 
     void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                     int beta, int *tc,
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index d832904..723f4d4 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -1130,9 +1130,10 @@ EPEL(6)
 EPEL(4)
 EPEL(2)
 
-static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
-                                      int16_t *src, ptrdiff_t srcstride,
-                                      int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
+                          int16_t *src, ptrdiff_t srcstride,
+                          int width, int height)
 {
     int x, y;
     pixel *dst          = (pixel *)_dst;
@@ -1152,10 +1153,11 @@ static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 }
 
-static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
-                                          int16_t *src1, int16_t *src2,
-                                          ptrdiff_t srcstride,
-                                          int width, int height)
+static av_always_inline void
+FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
+                              int16_t *src1, int16_t *src2,
+                              ptrdiff_t srcstride,
+                              int width, int height)
 {
     int x, y;
     pixel *dst          = (pixel *)_dst;
@@ -1177,10 +1179,11 @@ static void FUNC(put_unweighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
     }
 }
 
-static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
-                                uint8_t *_dst, ptrdiff_t _dststride,
-                                int16_t *src, ptrdiff_t srcstride,
-                                int width, int height)
+static av_always_inline void
+FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
+                    uint8_t *_dst, ptrdiff_t _dststride,
+                    int16_t *src, ptrdiff_t srcstride,
+                    int width, int height)
 {
     int shift, log2Wd, wx, ox, x, y, offset;
     pixel *dst          = (pixel *)_dst;
@@ -1205,13 +1208,14 @@ static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
     }
 }
 
-static void FUNC(weighted_pred_avg)(uint8_t denom,
-                                    int16_t wl0Flag, int16_t wl1Flag,
-                                    int16_t ol0Flag, int16_t ol1Flag,
-                                    uint8_t *_dst, ptrdiff_t _dststride,
-                                    int16_t *src1, int16_t *src2,
-                                    ptrdiff_t srcstride,
-                                    int width, int height)
+static av_always_inline void
+FUNC(weighted_pred_avg)(uint8_t denom,
+                        int16_t wl0Flag, int16_t wl1Flag,
+                        int16_t ol0Flag, int16_t ol1Flag,
+                        uint8_t *_dst, ptrdiff_t _dststride,
+                        int16_t *src1, int16_t *src2,
+                        ptrdiff_t srcstride,
+                        int width, int height)
 {
     int shift, log2Wd, w0, w1, o0, o1, x, y;
     pixel *dst = (pixel *)_dst;
@@ -1234,6 +1238,47 @@ static void FUNC(weighted_pred_avg)(uint8_t denom,
     }
 }
 
+#define PUT_PRED(w)                                                                            \
+static void FUNC(put_unweighted_pred_ ## w)(uint8_t *dst, ptrdiff_t dststride,                 \
+                                            int16_t *src, ptrdiff_t srcstride,                 \
+                                            int height)                                        \
+{                                                                                              \
+    FUNC(put_unweighted_pred)(dst, dststride, src, srcstride, w, height);                      \
+}                                                                                              \
+static void FUNC(put_unweighted_pred_avg_ ## w)(uint8_t *dst, ptrdiff_t dststride,             \
+                                                int16_t *src1, int16_t *src2,                  \
+                                                ptrdiff_t srcstride, int height)               \
+{                                                                                              \
+    FUNC(put_unweighted_pred_avg)(dst, dststride, src1, src2, srcstride, w, height);           \
+}                                                                                              \
+static void FUNC(put_weighted_pred_ ## w)(uint8_t denom, int16_t weight, int16_t offset,       \
+                                          uint8_t *dst, ptrdiff_t dststride,                   \
+                                          int16_t *src, ptrdiff_t srcstride, int height)       \
+{                                                                                              \
+    FUNC(weighted_pred)(denom, weight, offset,                                                 \
+                        dst, dststride, src, srcstride, w, height);                            \
+}                                                                                              \
+static void FUNC(put_weighted_pred_avg_ ## w)(uint8_t denom, int16_t weight0, int16_t weight1, \
+                                              int16_t offset0, int16_t offset1,                \
+                                              uint8_t *dst, ptrdiff_t dststride,               \
+                                              int16_t *src1, int16_t *src2,                    \
+                                              ptrdiff_t srcstride, int height)                 \
+{                                                                                              \
+    FUNC(weighted_pred_avg)(denom, weight0, weight1, offset0, offset1,                         \
+                            dst, dststride, src1, src2, srcstride, w, height);                 \
+}
+
+PUT_PRED(64)
+PUT_PRED(48)
+PUT_PRED(32)
+PUT_PRED(24)
+PUT_PRED(16)
+PUT_PRED(12)
+PUT_PRED(8)
+PUT_PRED(6)
+PUT_PRED(4)
+PUT_PRED(2)
+
 // line zero
 #define P3 pix[-4 * xstride]
 #define P2 pix[-3 * xstride]



More information about the ffmpeg-cvslog mailing list