[FFmpeg-cvslog] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

Anton Khirnov git at videolan.org
Fri May 31 20:32:31 EEST 2024


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Sun Apr  7 11:23:48 2024 +0200| [f0aece90d99ce7ceacc8ef947d01cfdeedae1d6c] | committer: Anton Khirnov

lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

It is more efficient and easier to manage.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f0aece90d99ce7ceacc8ef947d01cfdeedae1d6c
---

 libavcodec/hevcdec.c | 55 +++++++++++++++++++++++++++++-----------------------
 libavcodec/hevcdec.h |  6 +++++-
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index e84f45e3f8..88a481c043 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2598,7 +2598,7 @@ static int hls_slice_data(HEVCContext *s)
 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist,
                                 int job, int self_id)
 {
-    HEVCLocalContext *lc = ((HEVCLocalContext**)hevc_lclist)[self_id];
+    HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id];
     const HEVCContext *const s = lc->parent;
     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
     int more_data   = 1;
@@ -2682,7 +2682,7 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
 {
     const uint8_t *data = nal->data;
     int length          = nal->size;
-    HEVCLocalContext *lc = s->HEVClc;
+    HEVCLocalContext *lc;
     int *ret;
     int64_t offset;
     int64_t startheader, cmpt = 0;
@@ -2696,19 +2696,31 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
         return AVERROR_INVALIDDATA;
     }
 
-    for (i = 1; i < s->threads_number; i++) {
-        if (i < s->nb_local_ctx)
-            continue;
-        s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext));
-        if (!s->local_ctx[i])
+    if (s->threads_number > s->nb_local_ctx) {
+        HEVCLocalContext *tmp = av_malloc_array(s->threads_number, sizeof(*s->local_ctx));
+
+        if (!tmp)
             return AVERROR(ENOMEM);
-        s->nb_local_ctx++;
 
-        s->local_ctx[i]->logctx = s->avctx;
-        s->local_ctx[i]->parent = s;
-        s->local_ctx[i]->common_cabac_state = &s->cabac;
+        memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * s->nb_local_ctx);
+        av_free(s->local_ctx);
+        s->local_ctx = tmp;
+        s->HEVClc    = &s->local_ctx[0];
+
+        for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) {
+            tmp = &s->local_ctx[i];
+
+            memset(tmp, 0, sizeof(*tmp));
+
+            tmp->logctx             = s->avctx;
+            tmp->parent             = s;
+            tmp->common_cabac_state = &s->cabac;
+        }
+
+        s->nb_local_ctx = s->threads_number;
     }
 
+    lc     = &s->local_ctx[0];
     offset = (lc->gb.index >> 3);
 
     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
@@ -2744,8 +2756,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
     s->data = data;
 
     for (i = 1; i < s->threads_number; i++) {
-        s->local_ctx[i]->first_qp_group = 1;
-        s->local_ctx[i]->qp_y = s->HEVClc->qp_y;
+        s->local_ctx[i].first_qp_group = 1;
+        s->local_ctx[i].qp_y = s->HEVClc->qp_y;
     }
 
     atomic_store(&s->wpp_err, 0);
@@ -3474,12 +3486,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
     av_freep(&s->sh.offset);
     av_freep(&s->sh.size);
 
-    if (s->local_ctx) {
-        for (i = 1; i < s->nb_local_ctx; i++) {
-            av_freep(&s->local_ctx[i]);
-        }
-    }
-    av_freep(&s->HEVClc);
     av_freep(&s->local_ctx);
 
     ff_h2645_packet_uninit(&s->pkt);
@@ -3496,15 +3502,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
-    s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
-    s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number);
-    if (!s->HEVClc || !s->local_ctx)
+    s->local_ctx = av_mallocz(sizeof(*s->local_ctx));
+    if (!s->local_ctx)
         return AVERROR(ENOMEM);
+    s->nb_local_ctx = 1;
+
+    s->HEVClc = &s->local_ctx[0];
+
     s->HEVClc->parent = s;
     s->HEVClc->logctx = avctx;
     s->HEVClc->common_cabac_state = &s->cabac;
-    s->local_ctx[0] = s->HEVClc;
-    s->nb_local_ctx = 1;
 
     s->output_frame = av_frame_alloc();
     if (!s->output_frame)
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index ca68fb54a7..5aa3d40450 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -439,13 +439,17 @@ typedef struct HEVCLocalContext {
     /* properties of the boundary of the current CTB for the purposes
      * of the deblocking filter */
     int boundary_flags;
+
+    // an array of these structs is used for per-thread state - pad its size
+    // to avoid false sharing
+    char padding[128];
 } HEVCLocalContext;
 
 typedef struct HEVCContext {
     const AVClass *c;  // needed by private avoptions
     AVCodecContext *avctx;
 
-    HEVCLocalContext    **local_ctx;
+    HEVCLocalContext     *local_ctx;
     unsigned           nb_local_ctx;
 
     HEVCLocalContext    *HEVClc;



More information about the ffmpeg-cvslog mailing list