[FFmpeg-cvslog] avcodec/lagarith: switch to planar rgb

Paul B Mahol git at videolan.org
Mon Sep 10 00:00:58 EEST 2018


ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Sun Sep  9 01:05:56 2018 +0200| [b2ffecbd0ccccfd1e379096bb62c15b06bb6ab63] | committer: Paul B Mahol

avcodec/lagarith: switch to planar rgb

Decoding speed goes from 363 fps to 428 fps for 640x480 video.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b2ffecbd0ccccfd1e379096bb62c15b06bb6ab63
---

 libavcodec/lagarith.c                   | 108 +++++++++-----------------------
 tests/ref/fate/lagarith-red             |  50 +++++++--------
 tests/ref/fate/lagarith-rgb24           |   8 +--
 tests/ref/fate/lagarith-ticket4119      |   4 +-
 tests/ref/fate/lagarith-ticket4119-cfr  | 100 ++++++++++++++---------------
 tests/ref/fate/lagarith-ticket4119-drop |   4 +-
 tests/ref/fate/lagarith-ticket4119-pass |   4 +-
 tests/ref/fate/lagarith-ticket4119-vfr  |   4 +-
 8 files changed, 118 insertions(+), 164 deletions(-)

diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 3d3b4d41f4..d88c5f5ae7 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -53,9 +53,6 @@ typedef struct LagarithContext {
     LLVidDSPContext llviddsp;
     int zeros;                  /**< number of consecutive zero bytes encountered */
     int zeros_rem;              /**< number of zero bytes remaining to output */
-    uint8_t *rgb_planes;
-    int      rgb_planes_allocated;
-    int rgb_stride;
 } LagarithContext;
 
 /**
@@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
     uint8_t frametype;
     uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
     uint32_t offs[4];
-    uint8_t *srcs[4], *dst;
+    uint8_t *srcs[4];
     int i, j, planes = 3;
     int ret;
 
@@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx,
 
     switch (frametype) {
     case FRAME_SOLID_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
     case FRAME_SOLID_GRAY:
         if (frametype == FRAME_SOLID_GRAY)
             if (avctx->bits_per_coded_sample == 24) {
-                avctx->pix_fmt = AV_PIX_FMT_RGB24;
+                avctx->pix_fmt = AV_PIX_FMT_GBRP;
             } else {
-                avctx->pix_fmt = AV_PIX_FMT_0RGB32;
+                avctx->pix_fmt = AV_PIX_FMT_GBRAP;
                 planes = 4;
             }
 
         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
 
-        dst = p->data[0];
         if (frametype == FRAME_SOLID_RGBA) {
-            int qwidth = avctx->width>>2;
-            uint64_t c = ((uint64_t)offset_gu << 32) | offset_gu;
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < qwidth; i++) {
-                AV_WN64(dst + i * 16    , c);
-                AV_WN64(dst + i * 16 + 8, c);
+            for (i = 0; i < avctx->height; i++) {
+                memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
+                memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
+                memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
+                memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width);
             }
-            for (i = 4*qwidth; i < avctx->width; i++)
-                AV_WN32(dst + i * 4, offset_gu);
-            dst += p->linesize[0];
-        }
         } else {
-            for (j = 0; j < avctx->height; j++) {
-                memset(dst, buf[1], avctx->width * planes);
-                dst += p->linesize[0];
+            for (i = 0; i < avctx->height; i++) {
+                for (j = 0; j < planes; j++)
+                    memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width);
             }
         }
         break;
     case FRAME_SOLID_COLOR:
         if (avctx->bits_per_coded_sample == 24) {
-            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRP;
         } else {
-            avctx->pix_fmt = AV_PIX_FMT_RGB32;
-            offset_gu |= 0xFFU << 24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRAP;
         }
 
         if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
             return ret;
 
-        dst = p->data[0];
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++)
-                if (avctx->bits_per_coded_sample == 24) {
-                    AV_WB24(dst + i * 3, offset_gu);
-                } else {
-                    AV_WN32(dst + i * 4, offset_gu);
-                }
-            dst += p->linesize[0];
+        for (i = 0; i < avctx->height; i++) {
+            memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
+            memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
+            memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
+            if (avctx->pix_fmt == AV_PIX_FMT_GBRAP)
+                memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width);
         }
         break;
     case FRAME_ARITH_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_GBRAP;
         planes = 4;
         offset_ry += 4;
         offs[3] = AV_RL32(buf + 9);
     case FRAME_ARITH_RGB24:
     case FRAME_U_RGB24:
         if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
-            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            avctx->pix_fmt = AV_PIX_FMT_GBRP;
 
         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
@@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
         offs[1] = offset_gu;
         offs[2] = offset_ry;
 
-        l->rgb_stride = FFALIGN(avctx->width, 16);
-        av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
-                       l->rgb_stride * avctx->height * planes + 1);
-        if (!l->rgb_planes) {
-            av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
-            return AVERROR(ENOMEM);
-        }
         for (i = 0; i < planes; i++)
-            srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
+            srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i];
         for (i = 0; i < planes; i++)
             if (buf_size <= offs[i]) {
                 av_log(avctx, AV_LOG_ERROR,
@@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx,
         for (i = 0; i < planes; i++)
             lag_decode_arith_plane(l, srcs[i],
                                    avctx->width, avctx->height,
-                                   -l->rgb_stride, buf + offs[i],
+                                   -p->linesize[i], buf + offs[i],
                                    buf_size - offs[i]);
-        dst = p->data[0];
-        for (i = 0; i < planes; i++)
-            srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
-        for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++) {
-                uint8_t r, g, b, a;
-                r = srcs[0][i];
-                g = srcs[1][i];
-                b = srcs[2][i];
-                r += g;
-                b += g;
-                if (frametype == FRAME_ARITH_RGBA) {
-                    a = srcs[3][i];
-                    AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
-                } else {
-                    dst[i * 3 + 0] = r;
-                    dst[i * 3 + 1] = g;
-                    dst[i * 3 + 2] = b;
-                }
-            }
-            dst += p->linesize[0];
-            for (i = 0; i < planes; i++)
-                srcs[i] += l->rgb_stride;
+        for (i = 0; i < avctx->height; i++) {
+            l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width);
+            l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width);
         }
+        FFSWAP(uint8_t*, p->data[0], p->data[1]);
+        FFSWAP(int, p->linesize[0], p->linesize[1]);
+        FFSWAP(uint8_t*, p->data[2], p->data[1]);
+        FFSWAP(int, p->linesize[2], p->linesize[1]);
         break;
     case FRAME_ARITH_YUY2:
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
@@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
 }
 #endif
 
-static av_cold int lag_decode_end(AVCodecContext *avctx)
-{
-    LagarithContext *l = avctx->priv_data;
-
-    av_freep(&l->rgb_planes);
-
-    return 0;
-}
-
 AVCodec ff_lagarith_decoder = {
     .name           = "lagarith",
     .long_name      = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
@@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = {
     .priv_data_size = sizeof(LagarithContext),
     .init           = lag_decode_init,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
-    .close          = lag_decode_end,
     .decode         = lag_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/tests/ref/fate/lagarith-red b/tests/ref/fate/lagarith-red
index 0e065d60d5..7cfb792d43 100644
--- a/tests/ref/fate/lagarith-red
+++ b/tests/ref/fate/lagarith-red
@@ -3,28 +3,28 @@
 #codec_id 0: rawvideo
 #dimensions 0: 320x240
 #sar 0: 0/1
-0,          0,          0,        1,   230400, 0x67dfe576
-0,          1,          1,        1,   230400, 0x67dfe576
-0,          2,          2,        1,   230400, 0x67dfe576
-0,          3,          3,        1,   230400, 0x67dfe576
-0,          4,          4,        1,   230400, 0x67dfe576
-0,          5,          5,        1,   230400, 0x67dfe576
-0,          6,          6,        1,   230400, 0x67dfe576
-0,          7,          7,        1,   230400, 0x67dfe576
-0,          8,          8,        1,   230400, 0x67dfe576
-0,          9,          9,        1,   230400, 0x67dfe576
-0,         10,         10,        1,   230400, 0x67dfe576
-0,         11,         11,        1,   230400, 0x67dfe576
-0,         12,         12,        1,   230400, 0x67dfe576
-0,         13,         13,        1,   230400, 0x67dfe576
-0,         14,         14,        1,   230400, 0x67dfe576
-0,         15,         15,        1,   230400, 0x67dfe576
-0,         16,         16,        1,   230400, 0x67dfe576
-0,         17,         17,        1,   230400, 0x67dfe576
-0,         18,         18,        1,   230400, 0x67dfe576
-0,         19,         19,        1,   230400, 0x67dfe576
-0,         20,         20,        1,   230400, 0x67dfe576
-0,         21,         21,        1,   230400, 0x67dfe576
-0,         22,         22,        1,   230400, 0x67dfe576
-0,         23,         23,        1,   230400, 0x67dfe576
-0,         24,         24,        1,   230400, 0x67dfe576
+0,          0,          0,        1,   230400, 0x77f0e576
+0,          1,          1,        1,   230400, 0x77f0e576
+0,          2,          2,        1,   230400, 0x77f0e576
+0,          3,          3,        1,   230400, 0x77f0e576
+0,          4,          4,        1,   230400, 0x77f0e576
+0,          5,          5,        1,   230400, 0x77f0e576
+0,          6,          6,        1,   230400, 0x77f0e576
+0,          7,          7,        1,   230400, 0x77f0e576
+0,          8,          8,        1,   230400, 0x77f0e576
+0,          9,          9,        1,   230400, 0x77f0e576
+0,         10,         10,        1,   230400, 0x77f0e576
+0,         11,         11,        1,   230400, 0x77f0e576
+0,         12,         12,        1,   230400, 0x77f0e576
+0,         13,         13,        1,   230400, 0x77f0e576
+0,         14,         14,        1,   230400, 0x77f0e576
+0,         15,         15,        1,   230400, 0x77f0e576
+0,         16,         16,        1,   230400, 0x77f0e576
+0,         17,         17,        1,   230400, 0x77f0e576
+0,         18,         18,        1,   230400, 0x77f0e576
+0,         19,         19,        1,   230400, 0x77f0e576
+0,         20,         20,        1,   230400, 0x77f0e576
+0,         21,         21,        1,   230400, 0x77f0e576
+0,         22,         22,        1,   230400, 0x77f0e576
+0,         23,         23,        1,   230400, 0x77f0e576
+0,         24,         24,        1,   230400, 0x77f0e576
diff --git a/tests/ref/fate/lagarith-rgb24 b/tests/ref/fate/lagarith-rgb24
index 63250c6bdc..dea49e91bf 100644
--- a/tests/ref/fate/lagarith-rgb24
+++ b/tests/ref/fate/lagarith-rgb24
@@ -3,7 +3,7 @@
 #codec_id 0: rawvideo
 #dimensions 0: 480x256
 #sar 0: 0/1
-0,          0,          0,        1,   368640, 0x26f74db2
-0,          1,          1,        1,   368640, 0x63b29ea4
-0,          2,          2,        1,   368640, 0x19467f03
-0,          3,          3,        1,   368640, 0x5fdc3575
+0,          0,          0,        1,   368640, 0x18364db2
+0,          1,          1,        1,   368640, 0x60e79ea4
+0,          2,          2,        1,   368640, 0xb28a7f03
+0,          3,          3,        1,   368640, 0x2ed83575
diff --git a/tests/ref/fate/lagarith-ticket4119 b/tests/ref/fate/lagarith-ticket4119
index c46ef041e4..c1de9dce0a 100644
--- a/tests/ref/fate/lagarith-ticket4119
+++ b/tests/ref/fate/lagarith-ticket4119
@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c
diff --git a/tests/ref/fate/lagarith-ticket4119-cfr b/tests/ref/fate/lagarith-ticket4119-cfr
index 324fe4483f..1b689011b4 100644
--- a/tests/ref/fate/lagarith-ticket4119-cfr
+++ b/tests/ref/fate/lagarith-ticket4119-cfr
@@ -27,53 +27,53 @@
 0,         21,         21,        1,   691200, 0x00000000
 0,         22,         22,        1,   691200, 0x00000000
 0,         23,         23,        1,   691200, 0x00000000
-0,         24,         24,        1,   691200, 0xc88a6f24
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         26,         26,        1,   691200, 0xc88a6f24
-0,         27,         27,        1,   691200, 0xc88a6f24
-0,         28,         28,        1,   691200, 0xc88a6f24
-0,         29,         29,        1,   691200, 0xc88a6f24
-0,         30,         30,        1,   691200, 0xc88a6f24
-0,         31,         31,        1,   691200, 0xc88a6f24
-0,         32,         32,        1,   691200, 0xc88a6f24
-0,         33,         33,        1,   691200, 0xc88a6f24
-0,         34,         34,        1,   691200, 0xc88a6f24
-0,         35,         35,        1,   691200, 0xc88a6f24
-0,         36,         36,        1,   691200, 0xc88a6f24
-0,         37,         37,        1,   691200, 0xc88a6f24
-0,         38,         38,        1,   691200, 0xc88a6f24
-0,         39,         39,        1,   691200, 0xc88a6f24
-0,         40,         40,        1,   691200, 0xc88a6f24
-0,         41,         41,        1,   691200, 0xc88a6f24
-0,         42,         42,        1,   691200, 0xc88a6f24
-0,         43,         43,        1,   691200, 0xc88a6f24
-0,         44,         44,        1,   691200, 0xc88a6f24
-0,         45,         45,        1,   691200, 0xc88a6f24
-0,         46,         46,        1,   691200, 0xc88a6f24
-0,         47,         47,        1,   691200, 0xc88a6f24
-0,         48,         48,        1,   691200, 0xc88a6f24
-0,         49,         49,        1,   691200, 0x906d474c
-0,         50,         50,        1,   691200, 0x906d474c
-0,         51,         51,        1,   691200, 0x906d474c
-0,         52,         52,        1,   691200, 0x906d474c
-0,         53,         53,        1,   691200, 0x906d474c
-0,         54,         54,        1,   691200, 0x906d474c
-0,         55,         55,        1,   691200, 0x906d474c
-0,         56,         56,        1,   691200, 0x906d474c
-0,         57,         57,        1,   691200, 0x906d474c
-0,         58,         58,        1,   691200, 0x906d474c
-0,         59,         59,        1,   691200, 0x906d474c
-0,         60,         60,        1,   691200, 0x906d474c
-0,         61,         61,        1,   691200, 0x906d474c
-0,         62,         62,        1,   691200, 0x906d474c
-0,         63,         63,        1,   691200, 0x906d474c
-0,         64,         64,        1,   691200, 0x906d474c
-0,         65,         65,        1,   691200, 0x906d474c
-0,         66,         66,        1,   691200, 0x906d474c
-0,         67,         67,        1,   691200, 0x906d474c
-0,         68,         68,        1,   691200, 0x906d474c
-0,         69,         69,        1,   691200, 0x906d474c
-0,         70,         70,        1,   691200, 0x906d474c
-0,         71,         71,        1,   691200, 0x906d474c
-0,         72,         72,        1,   691200, 0x906d474c
-0,         73,         73,        1,   691200, 0x906d474c
+0,         24,         24,        1,   691200, 0x1c4a6f24
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         26,         26,        1,   691200, 0x1c4a6f24
+0,         27,         27,        1,   691200, 0x1c4a6f24
+0,         28,         28,        1,   691200, 0x1c4a6f24
+0,         29,         29,        1,   691200, 0x1c4a6f24
+0,         30,         30,        1,   691200, 0x1c4a6f24
+0,         31,         31,        1,   691200, 0x1c4a6f24
+0,         32,         32,        1,   691200, 0x1c4a6f24
+0,         33,         33,        1,   691200, 0x1c4a6f24
+0,         34,         34,        1,   691200, 0x1c4a6f24
+0,         35,         35,        1,   691200, 0x1c4a6f24
+0,         36,         36,        1,   691200, 0x1c4a6f24
+0,         37,         37,        1,   691200, 0x1c4a6f24
+0,         38,         38,        1,   691200, 0x1c4a6f24
+0,         39,         39,        1,   691200, 0x1c4a6f24
+0,         40,         40,        1,   691200, 0x1c4a6f24
+0,         41,         41,        1,   691200, 0x1c4a6f24
+0,         42,         42,        1,   691200, 0x1c4a6f24
+0,         43,         43,        1,   691200, 0x1c4a6f24
+0,         44,         44,        1,   691200, 0x1c4a6f24
+0,         45,         45,        1,   691200, 0x1c4a6f24
+0,         46,         46,        1,   691200, 0x1c4a6f24
+0,         47,         47,        1,   691200, 0x1c4a6f24
+0,         48,         48,        1,   691200, 0x1c4a6f24
+0,         49,         49,        1,   691200, 0x1fa0474c
+0,         50,         50,        1,   691200, 0x1fa0474c
+0,         51,         51,        1,   691200, 0x1fa0474c
+0,         52,         52,        1,   691200, 0x1fa0474c
+0,         53,         53,        1,   691200, 0x1fa0474c
+0,         54,         54,        1,   691200, 0x1fa0474c
+0,         55,         55,        1,   691200, 0x1fa0474c
+0,         56,         56,        1,   691200, 0x1fa0474c
+0,         57,         57,        1,   691200, 0x1fa0474c
+0,         58,         58,        1,   691200, 0x1fa0474c
+0,         59,         59,        1,   691200, 0x1fa0474c
+0,         60,         60,        1,   691200, 0x1fa0474c
+0,         61,         61,        1,   691200, 0x1fa0474c
+0,         62,         62,        1,   691200, 0x1fa0474c
+0,         63,         63,        1,   691200, 0x1fa0474c
+0,         64,         64,        1,   691200, 0x1fa0474c
+0,         65,         65,        1,   691200, 0x1fa0474c
+0,         66,         66,        1,   691200, 0x1fa0474c
+0,         67,         67,        1,   691200, 0x1fa0474c
+0,         68,         68,        1,   691200, 0x1fa0474c
+0,         69,         69,        1,   691200, 0x1fa0474c
+0,         70,         70,        1,   691200, 0x1fa0474c
+0,         71,         71,        1,   691200, 0x1fa0474c
+0,         72,         72,        1,   691200, 0x1fa0474c
+0,         73,         73,        1,   691200, 0x1fa0474c
diff --git a/tests/ref/fate/lagarith-ticket4119-drop b/tests/ref/fate/lagarith-ticket4119-drop
index abc58d5cb5..c7738aeca8 100644
--- a/tests/ref/fate/lagarith-ticket4119-drop
+++ b/tests/ref/fate/lagarith-ticket4119-drop
@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,          1,          1,        1,   691200, 0xc88a6f24
-0,          2,          2,        1,   691200, 0x906d474c
+0,          1,          1,        1,   691200, 0x1c4a6f24
+0,          2,          2,        1,   691200, 0x1fa0474c
diff --git a/tests/ref/fate/lagarith-ticket4119-pass b/tests/ref/fate/lagarith-ticket4119-pass
index c46ef041e4..c1de9dce0a 100644
--- a/tests/ref/fate/lagarith-ticket4119-pass
+++ b/tests/ref/fate/lagarith-ticket4119-pass
@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c
diff --git a/tests/ref/fate/lagarith-ticket4119-vfr b/tests/ref/fate/lagarith-ticket4119-vfr
index c46ef041e4..c1de9dce0a 100644
--- a/tests/ref/fate/lagarith-ticket4119-vfr
+++ b/tests/ref/fate/lagarith-ticket4119-vfr
@@ -4,5 +4,5 @@
 #dimensions 0: 640x360
 #sar 0: 0/1
 0,          0,          0,        1,   691200, 0x00000000
-0,         25,         25,        1,   691200, 0xc88a6f24
-0,         50,         50,        1,   691200, 0x906d474c
+0,         25,         25,        1,   691200, 0x1c4a6f24
+0,         50,         50,        1,   691200, 0x1fa0474c



More information about the ffmpeg-cvslog mailing list