[FFmpeg-devel] [PATCH] swscale/output: template-ize yuv2nv12cX 10-bit and 16-bit cases

rcombs rcombs at rcombs.me
Thu Dec 23 11:02:08 EET 2021


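Fixes incorrect big-endian chroma output introduced in
88d804b7ffa20caab2e8e2809da974c41f7fd8fc: the yuv2nv12cX chroma writers
only treated P010BE and P016BE as big-endian, so the P210BE, P216BE,
P410BE and P416BE formats that share them got little-endian chroma.

The endianness is now chosen once, when the output functions are
selected, via separate BE and LE wrappers around a common template.
This avoids making the filter-time BE check more expensive.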
---
 libswscale/output.c                      | 48 ++++++++++++++++++++----
 tests/ref/fate/filter-pixdesc-p210be     |  2 +-
 tests/ref/fate/filter-pixdesc-p216be     |  2 +-
 tests/ref/fate/filter-pixdesc-p410be     |  2 +-
 tests/ref/fate/filter-pixdesc-p416be     |  2 +-
 tests/ref/fate/filter-pixfmts-copy       |  8 ++--
 tests/ref/fate/filter-pixfmts-crop       |  8 ++--
 tests/ref/fate/filter-pixfmts-field      |  8 ++--
 tests/ref/fate/filter-pixfmts-fieldorder |  8 ++--
 tests/ref/fate/filter-pixfmts-hflip      |  8 ++--
 tests/ref/fate/filter-pixfmts-il         |  8 ++--
 tests/ref/fate/filter-pixfmts-null       |  8 ++--
 tests/ref/fate/filter-pixfmts-scale      |  8 ++--
 tests/ref/fate/filter-pixfmts-transpose  |  4 +-
 tests/ref/fate/filter-pixfmts-vflip      |  8 ++--
 15 files changed, 83 insertions(+), 49 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 4b4b186be9..e7cea49096 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -180,17 +180,18 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
     }
 }
 
-static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+static av_always_inline void
+yuv2nv12cX_16_c_template(int big_endian, const uint8_t *chrDither,
                          const int16_t *chrFilter, int chrFilterSize,
                          const int16_t **chrUSrc, const int16_t **chrVSrc,
-                         uint8_t *dest8, int chrDstW)
+                         uint8_t *dest8, int chrDstW, int output_bits)
 {
     uint16_t *dest = (uint16_t*)dest8;
     const int32_t **uSrc = (const int32_t **)chrUSrc;
     const int32_t **vSrc = (const int32_t **)chrVSrc;
     int shift = 15;
-    int big_endian = dstFormat == AV_PIX_FMT_P016BE;
     int i, j;
+    av_assert0(output_bits == 16);
 
     for (i = 0; i < chrDstW; i++) {
         int u = 1 << (shift - 1);
@@ -367,6 +368,7 @@ static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filter
                          filterSize, (const typeX_t **) src, \
                          (uint16_t *) dest, dstW, is_be, bits); \
 }
+
 yuv2NBPS( 9, BE, 1, 10, int16_t)
 yuv2NBPS( 9, LE, 0, 10, int16_t)
 yuv2NBPS(10, BE, 1, 10, int16_t)
@@ -378,6 +380,23 @@ yuv2NBPS(14, LE, 0, 10, int16_t)
 yuv2NBPS(16, BE, 1, 16, int32_t)
 yuv2NBPS(16, LE, 0, 16, int32_t)
 
+
+static void yuv2nv12cX_16LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                              const int16_t *chrFilter, int chrFilterSize,
+                              const int16_t **chrUSrc, const int16_t **chrVSrc,
+                              uint8_t *dest8, int chrDstW)
+{
+    yuv2nv12cX_16_c_template(0, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW, 16);
+}
+
+static void yuv2nv12cX_16BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                              const int16_t *chrFilter, int chrFilterSize,
+                              const int16_t **chrUSrc, const int16_t **chrVSrc,
+                              uint8_t *dest8, int chrDstW)
+{
+    yuv2nv12cX_16_c_template(1, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW, 16);
+}
+
 static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
@@ -477,14 +496,13 @@ static void yuv2p010lX_c(const int16_t *filter, int filterSize,
     }
 }
 
-static void yuv2p010cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+static void yuv2p010cX_c(int big_endian, const uint8_t *chrDither,
                          const int16_t *chrFilter, int chrFilterSize,
                          const int16_t **chrUSrc, const int16_t **chrVSrc,
                          uint8_t *dest8, int chrDstW)
 {
     uint16_t *dest = (uint16_t*)dest8;
     int shift = 17;
-    int big_endian = dstFormat == AV_PIX_FMT_P010BE;
     int i, j;
 
     for (i = 0; i < chrDstW; i++) {
@@ -529,6 +547,22 @@ static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
     yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
 }
 
+static void yuv2p010cX_LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                            const int16_t *chrFilter, int chrFilterSize,
+                            const int16_t **chrUSrc, const int16_t **chrVSrc,
+                            uint8_t *dest8, int chrDstW)
+{
+    yuv2p010cX_c(0, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW);
+}
+
+static void yuv2p010cX_BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+                            const int16_t *chrFilter, int chrFilterSize,
+                            const int16_t **chrUSrc, const int16_t **chrVSrc,
+                            uint8_t *dest8, int chrDstW)
+{
+    yuv2p010cX_c(1, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW);
+}
+
 #undef output_pixel
 
 
@@ -2568,14 +2602,14 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         dstFormat == AV_PIX_FMT_P410LE || dstFormat == AV_PIX_FMT_P410BE) {
         *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
         *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
-        *yuv2nv12cX = yuv2p010cX_c;
+        *yuv2nv12cX = isBE(dstFormat) ? yuv2p010cX_BE_c : yuv2p010cX_LE_c;
     } else if (is16BPS(dstFormat)) {
         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
         if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE ||
             dstFormat == AV_PIX_FMT_P216LE || dstFormat == AV_PIX_FMT_P216BE ||
             dstFormat == AV_PIX_FMT_P416LE || dstFormat == AV_PIX_FMT_P416BE) {
-          *yuv2nv12cX = yuv2p016cX_c;
+          *yuv2nv12cX = isBE(dstFormat) ? yuv2nv12cX_16BE_c : yuv2nv12cX_16LE_c;
         }
     } else if (isNBPS(dstFormat)) {
         if (desc->comp[0].depth == 9) {
diff --git a/tests/ref/fate/filter-pixdesc-p210be b/tests/ref/fate/filter-pixdesc-p210be
index b7d15ff93d..9ff89d14a0 100644
--- a/tests/ref/fate/filter-pixdesc-p210be
+++ b/tests/ref/fate/filter-pixdesc-p210be
@@ -1 +1 @@
-pixdesc-p210be      9f3465e388d91beeb5cb7fe0011c5a67
+pixdesc-p210be      016fd90989d14914bbbcc7dc2968bef0
diff --git a/tests/ref/fate/filter-pixdesc-p216be b/tests/ref/fate/filter-pixdesc-p216be
index 657136996a..932c5b2708 100644
--- a/tests/ref/fate/filter-pixdesc-p216be
+++ b/tests/ref/fate/filter-pixdesc-p216be
@@ -1 +1 @@
-pixdesc-p216be      db5cabe6e5f1814a6d20e8398aec4785
+pixdesc-p216be      d95084fa0758169851f57455a9624a2e
diff --git a/tests/ref/fate/filter-pixdesc-p410be b/tests/ref/fate/filter-pixdesc-p410be
index 26ff981622..27de3ee0bb 100644
--- a/tests/ref/fate/filter-pixdesc-p410be
+++ b/tests/ref/fate/filter-pixdesc-p410be
@@ -1 +1 @@
-pixdesc-p410be      85671676fa52d0350c918f45417f3c64
+pixdesc-p410be      33d7e8e5d6a85cc22fcbf0c12c7bafd0
diff --git a/tests/ref/fate/filter-pixdesc-p416be b/tests/ref/fate/filter-pixdesc-p416be
index 5a23be5d72..f67b553d42 100644
--- a/tests/ref/fate/filter-pixdesc-p416be
+++ b/tests/ref/fate/filter-pixdesc-p416be
@@ -1 +1 @@
-pixdesc-p416be      a7d8a859ce47c3860e0fee31539a84b0
+pixdesc-p416be      6a4b1b2fc8435acfc82312109f13bc58
diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index b090739bd2..f06fa1574e 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy
@@ -63,13 +63,13 @@ p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
 p016be              7f9842d6015026136bad60d03c035cc3
 p016le              c453421b9f726bdaf2bacf59a492c43b
-p210be              6df2a72ee297e53f9ac7f96acf0ef5d5
+p210be              847e9c6e292b17349e69570829252b3e
 p210le              c06e4b76cf504e908128081f92b60ce2
-p216be              01d10b0d17c9f575b512dff36623a85b
+p216be              f5009974fc1cd5d552705eeb52de35d9
 p216le              2f634e1a3cd5c9c122e0f2ebadb3503d
-p410be              d9af5b8126ea7457edaf0c90ad0cb2b7
+p410be              7c2509d2df4bbb199ab653ebb6dce61e
 p410le              527761e1f4381007044679710a352ecc
-p416be              fc5c1c45567de4a6bc9dbc8eef30116d
+p416be              fd828e966d45ae908f5d2d4b3349b816
 p416le              983064bfd506be1e26cd57bafc14ae50
 pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index 93353ad16a..8b26ab9c53 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop
@@ -61,13 +61,13 @@ p010be              8b2de2eb6b099bbf355bfc55a0694ddc
 p010le              373b50c766dfd0a8e79c9a73246d803a
 p016be              8b2de2eb6b099bbf355bfc55a0694ddc
 p016le              373b50c766dfd0a8e79c9a73246d803a
-p210be              b75f0e53a245e49af955fe210fc31bb8
+p210be              2947f43774352ef61f9e83777548c7c5
 p210le              74fcd5a32eee687eebe002c884103963
-p216be              89cb3a4bd44ba624c1395e7ea6998dde
+p216be              41351128eaf636041c8987698730391a
 p216le              e56f5e5b0d4460d56f27a5df8a4a1462
-p410be              37e56737c2421aa59a33c57423d58616
+p410be              e17c78ff059363177548412e6ab4e65f
 p410le              75f910c7282d8065d97f502ba974c481
-p416be              13b2dc247bdb0ab7e5532f75048f5a2c
+p416be              52f08b8a56a09d6e954c2eab6cf24d99
 p416le              ecb78b327ea5cfe1fff82945c1fca310
 pal8                1f2cdc8e718f95c875dbc1034a688bfb
 rgb0                736646b70dd9a0be22b8da8041e35035
diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index bcd1a0a45b..c4838d1446 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field
@@ -63,13 +63,13 @@ p010be              a0311a09bba7383553267d2b3b9c075e
 p010le              ee09a18aefa3ebe97715b3a7312cb8ff
 p016be              a0311a09bba7383553267d2b3b9c075e
 p016le              ee09a18aefa3ebe97715b3a7312cb8ff
-p210be              341db7c98afd2767d48cdd72e224df2f
+p210be              58d46f566ab28e3bcfb715c7aa53cf58
 p210le              8d68f7655a3d76f2f8436bd25beb3973
-p216be              0dde930860e940dced179884c359f720
+p216be              dd1f3e0bb5c49775a598ab29802fc268
 p216le              b573c0473a1368813d077487cc9bce0e
-p410be              9e9a812b74854226271c5f7dc18c37b7
+p410be              658fd0d92eb327cbd562abafc8694db7
 p410le              c981188c7fd9f32988a9f4732303f82b
-p416be              203203e6788a80b52d2ca6ba629beb9c
+p416be              66616bf2320464b5e9b6372d48b6b9a9
 p416le              1039b97bbe42ef0af1bc46d2c0fc819e
 pal8                0658c18dcd8d052d59dfbe23f5b368d9
 rgb0                ca3fa6e865b91b3511c7f2bf62830059
diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index 761ef422b5..32c06bae4c 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder
@@ -55,13 +55,13 @@ grayf32be           1aa7960131f880c54fe3c77f13448674
 grayf32le           4029ac9d197f255794c1b9e416520fc7
 nv24                4fdbef26042c77f012df114e666efdb2
 nv42                59608290fece913e6b7d61edf581a529
-p210be              82958903f553e9d2d91549bd44559a5a
+p210be              ca2ce2c25db43dcd14729b2a72a7c604
 p210le              755363012d8801b96ead2e8b1b4d2ab8
-p216be              7159f11beb9138932f8d60b95efe96dc
+p216be              17741c0cdb65914ad13c5114121a175f
 p216le              c0c888ab7bde56638732344076b3b2ba
-p410be              411f89fadbee1ca43d2918eba583bea5
+p410be              b6d65b820198ca6ff0103d9794727792
 p410le              2771dd3ae54a439921f51c29e79b6799
-p416be              de6b84bd524e8fcfc251634cae416069
+p416be              a0f8b5acad8fafc45fc7b2275fac1d84
 p416le              2e73af44eb933580da59981176848dcc
 rgb0                2e3d8c91c7a83d451593dfd06607ff39
 rgb24               b82577f8215d3dc2681be60f1da247af
diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index 1fc26b9fb5..4d3efe3cdc 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip
@@ -61,13 +61,13 @@ p010be              744b13e44d39e1ff7588983fa03e0101
 p010le              a50b160346ab94f55a425065b57006f0
 p016be              744b13e44d39e1ff7588983fa03e0101
 p016le              a50b160346ab94f55a425065b57006f0
-p210be              174cdf99f18658724e269bf38d2b653b
+p210be              6f5a76d6467b86d55fe5589d3af8a7ea
 p210le              b6982912b2376371edea4fccf99fe40c
-p216be              c58f03c6668ab0fbc3ee1a2da051e28c
+p216be              c1b58f61cd6df9cf01c3086786fb8a69
 p216le              1f5213bebf4c99634f57290f5ad99c0d
-p410be              aa40aa32be7aa353252bac70b5edc175
+p410be              2e06214ea84595aa1294239b0f1e900f
 p410le              1143c811c383e4461b1192dca0c74246
-p416be              8863e9156ee7edcb6b9e6ac01a2e338c
+p416be              da6807d924b63a54b804d32e427524bf
 p416le              a42b88cabc4395aa0bf1bcbbc876f48f
 pal8                5b7c77d99817b4f52339742a47de7797
 rgb0                0092452f37d73da20193265ace0b7d57
diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index 7e7f057afa..4623f2420c 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il
@@ -63,13 +63,13 @@ p010be              3df51286ef66b53e3e283dbbab582263
 p010le              eadcd8241e97e35b2b47d5eb2eaea6cd
 p016be              3df51286ef66b53e3e283dbbab582263
 p016le              eadcd8241e97e35b2b47d5eb2eaea6cd
-p210be              4992fe87c600dfb177b1e2e6aa0f922c
+p210be              29ec4e8912d456cd15203a96487c42e8
 p210le              c695064fb9f2cc4e35957d4d649cc281
-p216be              98b73479f0ea9843768c162c449c3ac5
+p216be              ad85bdc59755608602608a9438bb82ea
 p216le              77757390da383a90981e461d128d8789
-p410be              a7183a01888b47a4d9f3672073c7ea7d
+p410be              2128861337e660232e6fb664cc4de3e6
 p410le              6cf3a3e199b327f4f013e0346410d7a8
-p416be              4dc4aebf18e09e8f8b49db90ae5ec127
+p416be              47dec75cefeb6220be7731bc25b7be9c
 p416le              4990b51ff889d9ee23e68997f81c09f1
 rgb0                cfaf68671e43248267d8cd50cae8c13f
 rgb24               88894f608cf33ba310f21996748d77a7
diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index b090739bd2..f06fa1574e 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null
@@ -63,13 +63,13 @@ p010be              7f9842d6015026136bad60d03c035cc3
 p010le              c453421b9f726bdaf2bacf59a492c43b
 p016be              7f9842d6015026136bad60d03c035cc3
 p016le              c453421b9f726bdaf2bacf59a492c43b
-p210be              6df2a72ee297e53f9ac7f96acf0ef5d5
+p210be              847e9c6e292b17349e69570829252b3e
 p210le              c06e4b76cf504e908128081f92b60ce2
-p216be              01d10b0d17c9f575b512dff36623a85b
+p216be              f5009974fc1cd5d552705eeb52de35d9
 p216le              2f634e1a3cd5c9c122e0f2ebadb3503d
-p410be              d9af5b8126ea7457edaf0c90ad0cb2b7
+p410be              7c2509d2df4bbb199ab653ebb6dce61e
 p410le              527761e1f4381007044679710a352ecc
-p416be              fc5c1c45567de4a6bc9dbc8eef30116d
+p416be              fd828e966d45ae908f5d2d4b3349b816
 p416le              983064bfd506be1e26cd57bafc14ae50
 pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index a5a4ac4cba..43074b84a7 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -63,13 +63,13 @@ p010be              1d6726d94bf1385996a9a9840dd0e878
 p010le              4b316f2b9e18972299beb73511278fa8
 p016be              31e204018cbb53f8988c4e1174ea8ce9
 p016le              d5afe557f492a09317e525d7cb782f5b
-p210be              42be1e97427247317444afa836969667
+p210be              2cc6dfcf5e006c8ed5238988a06fd45e
 p210le              04efb8f14a9d98417af40954a06aa187
-p216be              caa0268d0f6779343a4432b6bc832c5b
+p216be              2f649a226812c8e5a553c4e22d301684
 p216le              c8f65811f717a12706a598561c6df46d
-p410be              f580b8dcf5a826c94258eeba837fd874
+p410be              354cd1324ad382df1a3d573833323cce
 p410le              90fdd95ec4482c127d98307550a885c6
-p416be              a1242f80d32705a757f4d3553542ae1f
+p416be              aa54294859a8e6cb2c9cf64d343fdb60
 p416le              d91a0858ea8d2cf1ed29f179c9ad9666
 pal8                29e10892009b2cfe431815ec3052ed3b
 rgb0                fbd27e98154efb7535826afed41e9bb0
diff --git a/tests/ref/fate/filter-pixfmts-transpose b/tests/ref/fate/filter-pixfmts-transpose
index dc3a0628ee..922666cf95 100644
--- a/tests/ref/fate/filter-pixfmts-transpose
+++ b/tests/ref/fate/filter-pixfmts-transpose
@@ -61,9 +61,9 @@ p010be              ad0de2cc9bff81688b182a870fcf7000
 p010le              e7ff5143595021246733ce6bd0a769e8
 p016be              ad0de2cc9bff81688b182a870fcf7000
 p016le              e7ff5143595021246733ce6bd0a769e8
-p410be              171453dc34dd3c77659914e2202c5aa6
+p410be              8b3e0ccb31b6a20ff00a29253fb2dec3
 p410le              4e5f78dfccda9a6387e81354a56a033a
-p416be              ff09601f127101a8ce8997b9ae0fd6bf
+p416be              350a90bda53349435d89ec13533726b7
 p416le              7bb46e2aec65669a27502ec452941237
 rgb0                31ea5da7fe779c6ea0a33f1d28aad918
 rgb24               47654cabaaad79170b90afd5a02161dd
diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index 7736e372ad..3a53bb5837 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip
@@ -63,13 +63,13 @@ p010be              06e9354b6e0e38ba41736352cedc0bd5
 p010le              fd18d322bffbf5816902c13102872e22
 p016be              06e9354b6e0e38ba41736352cedc0bd5
 p016le              fd18d322bffbf5816902c13102872e22
-p210be              328b09bb0c70571617901322b4194023
+p210be              ca886ab2b3ea5c153f1954b3709f7249
 p210le              d71c2d4e483030ffd87fa6a68c83fce0
-p216be              e4ab026532db1dfee38cedef384e605b
+p216be              7f268f755ed02592b3a49fd5f7bd48bb
 p216le              2c0a660762527706799c4705ca50a9c5
-p410be              637fb064c2ce173de5cf431aa9267914
+p410be              4c603e4464ed3f34cc432b4d1f912082
 p410le              849308a1cdf41e055019cf311d1b2201
-p416be              8e9cf1b695c0a33b6094dd6c7b3722d9
+p416be              7e7657ab40cf953351a14ea76e296519
 p416le              0991d7fff4e2caf36be219ecdd9619d4
 pal8                450b0155d0f2d5628bf95a442db5f817
 rgb0                56a7ea69541bcd27bef6a5615784722b
-- 
2.33.1



More information about the ffmpeg-devel mailing list