[FFmpeg-devel] [PATCH 01/16] swscale/output: template-ize yuv2nv12cX 10-bit and 16-bit cases
rcombs
rcombs at rcombs.me
Fri Dec 24 05:08:49 EET 2021
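Fixes incorrect big-endian output introduced in
88d804b7ffa20caab2e8e2809da974c41f7fd8fc: the chroma output functions
derived big_endian by comparing dstFormat against only AV_PIX_FMT_P010BE
or AV_PIX_FMT_P016BE, so it was 0 for the P210/P216/P410/P416 BE formats.

Avoids making the filter-time BE check more expensive by selecting an
endian-specific function with isBE(dstFormat) at init time instead.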
---
libswscale/output.c | 48 ++++++++++++++++++++----
tests/ref/fate/filter-pixdesc-p210be | 2 +-
tests/ref/fate/filter-pixdesc-p216be | 2 +-
tests/ref/fate/filter-pixdesc-p410be | 2 +-
tests/ref/fate/filter-pixdesc-p416be | 2 +-
tests/ref/fate/filter-pixfmts-copy | 8 ++--
tests/ref/fate/filter-pixfmts-crop | 8 ++--
tests/ref/fate/filter-pixfmts-field | 8 ++--
tests/ref/fate/filter-pixfmts-fieldorder | 8 ++--
tests/ref/fate/filter-pixfmts-hflip | 8 ++--
tests/ref/fate/filter-pixfmts-il | 8 ++--
tests/ref/fate/filter-pixfmts-null | 8 ++--
tests/ref/fate/filter-pixfmts-scale | 8 ++--
tests/ref/fate/filter-pixfmts-transpose | 4 +-
tests/ref/fate/filter-pixfmts-vflip | 8 ++--
15 files changed, 83 insertions(+), 49 deletions(-)
diff --git a/libswscale/output.c b/libswscale/output.c
index 4b4b186be9..e7cea49096 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -180,17 +180,18 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
}
}
-static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+static av_always_inline void
+yuv2nv12cX_16_c_template(int big_endian, const uint8_t *chrDither,
const int16_t *chrFilter, int chrFilterSize,
const int16_t **chrUSrc, const int16_t **chrVSrc,
- uint8_t *dest8, int chrDstW)
+ uint8_t *dest8, int chrDstW, int output_bits)
{
uint16_t *dest = (uint16_t*)dest8;
const int32_t **uSrc = (const int32_t **)chrUSrc;
const int32_t **vSrc = (const int32_t **)chrVSrc;
int shift = 15;
- int big_endian = dstFormat == AV_PIX_FMT_P016BE;
int i, j;
+ av_assert0(output_bits == 16);
for (i = 0; i < chrDstW; i++) {
int u = 1 << (shift - 1);
@@ -367,6 +368,7 @@ static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filter
filterSize, (const typeX_t **) src, \
(uint16_t *) dest, dstW, is_be, bits); \
}
+
yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
@@ -378,6 +380,23 @@ yuv2NBPS(14, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
+
+static void yuv2nv12cX_16LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+ const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest8, int chrDstW)
+{
+ yuv2nv12cX_16_c_template(0, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW, 16);
+}
+
+static void yuv2nv12cX_16BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+ const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest8, int chrDstW)
+{
+ yuv2nv12cX_16_c_template(1, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW, 16);
+}
+
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
@@ -477,14 +496,13 @@ static void yuv2p010lX_c(const int16_t *filter, int filterSize,
}
}
-static void yuv2p010cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+static void yuv2p010cX_c(int big_endian, const uint8_t *chrDither,
const int16_t *chrFilter, int chrFilterSize,
const int16_t **chrUSrc, const int16_t **chrVSrc,
uint8_t *dest8, int chrDstW)
{
uint16_t *dest = (uint16_t*)dest8;
int shift = 17;
- int big_endian = dstFormat == AV_PIX_FMT_P010BE;
int i, j;
for (i = 0; i < chrDstW; i++) {
@@ -529,6 +547,22 @@ static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
}
+static void yuv2p010cX_LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+ const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest8, int chrDstW)
+{
+ yuv2p010cX_c(0, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW);
+}
+
+static void yuv2p010cX_BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
+ const int16_t *chrFilter, int chrFilterSize,
+ const int16_t **chrUSrc, const int16_t **chrVSrc,
+ uint8_t *dest8, int chrDstW)
+{
+ yuv2p010cX_c(1, chrDither, chrFilter, chrFilterSize, chrUSrc, chrVSrc, dest8, chrDstW);
+}
+
#undef output_pixel
@@ -2568,14 +2602,14 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
dstFormat == AV_PIX_FMT_P410LE || dstFormat == AV_PIX_FMT_P410BE) {
*yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
*yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
- *yuv2nv12cX = yuv2p010cX_c;
+ *yuv2nv12cX = isBE(dstFormat) ? yuv2p010cX_BE_c : yuv2p010cX_LE_c;
} else if (is16BPS(dstFormat)) {
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE ||
dstFormat == AV_PIX_FMT_P216LE || dstFormat == AV_PIX_FMT_P216BE ||
dstFormat == AV_PIX_FMT_P416LE || dstFormat == AV_PIX_FMT_P416BE) {
- *yuv2nv12cX = yuv2p016cX_c;
+ *yuv2nv12cX = isBE(dstFormat) ? yuv2nv12cX_16BE_c : yuv2nv12cX_16LE_c;
}
} else if (isNBPS(dstFormat)) {
if (desc->comp[0].depth == 9) {
diff --git a/tests/ref/fate/filter-pixdesc-p210be b/tests/ref/fate/filter-pixdesc-p210be
index b7d15ff93d..9ff89d14a0 100644
--- a/tests/ref/fate/filter-pixdesc-p210be
+++ b/tests/ref/fate/filter-pixdesc-p210be
@@ -1 +1 @@
-pixdesc-p210be 9f3465e388d91beeb5cb7fe0011c5a67
+pixdesc-p210be 016fd90989d14914bbbcc7dc2968bef0
diff --git a/tests/ref/fate/filter-pixdesc-p216be b/tests/ref/fate/filter-pixdesc-p216be
index 657136996a..932c5b2708 100644
--- a/tests/ref/fate/filter-pixdesc-p216be
+++ b/tests/ref/fate/filter-pixdesc-p216be
@@ -1 +1 @@
-pixdesc-p216be db5cabe6e5f1814a6d20e8398aec4785
+pixdesc-p216be d95084fa0758169851f57455a9624a2e
diff --git a/tests/ref/fate/filter-pixdesc-p410be b/tests/ref/fate/filter-pixdesc-p410be
index 26ff981622..27de3ee0bb 100644
--- a/tests/ref/fate/filter-pixdesc-p410be
+++ b/tests/ref/fate/filter-pixdesc-p410be
@@ -1 +1 @@
-pixdesc-p410be 85671676fa52d0350c918f45417f3c64
+pixdesc-p410be 33d7e8e5d6a85cc22fcbf0c12c7bafd0
diff --git a/tests/ref/fate/filter-pixdesc-p416be b/tests/ref/fate/filter-pixdesc-p416be
index 5a23be5d72..f67b553d42 100644
--- a/tests/ref/fate/filter-pixdesc-p416be
+++ b/tests/ref/fate/filter-pixdesc-p416be
@@ -1 +1 @@
-pixdesc-p416be a7d8a859ce47c3860e0fee31539a84b0
+pixdesc-p416be 6a4b1b2fc8435acfc82312109f13bc58
diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index b090739bd2..f06fa1574e 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy
@@ -63,13 +63,13 @@ p010be 7f9842d6015026136bad60d03c035cc3
p010le c453421b9f726bdaf2bacf59a492c43b
p016be 7f9842d6015026136bad60d03c035cc3
p016le c453421b9f726bdaf2bacf59a492c43b
-p210be 6df2a72ee297e53f9ac7f96acf0ef5d5
+p210be 847e9c6e292b17349e69570829252b3e
p210le c06e4b76cf504e908128081f92b60ce2
-p216be 01d10b0d17c9f575b512dff36623a85b
+p216be f5009974fc1cd5d552705eeb52de35d9
p216le 2f634e1a3cd5c9c122e0f2ebadb3503d
-p410be d9af5b8126ea7457edaf0c90ad0cb2b7
+p410be 7c2509d2df4bbb199ab653ebb6dce61e
p410le 527761e1f4381007044679710a352ecc
-p416be fc5c1c45567de4a6bc9dbc8eef30116d
+p416be fd828e966d45ae908f5d2d4b3349b816
p416le 983064bfd506be1e26cd57bafc14ae50
pal8 ff5929f5b42075793b2c34cb441bede5
rgb0 0de71e5a1f97f81fb51397a0435bfa72
diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index 93353ad16a..8b26ab9c53 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop
@@ -61,13 +61,13 @@ p010be 8b2de2eb6b099bbf355bfc55a0694ddc
p010le 373b50c766dfd0a8e79c9a73246d803a
p016be 8b2de2eb6b099bbf355bfc55a0694ddc
p016le 373b50c766dfd0a8e79c9a73246d803a
-p210be b75f0e53a245e49af955fe210fc31bb8
+p210be 2947f43774352ef61f9e83777548c7c5
p210le 74fcd5a32eee687eebe002c884103963
-p216be 89cb3a4bd44ba624c1395e7ea6998dde
+p216be 41351128eaf636041c8987698730391a
p216le e56f5e5b0d4460d56f27a5df8a4a1462
-p410be 37e56737c2421aa59a33c57423d58616
+p410be e17c78ff059363177548412e6ab4e65f
p410le 75f910c7282d8065d97f502ba974c481
-p416be 13b2dc247bdb0ab7e5532f75048f5a2c
+p416be 52f08b8a56a09d6e954c2eab6cf24d99
p416le ecb78b327ea5cfe1fff82945c1fca310
pal8 1f2cdc8e718f95c875dbc1034a688bfb
rgb0 736646b70dd9a0be22b8da8041e35035
diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index bcd1a0a45b..c4838d1446 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field
@@ -63,13 +63,13 @@ p010be a0311a09bba7383553267d2b3b9c075e
p010le ee09a18aefa3ebe97715b3a7312cb8ff
p016be a0311a09bba7383553267d2b3b9c075e
p016le ee09a18aefa3ebe97715b3a7312cb8ff
-p210be 341db7c98afd2767d48cdd72e224df2f
+p210be 58d46f566ab28e3bcfb715c7aa53cf58
p210le 8d68f7655a3d76f2f8436bd25beb3973
-p216be 0dde930860e940dced179884c359f720
+p216be dd1f3e0bb5c49775a598ab29802fc268
p216le b573c0473a1368813d077487cc9bce0e
-p410be 9e9a812b74854226271c5f7dc18c37b7
+p410be 658fd0d92eb327cbd562abafc8694db7
p410le c981188c7fd9f32988a9f4732303f82b
-p416be 203203e6788a80b52d2ca6ba629beb9c
+p416be 66616bf2320464b5e9b6372d48b6b9a9
p416le 1039b97bbe42ef0af1bc46d2c0fc819e
pal8 0658c18dcd8d052d59dfbe23f5b368d9
rgb0 ca3fa6e865b91b3511c7f2bf62830059
diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index 761ef422b5..32c06bae4c 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder
@@ -55,13 +55,13 @@ grayf32be 1aa7960131f880c54fe3c77f13448674
grayf32le 4029ac9d197f255794c1b9e416520fc7
nv24 4fdbef26042c77f012df114e666efdb2
nv42 59608290fece913e6b7d61edf581a529
-p210be 82958903f553e9d2d91549bd44559a5a
+p210be ca2ce2c25db43dcd14729b2a72a7c604
p210le 755363012d8801b96ead2e8b1b4d2ab8
-p216be 7159f11beb9138932f8d60b95efe96dc
+p216be 17741c0cdb65914ad13c5114121a175f
p216le c0c888ab7bde56638732344076b3b2ba
-p410be 411f89fadbee1ca43d2918eba583bea5
+p410be b6d65b820198ca6ff0103d9794727792
p410le 2771dd3ae54a439921f51c29e79b6799
-p416be de6b84bd524e8fcfc251634cae416069
+p416be a0f8b5acad8fafc45fc7b2275fac1d84
p416le 2e73af44eb933580da59981176848dcc
rgb0 2e3d8c91c7a83d451593dfd06607ff39
rgb24 b82577f8215d3dc2681be60f1da247af
diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index 1fc26b9fb5..4d3efe3cdc 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip
@@ -61,13 +61,13 @@ p010be 744b13e44d39e1ff7588983fa03e0101
p010le a50b160346ab94f55a425065b57006f0
p016be 744b13e44d39e1ff7588983fa03e0101
p016le a50b160346ab94f55a425065b57006f0
-p210be 174cdf99f18658724e269bf38d2b653b
+p210be 6f5a76d6467b86d55fe5589d3af8a7ea
p210le b6982912b2376371edea4fccf99fe40c
-p216be c58f03c6668ab0fbc3ee1a2da051e28c
+p216be c1b58f61cd6df9cf01c3086786fb8a69
p216le 1f5213bebf4c99634f57290f5ad99c0d
-p410be aa40aa32be7aa353252bac70b5edc175
+p410be 2e06214ea84595aa1294239b0f1e900f
p410le 1143c811c383e4461b1192dca0c74246
-p416be 8863e9156ee7edcb6b9e6ac01a2e338c
+p416be da6807d924b63a54b804d32e427524bf
p416le a42b88cabc4395aa0bf1bcbbc876f48f
pal8 5b7c77d99817b4f52339742a47de7797
rgb0 0092452f37d73da20193265ace0b7d57
diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index 7e7f057afa..4623f2420c 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il
@@ -63,13 +63,13 @@ p010be 3df51286ef66b53e3e283dbbab582263
p010le eadcd8241e97e35b2b47d5eb2eaea6cd
p016be 3df51286ef66b53e3e283dbbab582263
p016le eadcd8241e97e35b2b47d5eb2eaea6cd
-p210be 4992fe87c600dfb177b1e2e6aa0f922c
+p210be 29ec4e8912d456cd15203a96487c42e8
p210le c695064fb9f2cc4e35957d4d649cc281
-p216be 98b73479f0ea9843768c162c449c3ac5
+p216be ad85bdc59755608602608a9438bb82ea
p216le 77757390da383a90981e461d128d8789
-p410be a7183a01888b47a4d9f3672073c7ea7d
+p410be 2128861337e660232e6fb664cc4de3e6
p410le 6cf3a3e199b327f4f013e0346410d7a8
-p416be 4dc4aebf18e09e8f8b49db90ae5ec127
+p416be 47dec75cefeb6220be7731bc25b7be9c
p416le 4990b51ff889d9ee23e68997f81c09f1
rgb0 cfaf68671e43248267d8cd50cae8c13f
rgb24 88894f608cf33ba310f21996748d77a7
diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index b090739bd2..f06fa1574e 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null
@@ -63,13 +63,13 @@ p010be 7f9842d6015026136bad60d03c035cc3
p010le c453421b9f726bdaf2bacf59a492c43b
p016be 7f9842d6015026136bad60d03c035cc3
p016le c453421b9f726bdaf2bacf59a492c43b
-p210be 6df2a72ee297e53f9ac7f96acf0ef5d5
+p210be 847e9c6e292b17349e69570829252b3e
p210le c06e4b76cf504e908128081f92b60ce2
-p216be 01d10b0d17c9f575b512dff36623a85b
+p216be f5009974fc1cd5d552705eeb52de35d9
p216le 2f634e1a3cd5c9c122e0f2ebadb3503d
-p410be d9af5b8126ea7457edaf0c90ad0cb2b7
+p410be 7c2509d2df4bbb199ab653ebb6dce61e
p410le 527761e1f4381007044679710a352ecc
-p416be fc5c1c45567de4a6bc9dbc8eef30116d
+p416be fd828e966d45ae908f5d2d4b3349b816
p416le 983064bfd506be1e26cd57bafc14ae50
pal8 ff5929f5b42075793b2c34cb441bede5
rgb0 0de71e5a1f97f81fb51397a0435bfa72
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index a5a4ac4cba..43074b84a7 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -63,13 +63,13 @@ p010be 1d6726d94bf1385996a9a9840dd0e878
p010le 4b316f2b9e18972299beb73511278fa8
p016be 31e204018cbb53f8988c4e1174ea8ce9
p016le d5afe557f492a09317e525d7cb782f5b
-p210be 42be1e97427247317444afa836969667
+p210be 2cc6dfcf5e006c8ed5238988a06fd45e
p210le 04efb8f14a9d98417af40954a06aa187
-p216be caa0268d0f6779343a4432b6bc832c5b
+p216be 2f649a226812c8e5a553c4e22d301684
p216le c8f65811f717a12706a598561c6df46d
-p410be f580b8dcf5a826c94258eeba837fd874
+p410be 354cd1324ad382df1a3d573833323cce
p410le 90fdd95ec4482c127d98307550a885c6
-p416be a1242f80d32705a757f4d3553542ae1f
+p416be aa54294859a8e6cb2c9cf64d343fdb60
p416le d91a0858ea8d2cf1ed29f179c9ad9666
pal8 29e10892009b2cfe431815ec3052ed3b
rgb0 fbd27e98154efb7535826afed41e9bb0
diff --git a/tests/ref/fate/filter-pixfmts-transpose b/tests/ref/fate/filter-pixfmts-transpose
index dc3a0628ee..922666cf95 100644
--- a/tests/ref/fate/filter-pixfmts-transpose
+++ b/tests/ref/fate/filter-pixfmts-transpose
@@ -61,9 +61,9 @@ p010be ad0de2cc9bff81688b182a870fcf7000
p010le e7ff5143595021246733ce6bd0a769e8
p016be ad0de2cc9bff81688b182a870fcf7000
p016le e7ff5143595021246733ce6bd0a769e8
-p410be 171453dc34dd3c77659914e2202c5aa6
+p410be 8b3e0ccb31b6a20ff00a29253fb2dec3
p410le 4e5f78dfccda9a6387e81354a56a033a
-p416be ff09601f127101a8ce8997b9ae0fd6bf
+p416be 350a90bda53349435d89ec13533726b7
p416le 7bb46e2aec65669a27502ec452941237
rgb0 31ea5da7fe779c6ea0a33f1d28aad918
rgb24 47654cabaaad79170b90afd5a02161dd
diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index 7736e372ad..3a53bb5837 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip
@@ -63,13 +63,13 @@ p010be 06e9354b6e0e38ba41736352cedc0bd5
p010le fd18d322bffbf5816902c13102872e22
p016be 06e9354b6e0e38ba41736352cedc0bd5
p016le fd18d322bffbf5816902c13102872e22
-p210be 328b09bb0c70571617901322b4194023
+p210be ca886ab2b3ea5c153f1954b3709f7249
p210le d71c2d4e483030ffd87fa6a68c83fce0
-p216be e4ab026532db1dfee38cedef384e605b
+p216be 7f268f755ed02592b3a49fd5f7bd48bb
p216le 2c0a660762527706799c4705ca50a9c5
-p410be 637fb064c2ce173de5cf431aa9267914
+p410be 4c603e4464ed3f34cc432b4d1f912082
p410le 849308a1cdf41e055019cf311d1b2201
-p416be 8e9cf1b695c0a33b6094dd6c7b3722d9
+p416be 7e7657ab40cf953351a14ea76e296519
p416le 0991d7fff4e2caf36be219ecdd9619d4
pal8 450b0155d0f2d5628bf95a442db5f817
rgb0 56a7ea69541bcd27bef6a5615784722b
--
2.33.1
More information about the ffmpeg-devel mailing list