[FFmpeg-devel] [PATCH v2 2/2] libswscale/input: use more accurate rgbf32 yuv conversions

mindmark at gmail.com mindmark at gmail.com
Tue Sep 29 06:44:34 EEST 2020


From: Mark Reid <mindmark at gmail.com>

---
 libswscale/input.c                  |  12 ++-
 tests/ref/fate/filter-pixfmts-scale |   8 +-
 tests/ref/fate/sws-floatimg-cmp     | 122 ++++++++++++++--------------
 3 files changed, 70 insertions(+), 72 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index 064ed5902f..67a85b0418 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -984,15 +984,14 @@ static av_always_inline void planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV,
     uint16_t *dstV       = (uint16_t *)_dstV;
     int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
     int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
-    int bpc = 16;
-    int shift = 14;
+
     for (i = 0; i < width; i++) {
         int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
         int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
         int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-        dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
-        dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+        dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+        dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
     }
 }
 
@@ -1003,14 +1002,13 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s
     uint16_t *dst    = (uint16_t *)_dst;
 
     int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
-    int bpc = 16;
-    int shift = 14;
+
     for (i = 0; i < width; i++) {
         int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
         int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
         int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
 
-        dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14));
+        dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
     }
 }
 
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index d7020ad2c3..30e7cd5b06 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -25,8 +25,8 @@ gbrap12be           1d9b57766ba9c2192403f43967cb9af0
 gbrap12le           bb1ba1c157717db3dd612a76d38a018e
 gbrap16be           c72b935a6e57a8e1c37bff08c2db55b1
 gbrap16le           13eb0e62b1ac9c1c86c81521eaefab5f
-gbrapf32be          42e53d9edccbd9e09c4cd78780ba92f3
-gbrapf32le          eebf3973ef94c841f0a1ceb1ed61621d
+gbrapf32be          366b804d5697276e8c481c4bdf05a00b
+gbrapf32le          558a268e6d6b907449d1056afab78f29
 gbrp                dc3387f925f972c61aae7eb23cdc19f0
 gbrp10be            0277d4c3a8498d75e2783fb81379e481
 gbrp10le            f3d70f8ab845c3c9b8f7452e4a6e285a
@@ -38,8 +38,8 @@ gbrp16be            5fc826cfabebfc1442cb793c4b6303e2
 gbrp16le            1b3e0b63d47a3e1b6b20931316883bf2
 gbrp9be             d9c88968001e1452ff31fbc8d16b18a0
 gbrp9le             2ccfed0816bf6bd4bb3a5b7591d9603a
-gbrpf32be           4614d32e4417f80e0adcc1bdcf6cde42
-gbrpf32le           1366ee77e5559672260bbe51040e28b2
+gbrpf32be           f3d0cefdf11c861001880772d817aac8
+gbrpf32le           290468205c1c18a0667edfca45061aee
 gray                221201cc7cfc4964eacd8b3e426fd276
 gray10be            9452756d0b37f4f5c7cae7635e22d747
 gray10le            37fd2e1ec6b66410212d39a342e864df
diff --git a/tests/ref/fate/sws-floatimg-cmp b/tests/ref/fate/sws-floatimg-cmp
index 24204254c4..cf6788fc23 100644
--- a/tests/ref/fate/sws-floatimg-cmp
+++ b/tests/ref/fate/sws-floatimg-cmp
@@ -1,120 +1,120 @@
 gbrpf32le -> yuv444p16le -> gbrpf32le
-avg diff: 0.003852
+avg diff: 0.000125
 min diff: 0.000000
-max diff: 0.006638
+max diff: 0.000501
 gbrpf32le -> yuv444p -> gbrpf32le
-avg diff: 0.004316
+avg diff: 0.001804
 min diff: 0.000000
-max diff: 0.012704
+max diff: 0.006399
 gbrpf32le -> yuv444p9le -> gbrpf32le
-avg diff: 0.004053
-min diff: 0.000001
-max diff: 0.009402
+avg diff: 0.000906
+min diff: 0.000000
+max diff: 0.003313
 gbrpf32le -> yuv444p10le -> gbrpf32le
-avg diff: 0.003960
+avg diff: 0.000467
 min diff: 0.000000
-max diff: 0.008123
+max diff: 0.001912
 gbrpf32le -> yuv444p12le -> gbrpf32le
-avg diff: 0.003878
+avg diff: 0.000166
 min diff: 0.000000
-max diff: 0.007011
+max diff: 0.000802
 gbrpf32le -> yuv444p14le -> gbrpf32le
-avg diff: 0.003868
+avg diff: 0.000127
 min diff: 0.000000
-max diff: 0.006729
+max diff: 0.000524
 gbrpf32le -> rgb24 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> bgr24 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> rgba -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> bgra -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> argb -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> abgr -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> 0rgb -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> 0bgr -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> rgb0 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> bgr0 -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> rgb48le -> gbrpf32le
-avg diff: 0.003851
+avg diff: 0.000249
 min diff: 0.000000
-max diff: 0.007076
+max diff: 0.000990
 gbrpf32le -> bgr48le -> gbrpf32le
-avg diff: 0.003851
+avg diff: 0.000249
 min diff: 0.000000
-max diff: 0.007076
+max diff: 0.000990
 gbrpf32le -> rgba64le -> gbrpf32le
-avg diff: 0.003851
+avg diff: 0.000249
 min diff: 0.000000
-max diff: 0.007076
+max diff: 0.000990
 gbrpf32le -> bgra64le -> gbrpf32le
-avg diff: 0.003851
+avg diff: 0.000249
 min diff: 0.000000
-max diff: 0.007076
+max diff: 0.000990
 gbrpf32le -> gbrp -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> gbrap -> gbrpf32le
-avg diff: 0.004122
+avg diff: 0.001011
 min diff: 0.000000
-max diff: 0.008975
+max diff: 0.004229
 gbrpf32le -> gbrp9le -> gbrpf32le
-avg diff: 0.007737
+avg diff: 0.003917
 min diff: 0.000000
-max diff: 0.014009
+max diff: 0.007870
 gbrpf32le -> gbrp10le -> gbrpf32le
-avg diff: 0.007662
+avg diff: 0.003841
 min diff: 0.000000
-max diff: 0.013605
+max diff: 0.007456
 gbrpf32le -> gbrap10le -> gbrpf32le
-avg diff: 0.007662
+avg diff: 0.003841
 min diff: 0.000000
-max diff: 0.013605
+max diff: 0.007456
 gbrpf32le -> gbrp12le -> gbrpf32le
-avg diff: 0.007622
+avg diff: 0.003796
 min diff: 0.000000
-max diff: 0.013335
+max diff: 0.007140
 gbrpf32le -> gbrap12le -> gbrpf32le
-avg diff: 0.007622
+avg diff: 0.003796
 min diff: 0.000000
-max diff: 0.013335
+max diff: 0.007140
 gbrpf32le -> gbrp14le -> gbrpf32le
-avg diff: 0.007620
+avg diff: 0.003792
 min diff: 0.000000
-max diff: 0.013232
+max diff: 0.007034
 gbrpf32le -> gbrp16le -> gbrpf32le
-avg diff: 0.007680
+avg diff: 0.003853
 min diff: 0.000000
-max diff: 0.013275
+max diff: 0.007098
 gbrpf32le -> gbrap16le -> gbrpf32le
-avg diff: 0.007680
+avg diff: 0.003853
 min diff: 0.000000
-max diff: 0.013275
+max diff: 0.007098
-- 
2.27.0



More information about the ffmpeg-devel mailing list