[FFmpeg-devel] [PATCH] swscale: use 16-bit intermediate precision for RGB/XYZ conversion

Mon Dec 16 15:56:07 EET 2024

From: Niklas Haas <git at haasn.dev>

The current logic uses 12-bit linear light math, which is woefully insufficient
and leads to nasty posterization artifacts. This patch simply switches the
internal logic to 16-bit precision.

This raises the memory requirement of these tables from 32 kB to 272 kB.

Fixes: ticket 4829
Signed-off-by: Niklas Haas <git at haasn.dev>
Sponsored-by: Sovereign Tech Fund
---
 libswscale/swscale.c          | 16 ++++++++--------
 libswscale/swscale_internal.h |  8 ++++----
 libswscale/utils.c            | 19 ++++++++++++-------
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 96634acfd6..da3a082905 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -773,10 +773,10 @@ void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
                 c->xyz2rgb_matrix[2][1] * y +
                 c->xyz2rgb_matrix[2][2] * z >> 12;
 
-            // limit values to 12-bit depth
-            r = av_clip_uintp2(r, 12);
-            g = av_clip_uintp2(g, 12);
-            b = av_clip_uintp2(b, 12);
+            // limit values to 16-bit depth
+            r = av_clip_uint16(r);
+            g = av_clip_uint16(g);
+            b = av_clip_uint16(b);
 
             // convert from sRGBlinear to RGB and scale from 12bit to 16bit
             if (desc->flags & AV_PIX_FMT_FLAG_BE) {
@@ -832,10 +832,10 @@ void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
                 c->rgb2xyz_matrix[2][1] * g +
                 c->rgb2xyz_matrix[2][2] * b >> 12;
 
-            // limit values to 12-bit depth
-            x = av_clip_uintp2(x, 12);
-            y = av_clip_uintp2(y, 12);
-            z = av_clip_uintp2(z, 12);
+            // limit values to 16-bit depth
+            x = av_clip_uint16(x);
+            y = av_clip_uint16(y);
+            z = av_clip_uint16(z);
 
             // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
             if (desc->flags & AV_PIX_FMT_FLAG_BE) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 768e394560..5acd277b50 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -547,10 +547,10 @@ struct SwsInternal {
 /* pre defined color-spaces gamma */
 #define XYZ_GAMMA (2.6f)
 #define RGB_GAMMA (2.2f)
-    int16_t *xyzgamma;
-    int16_t *rgbgamma;
-    int16_t *xyzgammainv;
-    int16_t *rgbgammainv;
+    uint16_t *xyzgamma;
+    uint16_t *rgbgamma;
+    uint16_t *xyzgammainv;
+    uint16_t *rgbgammainv;
     int16_t xyz2rgb_matrix[3][4];
     int16_t rgb2xyz_matrix[3][4];
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 4dedbfc394..937e19f651 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -948,7 +948,8 @@ static void fill_xyztables(SwsInternal *c)
         {1689, 1464,  739},
         { 871, 2929,  296},
         {  79,  488, 3891} };
-    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];
+    static uint16_t xyzgamma_tab[4096],  rgbgammainv_tab[4096];
+    static uint16_t rgbgamma_tab[65536], xyzgammainv_tab[65536];
 
     memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
     memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
@@ -957,15 +958,19 @@ static void fill_xyztables(SwsInternal *c)
     c->xyzgammainv = xyzgammainv_tab;
     c->rgbgammainv = rgbgammainv_tab;
 
-    if (rgbgamma_tab[4095])
+    if (xyzgamma_tab[4095])
         return;
 
-    /* set gamma vectors */
+    /* set input gamma vectors */
     for (i = 0; i < 4096; i++) {
-        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
-        rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
-        xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
-        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
+        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 65535.0);
+        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 65535.0);
+    }
+
+    /* set output gamma vectors */
+    for (i = 0; i < 65536; i++) {
+        rgbgamma_tab[i] = lrint(pow(i / 65535.0, rgbgamma) * 4095.0);
+        xyzgammainv_tab[i] = lrint(pow(i / 65535.0, xyzgammainv) * 4095.0);
     }
 }
 
-- 
2.47.0