[FFmpeg-devel] [PATCH v2 2/2] swscale/input: clamp rgbf32 values before lrintf
Mark Reid
mindmark at gmail.com
Sun Nov 14 07:24:43 EET 2021
On Sat, Nov 13, 2021 at 7:24 PM Mark Reid <mindmark at gmail.com> wrote:
>
>
> On Sat, Nov 13, 2021 at 7:02 PM James Almer <jamrial at gmail.com> wrote:
>
>> On 11/13/2021 11:56 PM, mindmark at gmail.com wrote:
>> > From: Mark Reid <mindmark at gmail.com>
>> >
>> > if the float pixel * 65535.0f > 2147483647.0f
>> > lrintf may overfow and return negative values, depending on
>> implementation.
>> > nan and +/-inf values may also be implementation defined
>> >
>> > clamp the value first so lrintf so, always works.
>> >
>> > values <=0.0f, -inf, nan = 0.0f
>> > values >=1.0f, +inf = 1.0f
>> >
>> > the clamping adds some performance overhead, but using a inline function
>> > seems help the compiler optimize on the compiliers I tested.
>> >
>> > old timings
>> > 213920 UNITS in planar_rgbf32le_to_uv, 1 runs, 0 skips
>> > 218830 UNITS in planar_rgbf32le_to_uv, 2 runs, 0 skips
>> > 223285 UNITS in planar_rgbf32le_to_uv, 4 runs, 0 skips
>> > 215405 UNITS in planar_rgbf32le_to_uv, 8 runs, 0 skips
>> > 208920 UNITS in planar_rgbf32le_to_uv, 16 runs, 0 skips
>> > 205115 UNITS in planar_rgbf32le_to_uv, 32 runs, 0 skips
>> > 212220 UNITS in planar_rgbf32le_to_uv, 64 runs, 0 skips
>> >
>> > 216440 UNITS in planar_rgbf32be_to_uv, 1 runs, 0 skips
>> > 222450 UNITS in planar_rgbf32be_to_uv, 2 runs, 0 skips
>> > 228780 UNITS in planar_rgbf32be_to_uv, 4 runs, 0 skips
>> > 226900 UNITS in planar_rgbf32be_to_uv, 8 runs, 0 skips
>> > 223168 UNITS in planar_rgbf32be_to_uv, 16 runs, 0 skips
>> > 249340 UNITS in planar_rgbf32be_to_uv, 32 runs, 0 skips
>> > 233746 UNITS in planar_rgbf32be_to_uv, 64 runs, 0 skips
>> >
>> > 173360 UNITS in planar_rgbf32le_to_y, 1 runs, 0 skips
>> > 179970 UNITS in planar_rgbf32le_to_y, 2 runs, 0 skips
>> > 182960 UNITS in planar_rgbf32le_to_y, 4 runs, 0 skips
>> > 177040 UNITS in planar_rgbf32le_to_y, 8 runs, 0 skips
>> > 170351 UNITS in planar_rgbf32le_to_y, 16 runs, 0 skips
>> > 167136 UNITS in planar_rgbf32le_to_y, 32 runs, 0 skips
>> > 165821 UNITS in planar_rgbf32le_to_y, 64 runs, 0 skips
>> >
>> > 181040 UNITS in planar_rgbf32be_to_y, 1 runs, 0 skips
>> > 182920 UNITS in planar_rgbf32be_to_y, 2 runs, 0 skips
>> > 180935 UNITS in planar_rgbf32be_to_y, 4 runs, 0 skips
>> > 180897 UNITS in planar_rgbf32be_to_y, 8 runs, 0 skips
>> > 179640 UNITS in planar_rgbf32be_to_y, 16 runs, 0 skips
>> > 178912 UNITS in planar_rgbf32be_to_y, 32 runs, 0 skips
>> > 177983 UNITS in planar_rgbf32be_to_y, 64 runs, 0 skips
>> >
>> > new timings
>> > 228860 UNITS in planar_rgbf32le_to_uv, 1 runs, 0 skips
>> > 232400 UNITS in planar_rgbf32le_to_uv, 2 runs, 0 skips
>> > 237270 UNITS in planar_rgbf32le_to_uv, 4 runs, 0 skips
>> > 229992 UNITS in planar_rgbf32le_to_uv, 8 runs, 0 skips
>> > 222270 UNITS in planar_rgbf32le_to_uv, 16 runs, 0 skips
>> > 218896 UNITS in planar_rgbf32le_to_uv, 32 runs, 0 skips
>> > 216938 UNITS in planar_rgbf32le_to_uv, 64 runs, 0 skips
>> >
>> > 232340 UNITS in planar_rgbf32be_to_uv, 1 runs, 0 skips
>> > 231830 UNITS in planar_rgbf32be_to_uv, 2 runs, 0 skips
>> > 242235 UNITS in planar_rgbf32be_to_uv, 4 runs, 0 skips
>> > 235210 UNITS in planar_rgbf32be_to_uv, 8 runs, 0 skips
>> > 229040 UNITS in planar_rgbf32be_to_uv, 16 runs, 0 skips
>> > 224996 UNITS in planar_rgbf32be_to_uv, 32 runs, 0 skips
>> > 223581 UNITS in planar_rgbf32be_to_uv, 64 runs, 0 skips
>> >
>> > 179220 UNITS in planar_rgbf32le_to_y, 1 runs, 0 skips
>> > 174790 UNITS in planar_rgbf32le_to_y, 2 runs, 0 skips
>> > 182630 UNITS in planar_rgbf32le_to_y, 4 runs, 0 skips
>> > 183002 UNITS in planar_rgbf32le_to_y, 8 runs, 0 skips
>> > 181005 UNITS in planar_rgbf32le_to_y, 16 runs, 0 skips
>> > 179390 UNITS in planar_rgbf32le_to_y, 32 runs, 0 skips
>> > 192476 UNITS in planar_rgbf32le_to_y, 64 runs, 0 skips
>> >
>> > 195620 UNITS in planar_rgbf32be_to_y, 1 runs, 0 skips
>> > 195860 UNITS in planar_rgbf32be_to_y, 2 runs, 0 skips
>> > 198700 UNITS in planar_rgbf32be_to_y, 4 runs, 0 skips
>> > 197252 UNITS in planar_rgbf32be_to_y, 8 runs, 0 skips
>> > 195702 UNITS in planar_rgbf32be_to_y, 16 runs, 0 skips
>> > 194853 UNITS in planar_rgbf32be_to_y, 32 runs, 0 skips
>> > 194459 UNITS in planar_rgbf32be_to_y, 64 runs, 0 skips
>> > ---
>> > libswscale/input.c | 21 +++++++++++++--------
>> > 1 file changed, 13 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/libswscale/input.c b/libswscale/input.c
>> > index 90efdd2ffc..2a13846abe 100644
>> > --- a/libswscale/input.c
>> > +++ b/libswscale/input.c
>> > @@ -966,6 +966,11 @@ static av_always_inline void
>> planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV,
>> >
>> > #define rdpx(src) (is_be ? av_int2float(AV_RB32(src)):
>> av_int2float(AV_RL32(src)))
>> >
>> > +static av_always_inline float clampf(float x, float min, float max)
>>
>> Use av_clipf() instead.
>>
>
> Thanks. I should have thought of that, I'll send a version
>
>
I don't think av_clipf works for the NAN case.
With my clamp function I'm exploiting that MAX changes the value that is
passed MIN and that NAN > 0.0f == false.
so the FFMAX(NAN, 0.0f) is 0.0f
It's a subtle thing, but it maps NAN to zero.
The x86 asm version of av_clipf will work if maxss is done before minss,
but the c version returns NAN if the value is NAN.
I'll see if anything breaks if I change the behavior of av_clipf
>
>> > +{
>> > + return FFMIN(FFMAX(x, min), max);
>> > +}
>> > +
>> > static av_always_inline void planar_rgbf32_to_a(uint8_t *_dst, const
>> uint8_t *_src[4], int width, int is_be, int32_t *rgb2yuv)
>> > {
>> > int i;
>> > @@ -973,7 +978,7 @@ static av_always_inline void
>> planar_rgbf32_to_a(uint8_t *_dst, const uint8_t *_s
>> > uint16_t *dst = (uint16_t *)_dst;
>> >
>> > for (i = 0; i < width; i++) {
>> > - dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src[3] + i)));
>> > + dst[i] = lrintf(clampf(65535.0f * rdpx(src[3] + i), 0.0f,
>> 65535.0f));
>> > }
>> > }
>> >
>> > @@ -987,9 +992,9 @@ static av_always_inline void
>> planar_rgbf32_to_uv(uint8_t *_dstU, uint8_t *_dstV,
>> > int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv =
>> rgb2yuv[BV_IDX];
>> >
>> > for (i = 0; i < width; i++) {
>> > - int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
>> > - int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
>> > - int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
>> > + int g = lrintf(clampf(65535.0f * rdpx(src[0] + i), 0.0f,
>> 65535.0f));
>> > + int b = lrintf(clampf(65535.0f * rdpx(src[1] + i), 0.0f,
>> 65535.0f));
>> > + int r = lrintf(clampf(65535.0f * rdpx(src[2] + i), 0.0f,
>> 65535.0f));
>> >
>> > dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT -
>> 1))) >> RGB2YUV_SHIFT;
>> > dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT -
>> 1))) >> RGB2YUV_SHIFT;
>> > @@ -1005,9 +1010,9 @@ static av_always_inline void
>> planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s
>> > int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by =
>> rgb2yuv[BY_IDX];
>> >
>> > for (i = 0; i < width; i++) {
>> > - int g = av_clip_uint16(lrintf(65535.0f * rdpx(src[0] + i)));
>> > - int b = av_clip_uint16(lrintf(65535.0f * rdpx(src[1] + i)));
>> > - int r = av_clip_uint16(lrintf(65535.0f * rdpx(src[2] + i)));
>> > + int g = lrintf(clampf(65535.0f * rdpx(src[0] + i), 0.0f,
>> 65535.0f));
>> > + int b = lrintf(clampf(65535.0f * rdpx(src[1] + i), 0.0f,
>> 65535.0f));
>> > + int r = lrintf(clampf(65535.0f * rdpx(src[2] + i), 0.0f,
>> 65535.0f));
>> >
>> > dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT -
>> 1))) >> RGB2YUV_SHIFT;
>> > }
>> > @@ -1021,7 +1026,7 @@ static av_always_inline void
>> grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src,
>> > uint16_t *dst = (uint16_t *)_dst;
>> >
>> > for (i = 0; i < width; ++i){
>> > - dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i)));
>> > + dst[i] = lrintf(clampf(65535.0f * rdpx(src + i), 0.0f,
>> 65535.0f));
>> > }
>> > }
>> >
>> > --
>> > 2.31.1.windows.1
>> >
>> > _______________________________________________
>> > ffmpeg-devel mailing list
>> > ffmpeg-devel at ffmpeg.org
>> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>> >
>> > To unsubscribe, visit link above, or email
>> > ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>> >
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>>
>
More information about the ffmpeg-devel
mailing list