[FFmpeg-devel] [PATCH 3/4] avformat/rmdec: Use 64bit for intermediate for DEINT_ID_INT4

Sat Apr 17 02:37:51 EEST 2021

On 4/16/2021 7:45 PM, James Almer wrote:
> On 4/16/2021 7:24 PM, Andreas Rheinhardt wrote:
>> James Almer:
>>> On 4/16/2021 4:04 PM, Michael Niedermayer wrote:
>>>> On Thu, Apr 15, 2021 at 06:22:10PM -0300, James Almer wrote:
>>>>> On 4/15/2021 5:44 PM, Michael Niedermayer wrote:
>>>>>> Fixes: runtime error: signed integer overflow: 65312 * 65535 cannot
>>>>>> be represented in type 'int'
>>>>>> Fixes:
>>>>>> 32832/clusterfuzz-testcase-minimized-ffmpeg_dem_RM_fuzzer-4817710040088576 
>>>>>>
>>>>>>
>>>>>>
>>>>>> Found-by: continuous fuzzing process
>>>>>> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
>>>>>> Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
>>>>>> ---
>>>>>> Â Â Â  libavformat/rmdec.c | 4 ++--
>>>>>> Â Â Â  1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>>> index fc3bff4859..af032ed90a 100644
>>>>>> --- a/libavformat/rmdec.c
>>>>>> +++ b/libavformat/rmdec.c
>>>>>> @@ -269,9 +269,9 @@ static int
>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>> Â Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize > ast->audio_framesize ||
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * (uint64_t)sub_packet_h > (2
>>>>>> + (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>
>>>>> This check seems superfluous with the one below right after it.
>>>>> ast->coded_framesize * sub_packet_h must be equal to 2 *
>>>>> ast->audio_framesize. It can be removed.
>>>>>
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>> -Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>>>> 2*ast->audio_framesize) {
>>>>>> +Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * (uint64_t)sub_packet_h !=
>>>>>> 2*ast->audio_framesize) {
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  avpriv_request_sample(s, "mismatching interleaver
>>>>>> parameters");
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
>>>>>
>>>>> How about something like
>>>>>
>>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>>> index fc3bff4859..09880ee3fe 100644
>>>>>> --- a/libavformat/rmdec.c
>>>>>> +++ b/libavformat/rmdec.c
>>>>>> @@ -269,7 +269,7 @@ static int
>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>> Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize > ast->audio_framesize ||
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h)
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>>>> 2*ast->audio_framesize) {
>>>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  avpriv_request_sample(s, "mismatching interleaver
>>>>>> parameters");
>>>>>
>>>>> Instead?
>>>>
>>>> The 2 if() execute different things, the 2nd requests a sample, the 
>>>> first
>>>> not. I think this suggestion would change when we request a sample
>>>
>>> Why are we returning INVALIDDATA after requesting a sample, for that
>>> matter? If it's considered an invalid scenario, do we really need a 
>>> sample?
>>>
>>> In any case, if you don't want more files where "ast->coded_framesize *
>>> sub_packet_h != 2*ast->audio_framesize" would print a sample request,
>>> then maybe something like the following could be used instead?
>>>
>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>> index fc3bff4859..10c1699a81 100644
>>>> --- a/libavformat/rmdec.c
>>>> +++ b/libavformat/rmdec.c
>>>> @@ -269,6 +269,7 @@ static int
>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize > ast->audio_framesize ||
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h ||
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>> 2*ast->audio_framesize) {
>>>> @@ -278,12 +279,16 @@ static int
>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_GENR:
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->sub_packet_size <= 0 ||
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h ||
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->sub_packet_size > ast->audio_framesize)
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->audio_framesize % ast->sub_packet_size)
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_SIPR:
>>>> +Â Â Â Â Â Â Â Â Â Â Â  if (ast->audio_framesize > INT_MAX / sub_packet_h)
>>
>> sub_packet_h has not been checked for being != 0 here and in the
>> DEINT_ID_GENR codepath.
> 
> Ah, good catch. This also means av_new_packet() is potentially being 
> called with 0 as size for these two codepaths.
> 
>>
>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>> +Â Â Â Â Â Â Â Â Â Â Â  break;
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT0:
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_VBRS:
>>>> Â Â Â Â Â Â Â Â Â  case DEINT_ID_VBRF:
>>>> @@ -296,7 +301,6 @@ static int
>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_GENR ||
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_SIPR) {
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (st->codecpar->block_align <= 0 ||
>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * (uint64_t)sub_packet_h >
>>>> (unsigned)INT_MAX ||
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * sub_packet_h <
>>>> st->codecpar->block_align)
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>> Â Â Â Â Â Â Â Â Â Â Â Â Â  if (av_new_packet(&ast->pkt, ast->audio_framesize *
>>>> sub_packet_h) < 0)
>>>
>>> Same amount of checks for all three deint ids, and no integer casting to
>>> prevent overflows.
>>
>> Since when is a division better than casting to 64bits to perform a
>> multiplication?
> 
> This is done in plenty of places across the codebase to catch the same 
> kind of overflows. Does it make any measurable difference even worth 
> mentioning, especially considering this is read in the header?
> 
> All these casts make the code really ugly and harder to read. Especially 
> things like (unsigned)INT_MAX. So if there are cleaner solutions, they 
> should be used if possible.
> Code needs to not only work, but also be maintainable.

Another option is to just change the type of the RMStream fields, like so:

> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
> index fc3bff4859..304984d2b0 100644
> --- a/libavformat/rmdec.c
> +++ b/libavformat/rmdec.c
> @@ -50,8 +50,8 @@ struct RMStream {
>      /// Audio descrambling matrix parameters
>      int64_t audiotimestamp; ///< Audio packet timestamp
>      int sub_packet_cnt; // Subpacket counter, used while reading
> -    int sub_packet_size, sub_packet_h, coded_framesize; ///< Descrambling parameters from container
> -    int audio_framesize; /// Audio frame size from container
> +    unsigned sub_packet_size, sub_packet_h, coded_framesize; ///< Descrambling parameters from container
> +    unsigned audio_framesize; /// Audio frame size from container
>      int sub_packet_lengths[16]; /// Length of each subpacket
>      int32_t deint_id;  ///< deinterleaver used in audio stream
>  };
> @@ -277,7 +277,7 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>              }
>              break;
>          case DEINT_ID_GENR:
> -            if (ast->sub_packet_size <= 0 ||
> +            if (!ast->sub_packet_size ||
>                  ast->sub_packet_size > ast->audio_framesize)
>                  return AVERROR_INVALIDDATA;
>              if (ast->audio_framesize % ast->sub_packet_size)
> @@ -296,7 +296,7 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>              ast->deint_id == DEINT_ID_GENR ||
>              ast->deint_id == DEINT_ID_SIPR) {
>              if (st->codecpar->block_align <= 0 ||
> -                ast->audio_framesize * (uint64_t)sub_packet_h > (unsigned)INT_MAX ||
> +                ast->audio_framesize * sub_packet_h > INT_MAX ||
>                  ast->audio_framesize * sub_packet_h < st->codecpar->block_align)
>                  return AVERROR_INVALIDDATA;
>              if (av_new_packet(&ast->pkt, ast->audio_framesize * sub_packet_h) < 0)

ast->audio_framesize and sub_packet_h are never bigger than INT16_MAX, 
so unless I'm missing something, this should be enough.