[FFmpeg-devel] [PATCH 3/4] avformat/rmdec: Use 64bit for intermediate for DEINT_ID_INT4

Sat Apr 17 03:59:32 EEST 2021

On 4/16/2021 9:13 PM, Andreas Rheinhardt wrote:
> James Almer:
>> On 4/16/2021 8:45 PM, Andreas Rheinhardt wrote:
>>> James Almer:
>>>> On 4/16/2021 7:45 PM, James Almer wrote:
>>>>> On 4/16/2021 7:24 PM, Andreas Rheinhardt wrote:
>>>>>> James Almer:
>>>>>>> On 4/16/2021 4:04 PM, Michael Niedermayer wrote:
>>>>>>>> On Thu, Apr 15, 2021 at 06:22:10PM -0300, James Almer wrote:
>>>>>>>>> On 4/15/2021 5:44 PM, Michael Niedermayer wrote:
>>>>>>>>>> Fixes: runtime error: signed integer overflow: 65312 * 65535
>>>>>>>>>> cannot
>>>>>>>>>> be represented in type 'int'
>>>>>>>>>> Fixes:
>>>>>>>>>> 32832/clusterfuzz-testcase-minimized-ffmpeg_dem_RM_fuzzer-4817710040088576
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Found-by: continuous fuzzing process
>>>>>>>>>> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
>>>>>>>>>> Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
>>>>>>>>>> ---
>>>>>>>>>>  Â Â Â Â  libavformat/rmdec.c | 4 ++--
>>>>>>>>>>  Â Â Â Â  1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>>>>>>> index fc3bff4859..af032ed90a 100644
>>>>>>>>>> --- a/libavformat/rmdec.c
>>>>>>>>>> +++ b/libavformat/rmdec.c
>>>>>>>>>> @@ -269,9 +269,9 @@ static int
>>>>>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize >
>>>>>>>>>> ast->audio_framesize ||
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>>>>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>>>>>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * (uint64_t)sub_packet_h
>>>>>>>>>>> (2
>>>>>>>>>> + (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>>>>>
>>>>>>>>> This check seems superfluous with the one below right after it.
>>>>>>>>> ast->coded_framesize * sub_packet_h must be equal to 2 *
>>>>>>>>> ast->audio_framesize. It can be removed.
>>>>>>>>>
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>>> -Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>>>>>>>> 2*ast->audio_framesize) {
>>>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * (uint64_t)sub_packet_h !=
>>>>>>>>>> 2*ast->audio_framesize) {
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  avpriv_request_sample(s, "mismatching
>>>>>>>>>> interleaver
>>>>>>>>>> parameters");
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  }
>>>>>>>>>
>>>>>>>>> How about something like
>>>>>>>>>
>>>>>>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>>>>>>> index fc3bff4859..09880ee3fe 100644
>>>>>>>>>> --- a/libavformat/rmdec.c
>>>>>>>>>> +++ b/libavformat/rmdec.c
>>>>>>>>>> @@ -269,7 +269,7 @@ static int
>>>>>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize > ast->audio_framesize ||
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>>>>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>>>>>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h)
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>>>>>>>> 2*ast->audio_framesize) {
>>>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  avpriv_request_sample(s, "mismatching
>>>>>>>>>> interleaver
>>>>>>>>>> parameters");
>>>>>>>>>
>>>>>>>>> Instead?
>>>>>>>>
>>>>>>>> The 2 if() execute different things, the 2nd requests a sample, the
>>>>>>>> first
>>>>>>>> not. I think this suggestion would change when we request a sample
>>>>>>>
>>>>>>> Why are we returning INVALIDDATA after requesting a sample, for that
>>>>>>> matter? If it's considered an invalid scenario, do we really need a
>>>>>>> sample?
>>>>>>>
>>>>>>> In any case, if you don't want more files where
>>>>>>> "ast->coded_framesize *
>>>>>>> sub_packet_h != 2*ast->audio_framesize" would print a sample request,
>>>>>>> then maybe something like the following could be used instead?
>>>>>>>
>>>>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>>>>> index fc3bff4859..10c1699a81 100644
>>>>>>>> --- a/libavformat/rmdec.c
>>>>>>>> +++ b/libavformat/rmdec.c
>>>>>>>> @@ -269,6 +269,7 @@ static int
>>>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT4:
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize > ast->audio_framesize ||
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  sub_packet_h <= 1 ||
>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h ||
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->coded_framesize * sub_packet_h > (2 +
>>>>>>>> (sub_packet_h & 1)) * ast->audio_framesize)
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->coded_framesize * sub_packet_h !=
>>>>>>>> 2*ast->audio_framesize) {
>>>>>>>> @@ -278,12 +279,16 @@ static int
>>>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_GENR:
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->sub_packet_size <= 0 ||
>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize > INT_MAX / sub_packet_h ||
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->sub_packet_size > ast->audio_framesize)
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->audio_framesize % ast->sub_packet_size)
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_SIPR:
>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â  if (ast->audio_framesize > INT_MAX / sub_packet_h)
>>>>>>
>>>>>> sub_packet_h has not been checked for being != 0 here and in the
>>>>>> DEINT_ID_GENR codepath.
>>>>>
>>>>> Ah, good catch. This also means av_new_packet() is potentially being
>>>>> called with 0 as size for these two codepaths.
>>>>>
>>>>>>
>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>> +Â Â Â Â Â Â Â Â Â Â Â  break;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_INT0:
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_VBRS:
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â  case DEINT_ID_VBRF:
>>>>>>>> @@ -296,7 +301,6 @@ static int
>>>>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_GENR ||
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_SIPR) {
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (st->codecpar->block_align <= 0 ||
>>>>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * (uint64_t)sub_packet_h >
>>>>>>>> (unsigned)INT_MAX ||
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * sub_packet_h <
>>>>>>>> st->codecpar->block_align)
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â  if (av_new_packet(&ast->pkt, ast->audio_framesize *
>>>>>>>> sub_packet_h) < 0)
>>>>>>>
>>>>>>> Same amount of checks for all three deint ids, and no integer
>>>>>>> casting to
>>>>>>> prevent overflows.
>>>>>>
>>>>>> Since when is a division better than casting to 64bits to perform a
>>>>>> multiplication?
>>>>>
>>>>> This is done in plenty of places across the codebase to catch the same
>>>>> kind of overflows. Does it make any measurable difference even worth
>>>>> mentioning, especially considering this is read in the header?
>>>>>
>>>>> All these casts make the code really ugly and harder to read.
>>>>> Especially things like (unsigned)INT_MAX. So if there are cleaner
>>>>> solutions, they should be used if possible.
>>>>> Code needs to not only work, but also be maintainable.
>>>>
>>>> Another option is to just change the type of the RMStream fields,
>>>> like so:
>>>>
>>>>> diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
>>>>> index fc3bff4859..304984d2b0 100644
>>>>> --- a/libavformat/rmdec.c
>>>>> +++ b/libavformat/rmdec.c
>>>>> @@ -50,8 +50,8 @@ struct RMStream {
>>>>>  Â Â Â Â Â  /// Audio descrambling matrix parameters
>>>>>  Â Â Â Â Â  int64_t audiotimestamp; ///< Audio packet timestamp
>>>>>  Â Â Â Â Â  int sub_packet_cnt; // Subpacket counter, used while reading
>>>>> -Â Â Â  int sub_packet_size, sub_packet_h, coded_framesize; ///<
>>>>> Descrambling parameters from container
>>>>> -Â Â Â  int audio_framesize; /// Audio frame size from container
>>>>> +Â Â Â  unsigned sub_packet_size, sub_packet_h, coded_framesize; ///<
>>>>> Descrambling parameters from container
>>>>> +Â Â Â  unsigned audio_framesize; /// Audio frame size from container
>>>>>  Â Â Â Â Â  int sub_packet_lengths[16]; /// Length of each subpacket
>>>>>  Â Â Â Â Â  int32_t deint_id;Â  ///< deinterleaver used in audio stream
>>>>>  Â Â };
>>>>> @@ -277,7 +277,7 @@ static int
>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  }
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  break;
>>>>>  Â Â Â Â Â Â Â Â Â  case DEINT_ID_GENR:
>>>>> -Â Â Â Â Â Â Â Â Â Â Â  if (ast->sub_packet_size <= 0 ||
>>>>> +Â Â Â Â Â Â Â Â Â Â Â  if (!ast->sub_packet_size ||
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->sub_packet_size > ast->audio_framesize)
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (ast->audio_framesize % ast->sub_packet_size)
>>>>> @@ -296,7 +296,7 @@ static int
>>>>> rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_GENR ||
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->deint_id == DEINT_ID_SIPR) {
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (st->codecpar->block_align <= 0 ||
>>>>> -Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * (uint64_t)sub_packet_h >
>>>>> (unsigned)INT_MAX ||
>>>>> +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * sub_packet_h > INT_MAX ||
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  ast->audio_framesize * sub_packet_h <
>>>>> st->codecpar->block_align)
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â  return AVERROR_INVALIDDATA;
>>>>>  Â Â Â Â Â Â Â Â Â Â Â Â Â  if (av_new_packet(&ast->pkt, ast->audio_framesize *
>>>>> sub_packet_h) < 0)
>>>>
>>>> ast->audio_framesize and sub_packet_h are never bigger than INT16_MAX,
>>>> so unless I'm missing something, this should be enough.
>>>
>>> In the multiplication ast->coded_framesize * sub_packet_h the first is
>>> read via av_rb32(). Your patch will indeed eliminate the undefined
>>> behaviour (because unsigned), but it might be that the check will now
>>> not trigger when it should trigger because only the lower 32bits are
>>> compared.
>>
>> ast->coded_framesize is guaranteed to be less than or equal to
>> ast->audio_framesize, which is guaranteed to be at most INT16_MAX.
>>
> 
> True (apart from the bound being UINT16_MAX).

Yes, my bad.

  Doesn't fix the
> uninitialized data that I mentioned though.
> Yet there is a check for coded_framesize being < 0 immediately after it
> is read. Said check would be moot with your changes. The problem is that
> if its value is not representable as an int, one could set a negative
> block_align value based upon it.

With coded_framesize being an int (local variable where the value is 
read with avio_rb32()) and ast->coded_framesize being unsigned (context 
variable where the value is ultimately stored), the end result after the 
< 0 check will be that ast->coded_framesize is at most INT_MAX right 
from the beginning, so block_align can't be negative either.