[FFmpeg-devel] [PATCH 4/6] truehd_core: Miscellaneous improvements

Sat Jul 6 17:18:02 EEST 2019

1. The loop counter of the substream_directory loop is always less than
the number of substreams, yet within the loop it is checked whether it
is less than FFMIN(3, s->hdr.num_substreams), although the check for < 3
would suffice.
2. In case the packet is a major sync packet, the last two bytes of the
major sync structure were initialized to 0xff and then immediately
overwritten without the values just set ever being used.
3. When the new substream_directory is written, parity_nibble is updated
one byte at a time with bytes that might be read from the output
packet's data. But one can handle both bytes at once, without resorting
to the data just written, by XOR'ing with the variable that contains the
value that has just been written as a big-endian number. This changes
the intermediate value of parity_nibble, but in the end it just amounts
to a reordering of the sum modulo two that will eventually be written as
parity_nibble. Due to associativity and commutativity, this value is
unchanged.
4. init_get_bits8 already checks that no overflow happens during the
conversion of its argument from bytes to bits. ff_mlp_read_major_sync
makes sure not to overread (the maximum size of a major_sync_info is 60
bytes anyway) and last_offset is < 2^13, so that no overflow in the
calculation of size can happen, i.e. the check for whether size is >= 0
is unnecessary. But then size is completely unnecessary and can be
removed.
5. In case the packet is just passed through, it is unnecessary to read
the packet's dts. This is therefore postponed to when we know that the
packet is not passed through.
6. Given that it seems overkill to use a bitreader just for one
variable, the size of the input access unit is now read directly.
7. A substream's offset (of the end of the substream) is now stored as is
(i.e. in units of words).

These changes amount to a slight performance improvement: It improved
from 5897 decicycles of ten runs with about 262144 runs each (including
an insignificant number of skips -- usually about 20-25) to 5747
decicycles under the same conditions.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
---
 libavcodec/truehd_core_bsf.c | 39 ++++++++++++------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/libavcodec/truehd_core_bsf.c b/libavcodec/truehd_core_bsf.c
index f858c2d4d5..47684235db 100644
--- a/libavcodec/truehd_core_bsf.c
+++ b/libavcodec/truehd_core_bsf.c
@@ -42,12 +42,11 @@ static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
     GetBitContext gbc;
     AccessUnit units[MAX_SUBSTREAMS];
     AVPacket *in;
-    int ret, i, size, last_offset = 0;
+    int ret, i, last_offset = 0;
     int in_size, out_size;
     int have_header = 0;
     int substream_bits = 0;
     int end;
-    uint16_t dts;
 
     ret = ff_bsf_get_packet(ctx, &in);
     if (ret < 0)
@@ -58,20 +57,12 @@ static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
         goto fail;
     }
 
-    ret = init_get_bits(&gbc, in->data, 32);
-    if (ret < 0)
-        goto fail;
-
-    skip_bits(&gbc, 4);
-    in_size = get_bits(&gbc, 12) * 2;
+    in_size = (AV_RB16(in->data) & 0xFFF) * 2;
     if (in_size < 4 || in_size > in->size) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
 
-    out_size = in_size;
-    dts = get_bits(&gbc, 16);
-
     ret = init_get_bits8(&gbc, in->data + 4, in->size - 4);
     if (ret < 0)
         goto fail;
@@ -91,26 +82,24 @@ static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
         for (int j = 0; j < 4; j++)
             units[i].bits[j] = get_bits1(&gbc);
 
-        units[i].offset = get_bits(&gbc, 12) * 2;
-        if (i < FFMIN(s->hdr.num_substreams, 3)) {
-            last_offset = units[i].offset;
+        units[i].offset = get_bits(&gbc, 12);
+        if (i < 3) {
+            last_offset = units[i].offset * 2;
             substream_bits += 16;
         }
 
         if (units[i].bits[0]) {
             units[i].optional = get_bits(&gbc, 16);
-            if (i < FFMIN(s->hdr.num_substreams, 3))
+            if (i < 3)
                 substream_bits += 16;
         }
     }
     end = get_bits_count(&gbc);
 
-    size = ((end + 7) >> 3) + 4 + last_offset;
-    if (size >= 0 && size <= in->size)
-        out_size = size;
+    out_size = ((end + 7) >> 3) + 4 + last_offset;
     if (out_size < in_size) {
         int bpos = 0, reduce = (end - have_header * 28 * 8 - substream_bits) >> 4;
-        uint16_t parity_nibble = 0;
+        uint16_t parity_nibble, dts = AV_RB16(in->data + 2);
         uint16_t auheader;
 
         ret = av_new_packet(out, out_size);
@@ -127,8 +116,6 @@ static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
             out->data[16 + 4] = (out->data[16 + 4] & 0x0c) | (FFMIN(s->hdr.num_substreams, 3) << 4);
             out->data[17 + 4]&= 0x7f;
             out->data[25 + 4] = out->data[25 + 4] & 0xfe;
-            out->data[26 + 4] = 0xff;
-            out->data[27 + 4] = 0xff;
             AV_WL16(out->data + 4 + 26, ff_mlp_checksum16(out->data + 4, 26));
         }
 
@@ -139,18 +126,18 @@ static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
             substr_hdr |= (units[i].bits[1] << 14);
             substr_hdr |= (units[i].bits[2] << 13);
             substr_hdr |= (units[i].bits[3] << 12);
-            substr_hdr |= (units[i].offset / 2) & 0x0FFF;
+            substr_hdr |=  units[i].offset;
 
             AV_WB16(out->data + have_header * 28 + 4 + bpos, substr_hdr);
 
-            parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
-            parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+            parity_nibble ^= substr_hdr;
+            bpos          += 2;
 
             if (units[i].bits[0]) {
                 AV_WB16(out->data + have_header * 28 + 4 + bpos, units[i].optional);
 
-                parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
-                parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+                parity_nibble ^= units[i].optional;
+                bpos          += 2;
             }
         }
 
-- 
2.21.0