[FFmpeg-devel] [PATCH v4] Add support for Audible AAX (and AAX+) files

Wed Jul 15 23:03:07 CEST 2015

Le sextidi 26 messidor, an CCXXIII, Vesselin Bontchev a écrit :
> Thanks for all the feedback, and help.

A few more comments, sorry.

> From 1ed235454a61fe1f8d993d09d3d6398e7609c624 Mon Sep 17 00:00:00 2001
> From: Vesselin Bontchev <vesselin.bontchev at yandex.com>
> Date: Sat, 11 Jul 2015 18:02:47 +0000
> Subject: [PATCH] Add support for Audible AAX (and AAX+) files
> 
> ---
>  libavformat/isom.h |    5 ++
>  libavformat/mov.c  |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 143 insertions(+)
> 
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index 5d48989..9fa02e7 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -198,6 +198,11 @@ typedef struct MOVContext {
>      MOVFragmentIndex** fragment_index_data;
>      unsigned fragment_index_count;
>      int atom_depth;
> +    unsigned int aax_mode;  ///< 'aax' file has been detected
> +    unsigned char file_key[20];
> +    unsigned char file_iv[20];
> +    void *activation_bytes;
> +    int activation_bytes_size;
>  } MOVContext;
>  
>  int ff_mp4_read_descr_len(AVIOContext *pb);
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index d1b29a2..6f9bf50 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -26,6 +26,7 @@
>  #include <inttypes.h>
>  #include <limits.h>
>  #include <stdint.h>

> +#include <ctype.h>

This include is no longer needed. This is a good thing, because the
functions in ctype.h are locale-dependant and therefore not suitable for use
in FFmpeg.

>  
>  #include "libavutil/attributes.h"
>  #include "libavutil/channel_layout.h"
> @@ -37,6 +38,8 @@
>  #include "libavutil/dict.h"
>  #include "libavutil/display.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/aes.h"
> +#include "libavutil/sha.h"
>  #include "libavutil/timecode.h"
>  #include "libavcodec/ac3tab.h"
>  #include "avformat.h"
> @@ -807,6 +810,109 @@ static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>      return 0; /* now go for moov */
>  }
>  

> +static void hex_encode(unsigned char *s, int len, unsigned char *o)

uint8_t is usually recommended for that kind of use.

> +{
> +    char itoa16_private[16] = "0123456789abcdef";
> +    int i;
> +    for (i = 0; i < len; ++i) {
> +        o[0] = itoa16_private[s[i] >> 4];
> +        o[1] = itoa16_private[s[i] & 15];
> +        o += 2;
> +    }

You could just write:

    for (i = 0; i < len; i++)
	snprintf(o + i * 2, 3, "%02x", s[i]);

And since you use it only to dump debug, you could do even simpler and avoid
buffers:

    for (i = 0; i < len; i++)
	av_log(ctx, AV_LOG_WARNING, "%02x", s[i]);

> +}
> +
> +#define DRM_BLOB_SIZE 56
> +
> +static int mov_read_adrm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> +{
> +    // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
> +    unsigned char fixed_key[] = { 0x77, 0x21, 0x4d, 0x4b, 0x19, 0x6a, 0x87, 0xcd,
> +                                  0x52, 0x00, 0x45, 0xfd, 0x20, 0xa5, 0x1d, 0x67 };

> +    unsigned char intermediate_key[20] = {0};
> +    unsigned char intermediate_iv[20] = {0};
> +    unsigned char input[64] = {0};
> +    unsigned char output[64] = {0};
> +    unsigned char file_checksum[20] = {0};
> +    unsigned char file_checksum_encoded[41] = {0};
> +    unsigned char file_key_encoded[41] = {0};
> +    unsigned char file_iv_encoded[41] = {0};

Do you need to init to 0? The compiler or valgrind can detect access to
uninitialized memory, if you init to 0, they can not do it.

> +    unsigned char calculated_checksum[20];
> +    struct AVSHA *ctx;
> +    struct AVAES *aes_decrypt;
> +    int i;

> +    unsigned char *activation_bytes = (unsigned char*) c->activation_bytes;

Useless (and therefore harmful) cast.

> +
> +    av_log(c->fc, AV_LOG_DEBUG, "[aax] aax file detected!\n");
> +    c->aax_mode = 1;
> +

> +    ctx = av_sha_alloc();

Unchecked allocation. And ctx is probably too ambiguous.

> +    aes_decrypt = av_aes_alloc();
> +    if (!aes_decrypt) {

> +        return AVERROR(ENOMEM);

You are leaking ctx.

> +    }
> +
> +    /* drm blob processing */
> +    ffio_read_size(pb, output, 8); // go to offset 8, absolute postion 0x251
> +    ffio_read_size(pb, input, DRM_BLOB_SIZE);
> +    ffio_read_size(pb, output, 4); // go to offset 4, absolute postion 0x28d
> +    ffio_read_size(pb, file_checksum, 20);
> +    hex_encode(file_checksum, 20, file_checksum_encoded);
> +    av_log(c->fc, AV_LOG_DEBUG, "[aax] file checksum == %s\n", file_checksum_encoded);
> +
> +    /* verify activation data */
> +    if (!activation_bytes || c->activation_bytes_size != 4) {
> +        av_log(c->fc, AV_LOG_FATAL, "[aax] activation_bytes option is missing!\n");

> +        return AVERROR_INVALIDDATA;

Since activation_bytes come from the user, AVERROR(EINVAL) seems more
correct.

> +    }
> +    if (c->activation_bytes_size != 4) {
> +        av_log(c->fc, AV_LOG_FATAL, "[aax] activation_bytes value needs to be 4 bytes!\n");
> +        return AVERROR_INVALIDDATA;
> +    }

Here and at a few places later, you are leaking ctx and aes_decrypt.
Usually, to avoid leaking like that, a single exit point is established and
used with goto.

> +
> +    /* AAX (and AAX+) key derivation */
> +    av_sha_init(ctx, 160);
> +    av_sha_update(ctx, fixed_key, 16);
> +    av_sha_update(ctx, activation_bytes, 4);
> +    av_sha_final(ctx, intermediate_key);
> +    av_sha_init(ctx, 160);
> +    av_sha_update(ctx, fixed_key, 16);
> +    av_sha_update(ctx, intermediate_key, 20);
> +    av_sha_update(ctx, activation_bytes, 4);
> +    av_sha_final(ctx, intermediate_iv);
> +    av_sha_init(ctx, 160);
> +    av_sha_update(ctx, intermediate_key, 16);
> +    av_sha_update(ctx, intermediate_iv, 16);
> +    av_sha_final(ctx, calculated_checksum);
> +    if (memcmp(calculated_checksum, file_checksum, 20)) {
> +        av_log(c->fc, AV_LOG_ERROR, "[aax] mismatch in checksums!\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +    av_aes_init(aes_decrypt, intermediate_key, 128, 1);
> +    av_aes_crypt(aes_decrypt, output, input, DRM_BLOB_SIZE >> 4, intermediate_iv, 1);
> +    for (i = 0; i < 4; i++) {

> +        if (activation_bytes[i] != output[3 - i]) {

Is there a reason to have activation_bytes in the reverse order like that?

> +            av_log(c->fc, AV_LOG_ERROR, "[aax] error in drm blob decryption!\n");
> +            return AVERROR_INVALIDDATA;
> +        }
> +    }
> +    memcpy(c->file_key, output + 8, 16);
> +    memcpy(input, output + 26, 16);
> +    av_sha_init(ctx, 160);
> +    av_sha_update(ctx, input, 16);
> +    av_sha_update(ctx, c->file_key, 16);
> +    av_sha_update(ctx, fixed_key, 16);
> +    av_sha_final(ctx, c->file_iv);
> +    hex_encode(c->file_key, 16, file_key_encoded);
> +    av_log(c->fc, AV_LOG_DEBUG, "[aax] file key == %s\n", file_key_encoded);
> +    hex_encode(c->file_iv, 16, file_iv_encoded);
> +    av_log(c->fc, AV_LOG_DEBUG, "[aax] file iv == %s\n", file_iv_encoded);
> +
> +    av_free(aes_decrypt);
> +    av_free(ctx);
> +
> +    return 0;
> +}
> +
>  /* read major brand, minor version and compatible brands and store them as metadata */
>  static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>  {
> @@ -3594,6 +3700,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
>  { MKTAG('e','l','s','t'), mov_read_elst },
>  { MKTAG('e','n','d','a'), mov_read_enda },
>  { MKTAG('f','i','e','l'), mov_read_fiel },
> +{ MKTAG('a','d','r','m'), mov_read_adrm },
>  { MKTAG('f','t','y','p'), mov_read_ftyp },
>  { MKTAG('g','l','b','l'), mov_read_glbl },
>  { MKTAG('h','d','l','r'), mov_read_hdlr },
> @@ -4337,6 +4444,32 @@ static int should_retry(AVIOContext *pb, int error_code) {
>      return 1;
>  }
>  
> +/* Audible AAX (and AAX+) bytestream decryption
> + *

> + * ffmpeg -activation_bytes 00112233 -i test.aax -vn -codec copy -v debug output.mp4

Should go in the docs.

> + *
> + */
> +static int aax_filter(uint8_t *input, int size, MOVContext *c)
> +{
> +    int blocks = 0;
> +    unsigned char iv[16];
> +    struct AVAES *aes_decrypt;
> +

> +    aes_decrypt = av_aes_alloc();
> +    if (!aes_decrypt) {
> +        return AVERROR(ENOMEM);
> +    }

Is there any reason to repeatedly allocate and free this? You would put it
in the context and allocate it only once.

> +
> +    memcpy(iv, c->file_iv, 16); // iv is overwritten
> +    blocks = size >> 4;
> +    av_aes_init(aes_decrypt, c->file_key, 128, 1);

> +    av_aes_crypt(aes_decrypt, input, input, blocks, iv, 1);

What happens if size is not a multiple of 16? A check might be useful to
detect broken files.

> +
> +    av_free(aes_decrypt);
> +
> +    return 0;
> +}
> +
>  static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
>  {
>      MOVContext *mov = s->priv_data;
> @@ -4431,6 +4564,9 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
>      pkt->flags |= sample->flags & AVINDEX_KEYFRAME ? AV_PKT_FLAG_KEY : 0;
>      pkt->pos = sample->pos;
>  
> +    if (mov->aax_mode)
> +        aax_filter(pkt->data, pkt->size, mov);
> +
>      return 0;
>  }
>  
> @@ -4544,6 +4680,8 @@ static const AVOption mov_options[] = {
>          AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS },
>      { "export_xmp", "Export full XMP metadata", OFFSET(export_xmp),
>          AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS },
> +    { "activation_bytes", "Secret bytes for Audible AAX files", OFFSET(activation_bytes),
> +        AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_DECODING_PARAM },
>      { NULL },
>  };
>  

Regards,

-- 
  Nicolas George
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150715/c510c024/attachment.sig>