[FFmpeg-devel] [PATCH 2/2] avcodec/amr*dec: add multichannel support

Paul B Mahol onemda at gmail.com
Fri Oct 1 09:41:15 EEST 2021


On Fri, Oct 1, 2021 at 3:17 AM Sun Zhenliang <hisunzhenliang at outlook.com>
wrote:

>
> Sent with a Spark
> 2021年10月1日 +0800 05:01 Paul B Mahol <onemda at gmail.com>,写道:
> > Signed-off-by: Paul B Mahol <onemda at gmail.com>
> > ---
> > libavcodec/amr_parser.c | 39 ++++++++++++++++++--------
> > libavcodec/amrnbdec.c | 54 +++++++++++++++++++++++-------------
> > libavcodec/amrwbdec.c | 59 +++++++++++++++++++++++++--------------
> > libavformat/amr.c | 61 +++++++++++++++++++++++++++++------------
> > 4 files changed, 143 insertions(+), 70 deletions(-)
> >
> > diff --git a/libavcodec/amr_parser.c b/libavcodec/amr_parser.c
> > index 2659cb40d7..222d8e05e9 100644
> > --- a/libavcodec/amr_parser.c
> > +++ b/libavcodec/amr_parser.c
> > @@ -39,6 +39,7 @@ typedef struct AMRParseContext {
> > ParseContext pc;
> > uint64_t cumulated_size;
> > uint64_t block_count;
> > + int current_channel;
> > int remaining;
> > } AMRParseContext;
> >
> > @@ -57,21 +58,35 @@ static int amr_parse(AVCodecParserContext *s1,
> > if (s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
> > next = buf_size;
> > } else {
> > - if (s->remaining) {
> > - next = s->remaining;
> > - } else {
> > - int mode = (buf[0] >> 3) & 0x0F;
> > -
> > - if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
> > - next = amrnb_packed_size[mode];
> > - } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
> > - next = amrwb_packed_size[mode];
> > + int offset = 0;
> > +
> > + for (int ch = s->current_channel; ch < avctx->channels; ch++) {
> > + if (s->remaining) {
> > + next = s->remaining;
> > + } else {
> > + int mode = (buf[offset] >> 3) & 0x0F;
> > +
> > + if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
> > + next = amrnb_packed_size[mode];
> > + } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
> > + next = amrwb_packed_size[mode];
> > + }
> > + }
> > +
> > + offset += next;
> > + if (offset >= buf_size) {
> > + s->remaining = offset - buf_size;
> > + next = END_NOT_FOUND;
> > + s->current_channel = ch;
> > + break;
> > + } else {
> > + s->remaining = 0;
> > + s->current_channel = 0;
> > }
> > }
> >
> > - s->remaining = next - FFMIN(buf_size, next);
> > - if (s->remaining)
> > - next = END_NOT_FOUND;
> > + if (s->remaining == 0)
> > + next = offset;
> >
> > if (next != END_NOT_FOUND) {
> > if (s->cumulated_size < UINT64_MAX - next) {
> > diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
> > index e366a09976..472fa85f87 100644
> > --- a/libavcodec/amrnbdec.c
> > +++ b/libavcodec/amrnbdec.c
> > @@ -145,6 +145,10 @@ typedef struct AMRContext {
> >
> > } AMRContext;
> >
> > +typedef struct AMRChannelsContext {
> > + AMRContext ch[2];
> > +} AMRChannelsContext;
> > +
> > /** Double version of ff_weighted_vector_sumf() */
> > static void weighted_vector_sumd(double *out, const double *in_a,
> > const double *in_b, double weight_coeff_a,
> > @@ -159,20 +163,24 @@ static void weighted_vector_sumd(double *out,
> const double *in_a,
> >
> > static av_cold int amrnb_decode_init(AVCodecContext *avctx)
> > {
> > - AMRContext *p = avctx->priv_data;
> > + AMRChannelsContext *s = avctx->priv_data;
> > int i;
> >
> > - if (avctx->channels > 1) {
> > - avpriv_report_missing_feature(avctx, "multi-channel AMR");
> > + if (avctx->channels > 2) {
> > + avpriv_report_missing_feature(avctx, ">2 channel AMR");
> > return AVERROR_PATCHWELCOME;
> > }
> >
> > - avctx->channels = 1;
> > - avctx->channel_layout = AV_CH_LAYOUT_MONO;
> > + if (!avctx->channels) {
> > + avctx->channels = 1;
> > + avctx->channel_layout = AV_CH_LAYOUT_MONO;
> > + }
> > if (!avctx->sample_rate)
> > avctx->sample_rate = 8000;
> > - avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
> > + avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
> >
> > + for (int ch = 0; ch < avctx->channels; ch++) {
> > + AMRContext *p = &s->ch[ch];
> > // p->excitation always points to the same position in p->excitation_buf
> > p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER +
> 1];
> >
> > @@ -188,6 +196,7 @@ static av_cold int amrnb_decode_init(AVCodecContext
> *avctx)
> > ff_acelp_vectors_init(&p->acelpv_ctx);
> > ff_celp_filter_init(&p->celpf_ctx);
> > ff_celp_math_init(&p->celpm_ctx);
> > + }
> >
> > return 0;
> > }
> > @@ -949,25 +958,30 @@ static int amrnb_decode_frame(AVCodecContext
> *avctx, void *data,
> > int *got_frame_ptr, AVPacket *avpkt)
> > {
> >
> > - AMRContext *p = avctx->priv_data; // pointer to private data
> > + AMRChannelsContext *s = avctx->priv_data; // pointer to private data
> > AVFrame *frame = data;
> > const uint8_t *buf = avpkt->data;
> > int buf_size = avpkt->size;
> > - float *buf_out; // pointer to the output data buffer
> > - int i, subframe, ret;
> > - float fixed_gain_factor;
> > - AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness
> processing
> > - float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold
> result from anti-sparseness processing
> > - float synth_fixed_gain; // the fixed gain that synthesis should use
> > - const float *synth_fixed_vector; // pointer to the fixed vector that
> synthesis should use
> > + int ret;
> >
> > /* get output buffer */
> > frame->nb_samples = AMR_BLOCK_SIZE;
> > if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> > return ret;
> > - buf_out = (float *)frame->data[0];
> > +
> > + for (int ch = 0; ch < avctx->channels; ch++) {
> > + AMRContext *p = &s->ch[ch];
> > + float fixed_gain_factor;
> > + AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness
> processing
> > + float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold
> result from anti-sparseness processing
> > + float synth_fixed_gain; // the fixed gain that synthesis should use
> > + const float *synth_fixed_vector; // pointer to the fixed vector that
> synthesis should use
> > + float *buf_out = (float *)frame->extended_data[ch];
> > + int channel_size;
> > + int i, subframe;
> >
> > p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
> > + channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for
> rounding and +8 for TOC
> > if (p->cur_frame_mode == NO_DATA) {
> > av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n");
> > return AVERROR_INVALIDDATA;
> > @@ -1072,11 +1086,13 @@ static int amrnb_decode_frame(AVCodecContext
> *avctx, void *data,
> > * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */
> > p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
> > 0.84, 0.16, LP_FILTER_ORDER);
> > + buf += channel_size;
> > + buf_size -= channel_size;
> > + }
> >
> > *got_frame_ptr = 1;
> >
> > - /* return the amount of bytes consumed if everything was OK */
> > - return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and
> +8 for TOC
> > + return avpkt->size;
> > }
> >
> >
> > @@ -1085,10 +1101,10 @@ const AVCodec ff_amrnb_decoder = {
> > .long_name = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate
> NarrowBand)"),
> > .type = AVMEDIA_TYPE_AUDIO,
> > .id = AV_CODEC_ID_AMR_NB,
> > - .priv_data_size = sizeof(AMRContext),
> > + .priv_data_size = sizeof(AMRChannelsContext),
> > .init = amrnb_decode_init,
> > .decode = amrnb_decode_frame,
> > .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
> > - .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
> > + .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
> > AV_SAMPLE_FMT_NONE },
> > };
> > diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
> > index a6c1d894d7..998dd82791 100644
> > --- a/libavcodec/amrwbdec.c
> > +++ b/libavcodec/amrwbdec.c
> > @@ -93,21 +93,30 @@ typedef struct AMRWBContext {
> >
> > } AMRWBContext;
> >
> > +typedef struct AMRWBChannelsContext {
> > + AMRWBContext ch[2];
> > +} AMRWBChannelsContext;
> > +
> > static av_cold int amrwb_decode_init(AVCodecContext *avctx)
> > {
> > - AMRWBContext *ctx = avctx->priv_data;
> > + AMRWBChannelsContext *s = avctx->priv_data;
> > int i;
> >
> > - if (avctx->channels > 1) {
> > - avpriv_report_missing_feature(avctx, "multi-channel AMR");
> > + if (avctx->channels > 2) {
> > + avpriv_report_missing_feature(avctx, ">2 channel AMR");
> > return AVERROR_PATCHWELCOME;
> > }
> >
> > - avctx->channels = 1;
> > - avctx->channel_layout = AV_CH_LAYOUT_MONO;
> > + if (!avctx->channels) {
> > + avctx->channels = 1;
> > + avctx->channel_layout = AV_CH_LAYOUT_MONO;
> > + }
> > if (!avctx->sample_rate)
> > avctx->sample_rate = 16000;
> > - avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
> > + avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
> > +
> > + for (int ch = 0; ch < avctx->channels; ch++) {
> > + AMRWBContext *ctx = &s->ch[ch];
> >
> > av_lfg_init(&ctx->prng, 1);
> >
> > @@ -124,6 +133,7 @@ static av_cold int amrwb_decode_init(AVCodecContext
> *avctx)
> > ff_acelp_vectors_init(&ctx->acelpv_ctx);
> > ff_celp_filter_init(&ctx->celpf_ctx);
> > ff_celp_math_init(&ctx->celpm_ctx);
> > + }
> >
> > return 0;
> > }
> > @@ -1094,13 +1104,21 @@ static void update_sub_state(AMRWBContext *ctx)
> > static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
> > int *got_frame_ptr, AVPacket *avpkt)
> > {
> > - AMRWBContext *ctx = avctx->priv_data;
> > + AMRWBChannelsContext *s = avctx->priv_data;
> > AVFrame *frame = data;
> > - AMRWBFrame *cf = &ctx->frame;
> > const uint8_t *buf = avpkt->data;
> > int buf_size = avpkt->size;
> > + int sub, i, ret;
> > +
> > + /* get output buffer */
> > + frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
> > + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> > + return ret;
> > +
> > + for (int ch = 0; ch < avctx->channels; ch++) {
> > + AMRWBContext *ctx = &s->ch[ch];
> > + AMRWBFrame *cf = &ctx->frame;
> > int expected_fr_size, header_size;
> > - float *buf_out;
> > float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result
> from anti-sparseness processing
> > float fixed_gain_factor; // fixed gain correction factor (gamma)
> > float *synth_fixed_vector; // pointer to the fixed vector that synthesis
> should use
> > @@ -1110,13 +1128,7 @@ static int amrwb_decode_frame(AVCodecContext
> *avctx, void *data,
> > float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency
> band
> > float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from
> synthesis
> > float hb_gain;
> > - int sub, i, ret;
> > -
> > - /* get output buffer */
> > - frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
> > - if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> > - return ret;
> > - buf_out = (float *)frame->data[0];
> > + float *buf_out = (float *)frame->extended_data[ch];
> >
> > header_size = decode_mime_header(ctx, buf);
> > expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
> > @@ -1127,9 +1139,10 @@ static int amrwb_decode_frame(AVCodecContext
> *avctx, void *data,
> > if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) {
> > /* The specification suggests a "random signal" and
> > "a muting technique" to "gradually decrease the output level". */
> > - av_samples_set_silence(&frame->data[0], 0, frame->nb_samples, 1,
> AV_SAMPLE_FMT_FLT);
> > - *got_frame_ptr = 1;
> > - return expected_fr_size;
> > + av_samples_set_silence(&frame->extended_data[ch], 0,
> frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
> > + buf += expected_fr_size;
> > + buf_size -= expected_fr_size;
> > + continue;
> > }
> > if (ctx->fr_cur_mode > MODE_SID) {
> > av_log(avctx, AV_LOG_ERROR,
> > @@ -1270,9 +1283,13 @@ static int amrwb_decode_frame(AVCodecContext
> *avctx, void *data,
> > memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER *
> sizeof(ctx->isp[3][0]));
> > memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
> >
> > + buf += expected_fr_size;
> > + buf_size -= expected_fr_size;
> > + }
> > +
> > *got_frame_ptr = 1;
> >
> > - return expected_fr_size;
> > + return avpkt->size;
> > }
> >
> > const AVCodec ff_amrwb_decoder = {
> > @@ -1280,7 +1297,7 @@ const AVCodec ff_amrwb_decoder = {
> > .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate
> WideBand)"),
> > .type = AVMEDIA_TYPE_AUDIO,
> > .id = AV_CODEC_ID_AMR_WB,
> > - .priv_data_size = sizeof(AMRWBContext),
> > + .priv_data_size = sizeof(AMRWBChannelsContext),
> > .init = amrwb_decode_init,
> > .decode = amrwb_decode_frame,
> > .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
> > diff --git a/libavformat/amr.c b/libavformat/amr.c
> > index 8e79caee18..3f73c51a57 100644
> > --- a/libavformat/amr.c
> > +++ b/libavformat/amr.c
> > @@ -21,13 +21,12 @@
> >
> > /*
> > Write and read amr data according to RFC3267,
> http://www.ietf.org/rfc/rfc3267.txt?number=3267
> > -
> > -Only mono files are supported.
> > -
> > */
> >
> > #include "libavutil/channel_layout.h"
> > +#include "libavutil/intreadwrite.h"
> > #include "avformat.h"
> > +#include "avio_internal.h"
> > #include "internal.h"
> > #include "rawdec.h"
> > #include "rawenc.h"
> > @@ -36,8 +35,10 @@ typedef struct AMRContext {
> > FFRawDemuxerContext rawctx;
> > } AMRContext;
> >
> > -static const char AMR_header[] = "#!AMR\n";
> > -static const char AMRWB_header[] = "#!AMR-WB\n";
> > +static const uint8_t AMR_header[6] = "#!AMR\x0a";
> > +static const uint8_t AMRMC_header[12] = "#!AMR_MC1.0\x0a";
> > +static const uint8_t AMRWB_header[9] = "#!AMR-WB\x0a";
> > +static const uint8_t AMRWBMC_header[15] = "#!AMR-WB_MC1.0\x0a";
> >
> > static const uint8_t amrnb_packed_size[16] = {
> > 13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
> > @@ -69,7 +70,7 @@ static int amr_probe(const AVProbeData *p)
> > {
> > // Only check for "#!AMR" which could be amr-wb, amr-nb.
> > // This will also trigger multichannel files: "#!AMR_MC1.0\n" and
> > - // "#!AMR-WB_MC1.0\n" (not supported)
> > + // "#!AMR-WB_MC1.0\n"
> >
> > if (!memcmp(p->buf, AMR_header, 5))
> > return AVPROBE_SCORE_MAX;
> > @@ -82,35 +83,59 @@ static int amr_read_header(AVFormatContext *s)
> > {
> > AVIOContext *pb = s->pb;
> > AVStream *st;
> > - uint8_t header[9];
> > + uint8_t header[19];
> > + int back = 0, ret;
> > +
> > + ret = ffio_ensure_seekback(s->pb, 19);
> > + if (ret < 0)
> > + return ret;
> >
> > - if (avio_read(pb, header, 6) != 6)
> > + if (avio_read(pb, header, 19) != 19)
> > return AVERROR_INVALIDDATA;
> There are header-only AMR files that contain only
> “#!AMR\n”.  It would be better to check the
> headers of the different AMR variants the way
> it is done in this patch.
>

I do not follow; the proposed patch works with all four variants just fine,
and it is much simpler than what you propose.

https://patchwork.ffmpeg.org/project/ffmpeg/patch/TYCPR01MB59827312A79D5E772DE0D6AAC1DC9@TYCPR01MB5982.jpnprd01.prod.outlook.com/
> >
> > st = avformat_new_stream(s, NULL);
> > if (!st)
> > return AVERROR(ENOMEM);
> > - if (memcmp(header, AMR_header, 6)) {
> > - if (avio_read(pb, header + 6, 3) != 3)
> > - return AVERROR_INVALIDDATA;
> > - if (memcmp(header, AMRWB_header, 9)) {
> > - return -1;
> > - }
> > -
> > + if (!memcmp(header, AMR_header, sizeof(AMR_header))) {
> > + st->codecpar->codec_tag = MKTAG('s', 'a', 'm', 'r');
> > + st->codecpar->codec_id = AV_CODEC_ID_AMR_NB;
> > + st->codecpar->sample_rate = 8000;
> > + st->codecpar->channels = 1;
> > + st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> > + back = 19 - sizeof(AMR_header);
> > + } else if (!memcmp(header, AMRWB_header, sizeof(AMRWB_header))) {
> > st->codecpar->codec_tag = MKTAG('s', 'a', 'w', 'b');
> > st->codecpar->codec_id = AV_CODEC_ID_AMR_WB;
> > st->codecpar->sample_rate = 16000;
> > - } else {
> > + st->codecpar->channels = 1;
> > + st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> > + back = 19 - sizeof(AMRWB_header);
> > + } else if (!memcmp(header, AMRMC_header, sizeof(AMRMC_header))) {
> > st->codecpar->codec_tag = MKTAG('s', 'a', 'm', 'r');
> > st->codecpar->codec_id = AV_CODEC_ID_AMR_NB;
> > st->codecpar->sample_rate = 8000;
> > + st->codecpar->channels = AV_RL32(header + 12);
> > + back = 15 - sizeof(AMRMC_header);
> > + } else if (!memcmp(header, AMRWBMC_header, sizeof(AMRWBMC_header))) {
> > + st->codecpar->codec_tag = MKTAG('s', 'a', 'w', 'b');
> > + st->codecpar->codec_id = AV_CODEC_ID_AMR_WB;
> > + st->codecpar->sample_rate = 16000;
> > + st->codecpar->channels = AV_RL32(header + 15);
> > + back = 15 - sizeof(AMRWBMC_header);
> > + } else {
> > + return AVERROR_INVALIDDATA;
> > }
> > - st->codecpar->channels = 1;
> > - st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> > +
> > + if (st->codecpar->channels < 1)
> > + return AVERROR_INVALIDDATA;
> > +
> > st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> > ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> > avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> >
> > + if (back > 0)
> > + avio_seek(pb, -back, SEEK_CUR);
> > +
> > return 0;
> > }
> >
> > --
> > 2.33.0
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel at ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>


More information about the ffmpeg-devel mailing list