[Libav-user] Conversion from mp3 to aac/mp4 container problem

Taha Ansari mtaha.ansari at gmail.com
Fri Jun 21 11:53:08 CEST 2013


Hi!

I have made a small application that extracts the audio from an mp4 file, or
simply converts an existing audio file to AAC (either raw AAC, or inside an
mp4 container). I have run this application with existing mp4 files as input,
and it properly extracts the audio and encodes it to mp4 (audio only: AAC), or
even directly to raw AAC (i.e. test.aac also works). But when I run it on mp3
files, the output clip plays faster than it should (a clip of 1:12 minutes
plays back in only 1:05, and is also noisy).

Here is the code I have written to achieve this:

////////////////////////////////////////////////

#include "stdafx.h"

#include <iostream>
#include <fstream>

#include <string>
#include <vector>
#include <map>

#include <deque>
#include <queue>

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <conio.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libavutil/dict.h"
#include "libavutil/error.h"
#include "libavutil/opt.h"
#include <libavutil/fifo.h>
#include <libavutil/imgutils.h>
#include <libavutil/samplefmt.h>
}

// ---------------------------------------------------------------------------
// Input (demux / decode) state
// ---------------------------------------------------------------------------
// Demuxer context of the source file; opened by open_audio_input().
AVFormatContext* fmt_ctx= NULL;
// Index of the first audio stream in fmt_ctx; stays -1 until one is found.
int audio_stream_index = -1;
// Codec context of that audio stream (taken from the stream, not allocated here).
AVCodecContext *codec_ctx_audio = NULL;
// Decoder returned by avcodec_find_decoder() for codec_ctx_audio.
AVCodec*        codec_audio = NULL;
// Frame that avcodec_decode_audio4() decodes into; allocated once in open_audio_input().
AVFrame*        decoded_frame = NULL;
// Array of plane pointers (allocated in open_audio_input()); element 0 owns the
// sample buffer that decode_packet() allocates and main() frees after each packet.
uint8_t**        audio_dst_data = NULL;
// Set by avcodec_decode_audio4(); reset to 0 at the start of decode_frame().
int                got_frame = 0;
// Reset to 0 in decode_frame(); not read anywhere in the code shown here.
int                audiobufsize = 0;
// Packet filled by av_read_frame() and consumed by decode_packet().
AVPacket        input_packet;
// Line size reported by av_samples_alloc() for audio_dst_data.
int                audio_dst_linesize = 0;
// Size in bytes of the samples in audio_dst_data (from av_samples_get_buffer_size());
// main() passes it to write_audio_frame().
int                audio_dst_bufsize = 0;

// ---------------------------------------------------------------------------
// Output (encode / mux) state
// ---------------------------------------------------------------------------
// Output container format; copied from output_fmt_ctx->oformat in open_encoder().
AVOutputFormat *output_format        = NULL ;
// Muxer context of the destination file; allocated in open_encoder().
AVFormatContext *output_fmt_ctx        = NULL;
// The single audio stream of the output; created by add_audio_stream().
AVStream *audio_st                    = NULL;
// Encoder looked up by add_audio_stream().
AVCodec *audio_codec                = NULL;
// Output stream position in seconds; computed in main() only for the progress print.
double audio_pts                    = 0.0;

// Samples per frame handed to the encoder; set in open_audio().
int audio_input_frame_size = 0;

// Not referenced anywhere in the code shown here.
uint8_t *audio_data_buf = NULL;
uint8_t *audio_out = NULL;
// Encoder parameters; main() copies them from the decoder context and
// add_audio_stream() applies them to the output stream's codec context.
int                audio_bit_rate;
int                audio_sample_rate;
int                audio_channels;

// Forward declarations: input side.
int decode_packet();
int open_audio_input(char* src_filename);
int decode_frame();

// Forward declarations: output side.
int open_encoder(char* output_filename);
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id);
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st);
void close_audio(AVFormatContext *oc, AVStream *st);
void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize);

/**
 * Open the input file, pick its first audio stream and open a decoder for it.
 *
 * On success the globals fmt_ctx, audio_stream_index, codec_ctx_audio,
 * codec_audio, decoded_frame and audio_dst_data are initialised.
 * audio_dst_data is only allocated on the fully successful path, so a
 * non-NULL audio_dst_data means decoding can proceed.
 *
 * @param src_filename  path of the file to read
 * @return 0 on success, -1 on failure (stream info unavailable, no audio
 *         stream, no decoder, decoder could not be opened, out of memory).
 *         Failure to open the file itself terminates the process with
 *         exit(1), as before.
 */
int open_audio_input(char* src_filename)
{
    /* open input file, and allocate format context */
    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0)
    {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }

    // Retrieve stream information
    if (avformat_find_stream_info(fmt_ctx, NULL) < 0)
        return -1; // Couldn't find stream information

    // Dump information about file onto standard error
    av_dump_format(fmt_ctx, 0, src_filename, 0);

    // Find the first audio stream (nb_streams is unsigned, hence the index type)
    for (unsigned int i = 0; i < fmt_ctx->nb_streams; i++)
    {
        if (fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            audio_stream_index = (int)i;
            break;
        }
    }
    if (audio_stream_index == -1)
    {
        fprintf(stderr, "Could not find an audio stream in %s\n", src_filename);
        return -1;
    }

    // Get a pointer to the codec context for the audio stream
    codec_ctx_audio = fmt_ctx->streams[audio_stream_index]->codec;

    // Find the decoder for the audio stream
    codec_audio = avcodec_find_decoder(codec_ctx_audio->codec_id);
    if (codec_audio == NULL)
    {
        fprintf(stderr, "Unsupported audio codec!\n");
        return -1; // Codec not found
    }

    // Open codec (no decoder options are passed)
    if (avcodec_open2(codec_ctx_audio, codec_audio, NULL) < 0)
        return -1; // Could not open codec

    // Allocate the frame the decoder writes into
    if (decoded_frame == NULL)
        decoded_frame = avcodec_alloc_frame();
    if (decoded_frame == NULL)
    {
        fprintf(stderr, "Could not allocate audio frame\n");
        return -1;
    }

    // One pointer per plane: planar formats use one plane per channel,
    // packed formats a single plane. av_mallocz() zero-fills the array, so
    // every plane pointer starts out NULL.
    const int nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt)
        ? codec_ctx_audio->channels : 1;
    audio_dst_data = (uint8_t**)av_mallocz(sizeof(uint8_t *) * nb_planes);
    if (!audio_dst_data)
    {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        return -1;
    }

    return 0;
}


/**
 * Read the next packet from the input and hand it to decode_packet().
 *
 * Clears got_frame before doing anything else, so the caller can test it
 * afterwards to learn whether this call produced decoded audio.
 *
 * @return 0 when no input is open; the negative av_read_frame() result when
 *         no packet could be read; otherwise whatever decode_packet() returns.
 */
int decode_frame()
{
    got_frame = 0;

    // No input context: nothing to read.
    if (!fmt_ctx)
        return 0;

    audiobufsize = 0;

    const int read_status = av_read_frame(fmt_ctx, &input_packet);
    if (read_status < 0)
        return read_status;

    const int decode_status = decode_packet();

    // Release the packet payload that av_read_frame() allocated.
    av_free_packet(&input_packet);
    return decode_status;
}

int decode_packet()
{
    int rv = 0;
    int ret = 0;

    //audio stream?
    if(input_packet.stream_index == audio_stream_index)
    {
        /* decode audio frame */
        rv = avcodec_decode_audio4(codec_ctx_audio, decoded_frame,
&got_frame, &input_packet);
        if (rv < 0)
        {
            fprintf(stderr, "Error decoding audio frame\n");
            //return ret;
        }
        else
        {
            if (got_frame)
            {
                if ( audio_dst_data[0] == NULL )
                {
                    ret = av_samples_alloc(audio_dst_data,
&audio_dst_linesize, decoded_frame->channels,
                        decoded_frame->nb_samples,
(AVSampleFormat)decoded_frame->format, 1);
                    if (ret < 0)
                    {
                        fprintf(stderr, "Could not allocate audio
buffer\n");
                        return AVERROR(ENOMEM);
                    }
                    /* TODO: extend return code of the av_samples_*
functions so that this call is not needed */
                    audio_dst_bufsize =
                        av_samples_get_buffer_size(NULL,
decoded_frame->channels,
                        decoded_frame->nb_samples,
(AVSampleFormat)decoded_frame->format, 1);
                }
                /* copy audio data to destination buffer:
                * this is required since rawaudio expects non aligned data
*/
                av_samples_copy(audio_dst_data, decoded_frame->data, 0, 0,
                    decoded_frame->nb_samples, decoded_frame->channels,
(AVSampleFormat)decoded_frame->format);
            }
        }
    }
    return rv;
}


/**
 * Create the output container, add its audio stream, open the encoder and
 * write the container header.
 *
 * The container format is deduced from the file name; if that fails the
 * "mpeg" muxer is used. The audio codec is the container's default one.
 * The header is written for every format, including those flagged
 * AVFMT_NOFILE that do not need an I/O context to be opened here.
 *
 * Sets the globals output_fmt_ctx, output_format, audio_st and audio_codec.
 *
 * @param output_filename  path of the file to create
 * @return 0 on success; -1 when the context cannot be allocated, the file
 *         cannot be opened or the header cannot be written; the negative
 *         open_audio() result when the encoder cannot be opened.
 */
int open_encoder(char* output_filename )
{
    /* allocate the output media context */
    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_filename);
    if (!output_fmt_ctx) {
        printf("Could not deduce output format from file extension: using MPEG.\n");
        avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename);
    }

    audio_st = NULL;
    if (!output_fmt_ctx)
        return -1;
    output_format = output_fmt_ctx->oformat;

    /* Add the audio stream using the default format codecs
     * and initialize the codecs. */
    if (output_format->audio_codec != AV_CODEC_ID_NONE)
    {
        audio_st = add_audio_stream(output_fmt_ctx, &audio_codec,
            output_format->audio_codec);
    }

    /* Now that all the parameters are set, we can open the audio
     * codec and allocate the necessary encode buffers. */
    if (audio_st)
    {
        const int rv = open_audio(output_fmt_ctx, audio_codec, audio_st);
        if (rv < 0)
            return rv;
    }

    av_dump_format(output_fmt_ctx, 0, output_filename, 1);

    /* open the output file, if needed */
    if (!(output_format->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0)
        {
            fprintf(stderr, "Could not open '%s'\n", output_filename);
            return -1;
        }
    }

    /* Write the stream header, if any. */
    if (avformat_write_header(output_fmt_ctx, NULL) < 0)
    {
        fprintf(stderr, "Error occurred when opening output file\n");
        return -1;
    }

    return 0;
}

/**
 * Look up the encoder for codec_id and add an audio stream that uses it.
 *
 * The stream's codec context is configured for signed 16-bit samples with
 * the bit rate, sample rate and channel count held in the audio_bit_rate,
 * audio_sample_rate and audio_channels globals.
 *
 * @param oc        output container the stream is added to
 * @param codec     receives the encoder that was found
 * @param codec_id  id of the encoder to look up
 * @return the new stream; the process exits with status 1 if the encoder is
 *         not found or the stream cannot be allocated
 */
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id)
{
    /* find the audio encoder */
    AVCodec *encoder = avcodec_find_encoder(codec_id);
    *codec = encoder;
    if (encoder == NULL)
    {
        fprintf(stderr, "Could not find codec\n");
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, encoder);
    if (stream == NULL)
    {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    stream->id = 1;

    /* put sample parameters */
    AVCodecContext *enc_ctx = stream->codec;
    enc_ctx->sample_fmt  = AV_SAMPLE_FMT_S16;
    enc_ctx->bit_rate    = audio_bit_rate;
    enc_ctx->sample_rate = audio_sample_rate;
    enc_ctx->channels    = audio_channels;

    // some formats want stream headers to be separate
    if ((oc->oformat->flags & AVFMT_GLOBALHEADER) != 0)
    {
        enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    return stream;
}

/**
 * Open the encoder of stream st and record how many samples each encoded
 * frame carries in audio_input_frame_size.
 *
 * @param oc     output container (not used here)
 * @param codec  encoder to open
 * @param st     stream whose codec context is opened
 * @return 0 on success, -1 if the encoder cannot be opened
 */
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    AVCodecContext *enc_ctx = st->codec;

    /* open it */
    if (avcodec_open2(enc_ctx, codec, NULL) < 0)
    {
        fprintf(stderr, "could not open codec\n");
        return -1;
    }

    // Encoders that accept any frame size get a fixed 10000-sample frame;
    // all others dictate the size through frame_size.
    const int accepts_any_size =
        (enc_ctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) != 0;
    audio_input_frame_size = accepts_any_size ? 10000 : enc_ctx->frame_size;

    return 0;
}

/**
 * Close the encoder attached to stream st.
 *
 * Does nothing when st is NULL, which is the case when the output format
 * has no default audio codec and no stream was ever created.
 *
 * @param oc  output container (not used here)
 * @param st  stream whose codec context is closed; may be NULL
 */
void close_audio(AVFormatContext *oc, AVStream *st)
{
    if (st == NULL || st->codec == NULL)
        return;
    avcodec_close(st->codec);
}

void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize)
{
    AVFormatContext *oc = output_fmt_ctx;
    AVStream *st = audio_st;
    if ( oc == NULL || st == NULL ) return;
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    AVFrame *frame = avcodec_alloc_frame();
    int got_packet;

    av_init_packet(&pkt);
    c = st->codec;

    frame->nb_samples = audio_input_frame_size;
    int buf_size =         audio_src_bufsize *
        av_get_bytes_per_sample(c->sample_fmt) *
        c->channels;
    avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
        (uint8_t *) *audio_src_data,
        buf_size, 1);
    avcodec_encode_audio2(c, &pkt, frame, &got_packet);
    if (!got_packet)
    {
        avcodec_free_frame(&frame);
    }
    else
    {
        pkt.stream_index = st->index;
        /* Write the compressed frame to the media file. */
        if (av_interleaved_write_frame(oc, &pkt) != 0)
        {
            fprintf(stderr, "Error while writing audio frame\n");
            exit(1);
        }
        avcodec_free_frame(&frame);
    }
    av_free_packet(&pkt);
}


/**
 * Drain the encoder: keep requesting packets without supplying input until
 * the encoder reports that it has none left, writing each one to the output.
 *
 * Does nothing when there is no output context or no audio stream.
 * An encoder error terminates the process. A muxer write error is reported
 * and draining continues, so that the caller can still write the trailer.
 *
 * @param oc  output container the packets are written to; may be NULL
 * @param st  audio stream whose encoder is drained; may be NULL
 */
void write_delayed_frames(AVFormatContext *oc, AVStream *st)
{
    if (oc == NULL || st == NULL || st->codec == NULL)
        return;

    AVCodecContext *c = st->codec;

    // Timestamps are converted from the codec time base to the stream time
    // base only when the codec time base is usable: add_audio_stream() does
    // not set it, and rescaling from a zero time base would collapse every
    // timestamp to 0.
    const int can_rescale = c->time_base.num > 0 && c->time_base.den > 0;

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL; // let the encoder allocate the payload
    pkt.size = 0;

    int got_output = 1;
    while (got_output)
    {
        got_output = 0;
        if (avcodec_encode_audio2(c, &pkt, NULL, &got_output) < 0)
        {
            fprintf(stderr, "error encoding frame\n");
            exit(1);
        }

        if (got_output)
        {
            if (can_rescale && pkt.pts != AV_NOPTS_VALUE)
                pkt.pts = av_rescale_q(pkt.pts, c->time_base, st->time_base);
            if (can_rescale && pkt.dts != AV_NOPTS_VALUE)
                pkt.dts = av_rescale_q(pkt.dts, c->time_base, st->time_base);
            // coded_frame is checked because it is dereferenced here without
            // being set anywhere in this file.
            if (c->coded_frame != NULL && c->coded_frame->key_frame)
                pkt.flags |= AV_PKT_FLAG_KEY;

            pkt.stream_index = st->index;
            /* Write the compressed frame to the media file. */
            if (av_interleaved_write_frame(oc, &pkt) != 0)
                fprintf(stderr, "Error while writing audio frame\n");
        }

        av_free_packet(&pkt);
    }
}

int main (int argc, char **argv)
{
    /* register all formats and codecs */
    av_register_all();
    int i =0;
    char src_filename[90] = "test.mp3";
    char dst_filename[90] = "test.mp4";
    open_audio_input(src_filename);
    audio_bit_rate        = codec_ctx_audio->bit_rate;
    audio_sample_rate    = codec_ctx_audio->sample_rate;
    audio_channels        = codec_ctx_audio->channels;
    open_encoder( dst_filename );
    while(1)
    {
        int rv = decode_frame();
        if ( rv < 0 )
        {
            break;
        }

        if (audio_st)
        {
            audio_pts = (double)audio_st->pts.val * audio_st->time_base.num
/
                audio_st->time_base.den;
        }
        else
        {
            audio_pts = 0.0;
        }
        printf("\naudio_pts: %.3f",audio_pts);
        if ( codec_ctx_audio )
        {
            if ( got_frame)
            {
                write_audio_frame( audio_dst_data, audio_dst_bufsize );
            }
        }
        if ( audio_dst_data[0] )
        {
            av_freep(&audio_dst_data[0]);
            audio_dst_data[0] = NULL;
        }
    }
    write_delayed_frames( output_fmt_ctx, audio_st );
    av_write_trailer(output_fmt_ctx);
    close_audio( output_fmt_ctx, audio_st);
    return 0;
}
///////////////////////////////////////////////

I have been looking at this problem from many angles for about two days
now, but can't seem to figure out what I'm doing wrong.

Note also: the printf() statement I've inserted shows audio_pts only up to
64.551 (about 1:05, which also shows that the encoded output does not cover
the full 1:12 duration of the input file):
.......
.......
.......
audio_pts: 63.808
audio_pts: 63.832
audio_pts: 63.855
audio_pts: 63.878
audio_pts: 63.901
audio_pts: 63.925
audio_pts: 63.948
audio_pts: 63.971
audio_pts: 63.994
audio_pts: 64.017
audio_pts: 64.041
audio_pts: 64.064
audio_pts: 64.087
audio_pts: 64.110
audio_pts: 64.134
audio_pts: 64.157
audio_pts: 64.180
audio_pts: 64.203
audio_pts: 64.226
audio_pts: 64.250
audio_pts: 64.273
audio_pts: 64.296
audio_pts: 64.319
audio_pts: 64.342
audio_pts: 64.366
audio_pts: 64.389
audio_pts: 64.412
audio_pts: 64.435
audio_pts: 64.459
audio_pts: 64.482
audio_pts: 64.505
audio_pts: 64.528
audio_pts: 64.551


Can anyone please guide me what I may be doing wrong?

Thanks in advance for any guidance!

p.s. when run through command line like: ffmpeg -i test.mp3 test.mp4, it
converts the file just fine.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://ffmpeg.org/pipermail/libav-user/attachments/20130621/80949a00/attachment.html>


More information about the Libav-user mailing list