[Libav-user] Multiple queries on filter_audio.c
amey jain
amey.jain7 at gmail.com
Fri Jun 23 23:12:33 EEST 2017
Hi,
I have modified filter_audio.c a little and used it together with
avcodec_decode_audio4 to get decoded PCM audio as float samples, mono,
at 5512 Hz. The filter graph I use is abuffer-->aformat-->abuffersink.
I am now facing multiple problems with this file.
1. When I seek, I don't get frames with the required PTS for some
MPEG-TS videos, although it works for other *.ts files (see the
function process_frame_by_pts).
2. When I collect frames from the buffersink, frame->sample_rate is
still 48000 Hz, although it should be 5512 Hz. Is it supposed to be
like that, or am I doing the conversion wrong somewhere?
Any hint about the problem will do. I was asked for it last time, which
is why I am including the complete file below.
Thanks
/* Initial file to extract audio from video */
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <libavcodec/avcodec.h>
#include <libavfilter/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>

#include "av_decoder.h"
#include "config.h"
/* Demuxer context of the currently opened input; set by open_input_file(). */
static AVFormatContext *fmt_ctx = NULL;
/* Decoder context of the selected audio stream; set by open_input_file(). */
static AVCodecContext *dec_ctx;
/* NOTE(review): the next three globals are not referenced by any function
 * in this file (init_filter_graph() shadows filter_graph with a local);
 * presumably they are used from another translation unit -- confirm. */
AVFilterContext *buffersink_ctx;
AVFilterContext *buffersrc_ctx;
AVFilterGraph *filter_graph;
/* Index of the audio stream chosen by open_input_file(); -1 until then. */
int audio_stream_index = -1;
/* Filter graph and its two endpoints; filled in by init_decoder() through
 * init_filter_graph() and released by close_filter(). */
static AVFilterGraph *graph=NULL;
static AVFilterContext *src=NULL,*sink=NULL;
/* Format of the decoded input audio. Written by init_input_parameters() and
 * read by init_filter_graph() to configure the abuffer source. */
enum AVSampleFormat INPUT_SAMPLE_FMT = -1;
uint64_t INPUT_SAMPLERATE = 0;
uint64_t INPUT_CHANNEL_LAYOUT = 0;
/* File names handed to init_decoder(); the pointers are stored as-is (no
 * copy is made) and selected by open_input_file() via its file_select flag. */
char *orig_video_name = NULL;
char *edited_video_name = NULL;
/* Adapted from FFmpeg's filter_audio.c example:
 * https://www.ffmpeg.org/doxygen/2.2/filter_audio_8c-example.html */
/*
 * Build and configure the audio filter graph abuffer -> aformat -> abuffersink.
 *
 * The abuffer source is configured from INPUT_CHANNEL_LAYOUT, INPUT_SAMPLE_FMT
 * and INPUT_SAMPLERATE, so those must have been filled in beforehand.  The
 * aformat filter constrains the output to float samples (AV_SAMPLE_FMT_FLT),
 * mono, 5512 Hz; libavfilter negotiates the required conversion when the
 * graph is configured.
 *
 * graph, src, sink: out parameters, written only on success.
 *
 * Returns 0 on success or a negative AVERROR code.  On failure the partially
 * built graph (and every filter instance in it) is freed.
 */
int init_filter_graph(AVFilterGraph **graph, AVFilterContext **src,
                      AVFilterContext **sink)
{
    enum { OUTPUT_SAMPLE_RATE = 5512 };

    AVFilterGraph *filter_graph = NULL;
    AVFilterContext *abuffer_ctx;
    AVFilterContext *aformat_ctx;
    AVFilterContext *abuffersink_ctx;
    const AVFilter *abuffer;
    const AVFilter *aformat;
    const AVFilter *abuffersink;
    char options_str[1024];
    char ch_layout[64];
    char errstr[256];
    int err;

    if (graph == NULL || src == NULL || sink == NULL) {
        fprintf(stderr, "init_filter_graph: NULL output parameter.\n");
        return AVERROR(EINVAL);
    }
    /* Without a known input format the abuffer source cannot be set up. */
    if (!INPUT_CHANNEL_LAYOUT || !INPUT_SAMPLERATE ||
        INPUT_SAMPLERATE > 0x7fffffff || INPUT_SAMPLE_FMT == -1) {
        fprintf(stderr, "Input audio parameters are not set.\n");
        return AVERROR(EINVAL);
    }

    /* Create a new filtergraph, which will contain all the filters. */
    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) {
        fprintf(stderr, "Unable to create filter graph.\n");
        return AVERROR(ENOMEM);
    }

    /* Create the abuffer filter;
     * it will be used for feeding the data into the graph. */
    abuffer = avfilter_get_by_name("abuffer");
    if (!abuffer) {
        fprintf(stderr, "Could not find the abuffer filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    abuffer_ctx = avfilter_graph_alloc_filter(filter_graph, abuffer, "src");
    if (!abuffer_ctx) {
        fprintf(stderr, "Could not allocate the abuffer instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Set the abuffer options through the AVOptions API.  av_opt_set*()
     * return 0 on success and a negative AVERROR on failure. */
    av_get_channel_layout_string(ch_layout, sizeof(ch_layout), 0,
                                 INPUT_CHANNEL_LAYOUT);
    err = av_opt_set(abuffer_ctx, "channel_layout", ch_layout,
                     AV_OPT_SEARCH_CHILDREN);
    if (err >= 0)
        err = av_opt_set(abuffer_ctx, "sample_fmt",
                         av_get_sample_fmt_name(INPUT_SAMPLE_FMT),
                         AV_OPT_SEARCH_CHILDREN);
    /* NOTE(review): the time base is declared as 1/sample_rate, as in the
     * upstream example; frames fed with stream-time-base timestamps would
     * need the stream time base here instead -- confirm against the caller. */
    if (err >= 0)
        err = av_opt_set_q(abuffer_ctx, "time_base",
                           (AVRational){ 1, (int)INPUT_SAMPLERATE },
                           AV_OPT_SEARCH_CHILDREN);
    if (err >= 0)
        err = av_opt_set_int(abuffer_ctx, "sample_rate",
                             (int64_t)INPUT_SAMPLERATE,
                             AV_OPT_SEARCH_CHILDREN);
    if (err < 0) {
        fprintf(stderr, "Could not set the abuffer options.\n");
        goto fail;
    }

    /* Now initialize the filter; we pass NULL options, since we have already
     * set all the options above. */
    err = avfilter_init_str(abuffer_ctx, NULL);
    if (err < 0) {
        fprintf(stderr, "Could not initialize the abuffer filter.\n");
        goto fail;
    }

    /* Create the aformat filter; it pins the output sample format, sample
     * rate and channel layout. */
    aformat = avfilter_get_by_name("aformat");
    if (!aformat) {
        fprintf(stderr, "Could not find the aformat filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    aformat_ctx = avfilter_graph_alloc_filter(filter_graph, aformat, "aformat");
    if (!aformat_ctx) {
        fprintf(stderr, "Could not allocate the resample instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* aformat's options are the plural lists sample_fmts / sample_rates /
     * channel_layouts.  The singular names are not options of this filter,
     * so setting them leaves the output format unconstrained. */
    av_get_channel_layout_string(ch_layout, sizeof(ch_layout), 0,
                                 AV_CH_LAYOUT_MONO);
    snprintf(options_str, sizeof(options_str),
             "sample_fmts=%s:sample_rates=%d:channel_layouts=%s",
             av_get_sample_fmt_name(AV_SAMPLE_FMT_FLT), OUTPUT_SAMPLE_RATE,
             ch_layout);
    err = avfilter_init_str(aformat_ctx, options_str);
    if (err < 0) {
        fprintf(stderr, "Could not initialize the resampling filter.\n");
        goto fail;
    }

    /* Finally create the abuffersink filter;
     * it will be used to get the filtered data out of the graph. */
    abuffersink = avfilter_get_by_name("abuffersink");
    if (!abuffersink) {
        fprintf(stderr, "Could not find the abuffersink filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    abuffersink_ctx = avfilter_graph_alloc_filter(filter_graph,
                                                  abuffersink, "sink");
    if (!abuffersink_ctx) {
        fprintf(stderr, "Could not allocate the abuffersink instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }
    /* This filter takes no options. */
    err = avfilter_init_str(abuffersink_ctx, NULL);
    if (err < 0) {
        fprintf(stderr, "Could not initialize the abuffersink instance.\n");
        goto fail;
    }

    /* Connect the filters;
     * in this simple case the filters just form a linear chain. */
    err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
    if (err < 0) {
        fprintf(stderr,
                "Error connecting filters buffer src and resample %d error\n",
                err);
        av_strerror(err, errstr, sizeof(errstr));
        fprintf(stderr, "%s\n", errstr);
        goto fail;
    }
    err = avfilter_link(aformat_ctx, 0, abuffersink_ctx, 0);
    if (err < 0) {
        fprintf(stderr,
                "Error connecting filters resample and buffersink %d error\n",
                err);
        goto fail;
    }

    /* Configure the graph. */
    err = avfilter_graph_config(filter_graph, NULL);
    if (err < 0) {
        fprintf(stderr, "Error configuring the filter graph\n");
        goto fail;
    }

    *graph = filter_graph;
    *src = abuffer_ctx;
    *sink = abuffersink_ctx;
    return 0;

fail:
    /* Freeing the graph also frees every filter instance allocated in it. */
    avfilter_graph_free(&filter_graph);
    return err;
}
/*
 * Set up the decoder state and the audio filter graph.
 *
 * filename1:   name of the original video; must not be NULL.
 * filename2:   name of the edited video; required only when file_select is 0.
 * file_select: non-zero selects the original file only, which is then opened
 *              right away through open_input_file(0).  With 0 both names are
 *              merely recorded and no input is opened here.
 *
 * Returns 1 on success, -1 when a required file name is NULL, otherwise the
 * non-zero status reported by open_input_file() or init_filter_graph().
 */
int init_decoder(char *filename1,char *filename2,uint8_t file_select)
{
    int status;

    /* The first name is always needed; the second only in two-file mode. */
    if (filename1 == NULL || (!file_select && filename2 == NULL)) {
        fprintf(stderr,"ERROR: Unable to initialise decoder. Filename null\n");
        return -1;
    }

    orig_video_name = filename1;
    if (file_select) {
        status = open_input_file(0);
        if (status != 0) {
            fprintf(stderr,"Not able to open input file\n");
            return status;
        }
    } else {
        edited_video_name = filename2;
    }

    // add any other settings from outer file in this function
    status = init_filter_graph(&graph, &src, &sink);
    if (status != 0) {
        fprintf(stderr,"ERROR: Not able to initialize input parameters %d\n",status);
        return status;
    }

    fprintf(stderr,"DEBUG: initialisation completed\n");
    return 1;
}
/*
 * Record the input audio format in the INPUT_* globals.
 *
 * frame:   decoded audio frame supplying channel layout and sample rate.
 * dec_ctx: decoder context supplying the sample format.
 *
 * Nothing is stored when either argument is NULL.  If any captured value is
 * still unset afterwards, an error is logged; the function has no return
 * value, so the globals are the only way for callers to notice.
 */
void init_input_parameters(AVFrame *frame,AVCodecContext *dec_ctx)
{
    const char *complaint = NULL;
    int all_set;

    if (!frame)
        complaint = "ERROR:Frame is NULL\n";
    else if (!dec_ctx)
        complaint = "ERROR:AVCodec context NULL\n";

    if (complaint) {
        fputs(complaint, stderr);
        return;
    }

    INPUT_SAMPLE_FMT = dec_ctx->sample_fmt;
    INPUT_SAMPLERATE = frame->sample_rate;
    INPUT_CHANNEL_LAYOUT = frame->channel_layout;

    all_set = INPUT_CHANNEL_LAYOUT != 0 &&
              INPUT_SAMPLERATE != 0 &&
              INPUT_SAMPLE_FMT != -1;
    if (!all_set)
        fputs("ERROR:input parameters not set\n", stderr);
}
/*
Opens input file and sets,initialises important context and parameters
args: file_select Selects name of the file to be opened from
orig_video_name(0) and edited_video_name(1)
*/
int open_input_file(uint8_t file_select)
{
int ret;
AVCodec *dec = NULL;
AVDictionaryEntry *tag = NULL;
// this parameter needs to be set taking parameters from CLI, taking
language number 1/2.
uint8_t language = 0,i,j,got_frame;
char *filename;
AVFrame *frame = av_frame_alloc();
int err,len;
char errstr[128];
AVPacket pkt;
av_register_all();
avfilter_register_all();
//open input video file
if(file_select == 0)
filename = orig_video_name;
else if(file_select == 1)
filename = edited_video_name;
else{
fprintf(stderr,"ERROR: Invalid value for file_select flag\n");
return -1;
}
if((ret = avformat_open_input(&fmt_ctx,filename,NULL,NULL)) < 0){
printf("Unable to open %s\n",filename);
return ret;
}
//print useful format information
printf("Opening format:%s\nFile:%sTotal %d streams in
video\n",fmt_ctx->iformat->name,filename,fmt_ctx->nb_streams);
if((ret == avformat_find_stream_info(fmt_ctx,NULL)) < 0){
printf("Unable to find stream info\n");
return ret;
}
//support for multiple audio streams(for multiple languages
possibly) if any in video;
//if language parameter is not set by default eng is choosen else
first stream is selected;
for(i = 0; i < fmt_ctx->nb_streams;i++){
tag = NULL;
if(fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
tag = av_dict_get(fmt_ctx->streams[i]->metadata, "language",
tag, AV_DICT_IGNORE_SUFFIX);
if(tag != NULL && strcmp(tag->value,"eng") == 0){ //language unset
if(language == 0){
audio_stream_index = i;
break;
}
}
else{
audio_stream_index = i;
break;
}
}
}
/* select audio stream and initialise decoder*/
ret = av_find_best_stream(fmt_ctx,AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
if(ret < 0){
printf("Cannot find audio stream in input\n");
return ret;
}
else if(dec == NULL)
printf("Audio decoder not found\n");
audio_stream_index = ret;
dec_ctx = fmt_ctx->streams[audio_stream_index]->codec;
printf("Selected audio stream:%d",audio_stream_index);
printf("Time base unit:AVStream->time_base:
%lu/%lu\n",fmt_ctx->streams[audio_stream_index]->time_base.num,fmt_ctx->streams[audio_stream_index]->time_base.den);
/* init audio decoder */
if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
printf("Cannot open audio decoder\n");
return ret;
}
while(1){ //read an audio frame for format specifications
ret = av_read_frame(fmt_ctx, &pkt);
if(ret < 0){
printf("Unable to read frame:%d \n",ret);
break;
}
if(pkt.stream_index == audio_stream_index){
got_frame = 0;
len = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt);
if(len < 0){
av_strerror(len,errstr,128);
fprintf(stderr,"ERROR %s %d %s\n",__FUNCTION__,__LINE__,errstr);
}
else if(got_frame)
break;
}
}
/* Initialise filters */
init_input_parameters(frame, dec_ctx);
av_frame_free(&frame);
return 0;
}
/*
Read frame sequence covering 1.5 seconds from time given and buffers
those frame.
Works like loop traversing frames to frames for a duration of 1.5
seconds. Resampling
function is called on frame itself then.
arg: time_to_seek_ms Time in miliseconds to get frame from
arg: index Index of subtitle block
*/
void process_frame_by_pts(uint16_t index,int64_t time_to_seek_ms)
{
int64_t num = fmt_ctx->streams[audio_stream_index]->time_base.num;
int64_t den = fmt_ctx->streams[audio_stream_index]->time_base.den;
int64_t duration;
int64_t start_pts = ((float)time_to_seek_ms/1000) * (float)den/num;
// converted to timebase of audio stream
int64_t end_pts = start_pts + ((float)(GRANUALITY/1000) * den/num);
int i = 0, count = 0,temp = 0,out_count = 0,in_count = 0;
uint8_t *OUTPUT_SAMPLES = NULL;
AVPacket pkt;
uint8_t *in;
int got_frame,ret;
AVFrame *frame = av_frame_alloc();
int size,len,buf_size;
uint64_t output_ch_layout = av_get_channel_layout("mono");
enum AVSampleFormat src_sample_fmt;
uint8_t *output_buffer = NULL;
int err;
char errstr[128];
if(end_pts > fmt_ctx->streams[audio_stream_index]->duration){
printf("Error: End PTS greater then duration of stream\n");
return;
}
ret = av_seek_frame(fmt_ctx,audio_stream_index,start_pts,AVSEEK_FLAG_BACKWARD);
//get one frame before timing to cover all
if( ret < 0 ){
fprintf(stderr,"av_seek_frame failed with error code %d\n",ret);
return;
}
fprintf(stdout,"Start PTS: %lu End PTS: %lu\n",start_pts,end_pts);
// Problem is that output gets stored in uint8_t type whereas I want
output in float type
do{ //outer do-while to read packets
ret = av_read_frame(fmt_ctx, &pkt);
if(ret < 0){
printf("Unable to read frame:%d \n",ret);
break;
}
if(pkt.stream_index == audio_stream_index){ // processing audio packets
size = pkt.size;
while(size > 0){ // inner while to decode frames, if more than
one are present in a single packet
got_frame = 0;
len = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt);
if(len < 0){
printf("Error while decoding\n");
}
if(got_frame){
err = av_buffersrc_add_frame(src, frame);
if(err < 0) {
av_frame_unref(frame);
fprintf(stderr,"Error adding frame to source buffer\n");
return;
}
size = size - len;
}
}
}
}while(frame->pts < end_pts);
while((err = av_buffersink_get_frame(sink,frame)) >= 0){
if(err < 0){
av_strerror(err,errstr,128);
fprintf(stderr,"av_buffer_get_frame returns %d %s\n",err,errstr);
break;
}
fprintf(stdout,"frame_pts %lu frame->duration(PTS) %d
frame->duration: %f with sample rate as:%d number of samples
%d\n",frame->pts,frame->pkt_duration,((double)frame->pkt_duration/frame->sample_rate)
* 1000,frame->sample_rate,frame->nb_samples);// here is problem of
sample rate
}
av_frame_free(&frame);
return;
}
void close_filter()
{
avfilter_graph_free(&graph);
}
More information about the Libav-user
mailing list