[FFmpeg-devel] [RFC] Generic psychoacoustic model interface
Kostya
kostya.shishkov
Wed Aug 27 10:35:20 CEST 2008
Here's my first attempt to define a codec-agnostic psy model.
Here's an interface for it. I'm not sure about AC3, but
it should be possible to use it with DCA, Vorbis,
MPEG Audio Layers I-III and NBC, maybe WMA too.
In case somebody codes an implementation, of course.
Personally, I plan to make my encoder use it, backed by the
already implemented 3GPP model.
-------------- next part --------------
/*
* audio encoder psychoacoustic model
* Copyright (C) 2008 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef FFMPEG_AACPSY_H
#define FFMPEG_AACPSY_H
#include "avcodec.h"
/**
 * maximum possible number of bands
 *
 * Used as the size of the FFPsyContext::bands array.
 */
#define MAX_BANDS 128
/**
 * Psychoacoustic information for a single band.
 *
 * NOTE(review): units and scaling of the values are not specified
 * in this header - confirm against the model implementation.
 */
typedef struct FFPsyBand {
    /** energy value of the band */
    float energy;
    /** threshold value of the band */
    float threshold;
    /** perceptual entropy value of the band */
    float perceptual_entropy;
} FFPsyBand;
/**
 * Windowing-related information.
 */
typedef struct FFWindowInfo {
    /** window type (short/long/transitional, etc.) - current and previous */
    int window_type[2];
    /** window shape (sine/KBD/whatever) */
    int window_shape;
    /** codec-dependent window information */
    void *additional_info;
} FFWindowInfo;
/**
 * Context used by the psychoacoustic model.
 */
typedef struct FFPsyContext {
    /** encoder context */
    AVCodecContext *avctx;

    /** frame bands information */
    FFPsyBand bands[MAX_BANDS];
    /** frame window info */
    FFWindowInfo *win_info;

    /** scalefactor band sizes for long frame */
    const uint8_t *long_bands;
    /** number of scalefactor bands for long frame */
    int num_long_bands;
    /** scalefactor band sizes for short frame */
    const uint8_t *short_bands;
    /** number of scalefactor bands for short frame */
    int num_short_bands;

    /** psychoacoustic model implementation private data */
    void *model_priv_data;
} FFPsyContext;
/**
 * Set up a psychoacoustic model context.
 *
 * @param ctx             model context to initialize
 * @param avctx           codec context
 * @param long_bands      scalefactor band lengths for long frame
 * @param num_long_bands  number of scalefactor bands for long frame
 * @param short_bands     scalefactor band lengths for short frame
 * @param num_short_bands number of scalefactor bands for short frame
 *
 * @return zero if successful, a negative value if not
 */
int ff_psy_init(FFPsyContext *ctx,
                AVCodecContext *avctx,
                const uint8_t *long_bands,
                int num_long_bands,
                const uint8_t *short_bands,
                int num_short_bands);
/**
* Suggest window sequence for channel.
*
* @param ctx model context
* @param audio samples for the current frame
* @param la lookahead samples (NULL when unavailable)
* @param channel number of channel element to analyze
* @param prev_type previous window type
*
* @return suggested window information in a structure
*/
FFWindowInfo* ff_psy_suggest_window(AACPsyContext *ctx, int16_t *audio, int16_t *la,
int channel, int prev_type);
/**
* Perform psychoacoustic analysis and set band info.
*
* @param ctx model context
* @param tag number of channel element to analyze
* @param type channel element type (e.g. ID_SCE or ID_CPE)
* @param cpe pointer to the current channel element
*/
void ff_psy_analyze(AACPsyContext *ctx, int tag, int type, ChannelElement *cpe);
/**
* Cleanup model context at the end.
*
* @param ctx model context
*/
void ff_psy_end(AACPsyContext *ctx);
/**************************************************************************
* Audio preprocessing stuff. *
* This should be moved into some audio filter eventually. *
**************************************************************************/
/** Opaque preprocessing context; only declared here, not defined. */
struct FFPsyPreprocessContext;

/**
 * Initialize psychoacoustic model audio preprocessing.
 *
 * @param avctx codec context
 *
 * @return pointer to a preprocessing context
 *
 * NOTE(review): the value returned on failure is not specified in this
 * header - presumably NULL, confirm with the implementation.
 */
struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx);
/**
 * Preprocess several channels of an audio frame so that it compresses better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to preprocess
 * @param dest     place to put filtered samples
 * @param tag      number of channel group
 * @param channels number of channels to preprocess (some additional work
 *                 may be done on a stereo pair)
 *
 * NOTE(review): the ff_aac_ prefix differs from the ff_psy_preprocess_init()
 * and ff_psy_preprocess_end() naming used for the rest of this module.
 */
void ff_aac_psy_preprocess(struct FFPsyPreprocessContext *ctx,
                           int16_t *audio, int16_t *dest,
                           int tag, int channels);
/**
 * Cleanup audio preprocessing module.
 *
 * @param ctx preprocessing context
 */
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
#endif /* FFMPEG_AACPSY_H */
More information about the ffmpeg-devel
mailing list