[FFmpeg-soc] [soc]: r4322 - in wmapro: wma3.h wma3data.h wma3dec.c
faust3
subversion at mplayerhq.hu
Sat May 30 12:59:36 CEST 2009
Author: faust3
Date: Sat May 30 12:59:35 2009
New Revision: 4322
Log:
described the codec a bit and reworked existing comments
Modified:
wmapro/wma3.h
wmapro/wma3data.h
wmapro/wma3dec.c
Modified: wmapro/wma3.h
==============================================================================
--- wmapro/wma3.h Sat May 30 02:20:08 2009 (r4321)
+++ wmapro/wma3.h Sat May 30 12:59:35 2009 (r4322)
@@ -31,14 +31,14 @@
#include "wma3data.h"
#include "dsputil.h"
-/* current decoder limitations */
+/** current decoder limitations */
#define MAX_CHANNELS 8 ///< max number of handled channels
#define MAX_SUBFRAMES 32 ///< max number of subframes per channel
#define MAX_BANDS 29 ///< max number of scale factor bands
#define MAX_FRAMESIZE 16384 ///< maximum compressed frame size
#define MAX_FRAMEBITS (MAX_FRAMESIZE << 3) ///< maximum frame size in bits
-/* size of block defines taken from wma.h */
+/** size of block defines taken from wma.h */
#define BLOCK_MIN_BITS 7 ///< log2 of min block size
#define BLOCK_MAX_BITS 12 ///< log2 of max block size
#define BLOCK_MIN_SIZE (1 << BLOCK_MIN_BITS) ///< minimum block size
@@ -88,7 +88,7 @@ typedef struct {
* @brief main decoder context
*/
typedef struct WMA3DecodeContext {
- /* generic decoder variables */
+ /** generic decoder variables */
AVCodecContext* avctx; ///< codec context for av_log
DSPContext dsp; ///< accelerated dsp functions
uint8_t frame_data[MAX_FRAMESIZE +
@@ -104,7 +104,7 @@ typedef struct WMA3DecodeContext {
VLC coef_vlc[2]; ///< coefficient run length vlc codes
int coef_max[2]; ///< max length of vlc codes
- /* frame size dependent frame information (set during initialization) */
+ /** frame size dependent frame information (set during initialization) */
uint8_t lossless; ///< lossless mode
unsigned int decode_flags; ///< used compression features
uint8_t len_prefix; ///< frame is prefixed with its length
@@ -124,7 +124,7 @@ typedef struct WMA3DecodeContext {
int* sf_offsets; ///< scale factor resample matrix
int* subwoofer_cutoffs; ///< subwoofer cutoff values
- /* packet decode state */
+ /** packet decode state */
uint8_t packet_sequence_number; ///< current packet number
int num_saved_bits; ///< saved number of bits
int frame_offset; ///< frame offset in the bit reservoir
@@ -133,7 +133,7 @@ typedef struct WMA3DecodeContext {
uint8_t bit6; ///< unknown
uint8_t packet_loss; ///< set in case of bitstream error
- /* frame decode state */
+ /** frame decode state */
unsigned int frame_num; ///< current frame number
GetBitContext gb; ///< bitstream reader context
int buf_bit_size; ///< buffer size in bits
@@ -143,7 +143,7 @@ typedef struct WMA3DecodeContext {
int skip_frame; ///< skip output step
int parsed_all_subframes; ///< all subframes decoded?
- /* subframe/block decode state */
+ /** subframe/block decode state */
int subframe_len; ///< current subframe length
int channels_for_cur_subframe; ///< number of channels that contain the subframe
int channel_indexes_for_cur_subframe[MAX_CHANNELS];
Modified: wmapro/wma3data.h
==============================================================================
--- wmapro/wma3data.h Sat May 30 02:20:08 2009 (r4321)
+++ wmapro/wma3data.h Sat May 30 12:59:35 2009 (r4322)
@@ -86,7 +86,7 @@ static const uint8_t ff_wma3_scale_huffb
/**
- * @name huffman, run and level tables for run length coded scale factors
+ * @name huffman, run and level tables for run level coded scale factors
* @{
*/
#define FF_WMA3_HUFF_SCALE_RL_SIZE 120
@@ -157,7 +157,7 @@ static const uint8_t ff_wma3_scale_rl_le
/**
- * @name huffman, run and level codes for run length coded coefficients
+ * @name huffman, run and level codes for run level coded coefficients
* @{
*/
#define FF_WMA3_HUFF_COEF0_SIZE 272
Modified: wmapro/wma3dec.c
==============================================================================
--- wmapro/wma3dec.c Sat May 30 02:20:08 2009 (r4321)
+++ wmapro/wma3dec.c Sat May 30 12:59:35 2009 (r4322)
@@ -23,6 +23,55 @@
/**
* @file libavcodec/wma3dec.c
* @brief wmapro decoder implementation
+ * Wmapro is an MDCT based codec comparable to wma standard or AAC.
+ * The decoding therefore consists of the following steps:
+ * - bitstream decoding
+ * - reconstruction of per channel data
+ * - rescaling and requantization
+ * - IMDCT
+ * - windowing and overlap-add
+ *
+ * The compressed wmapro bitstream is split into individual packets.
+ * Every such packet contains one or more wma frames.
+ * The compressed frames may have a variable length and frames may
+ * cross packet boundaries.
+ * Common to all wmapro frames is the number of samples that are stored in
+ * a frame.
+ * The number of samples and a few other decode flags are stored
+ * as extradata that has to be passed to the decoder.
+ *
+ * The wmapro frames themselves are again split into a variable number of
+ * subframes. Every subframe contains the data for 2^N time domain samples
+ * where N varies between 7 and 12.
+ *
+ * The frame layouts for the individual channels of a wma frame do not need
+ * to be the same.
+ * However, if the offsets and lengths of several subframes of a frame are the
+ * same, the subframes of the channels can be grouped.
+ * Every group may then use special coding techniques like M/S stereo coding
+ * to improve the compression ratio. These channel transformations do not
+ * need to be applied to a whole subframe. Instead, they can also work on
+ * individual scale factor bands (see below).
+ * The coefficients that carry the audio signal in the frequency domain
+ * are transmitted as huffman coded vectors with 4, 2 and 1 elements.
+ * In addition to that, the encoder can switch to a run level coding scheme
+ * by transmitting subframe_length / 128 zero coefficients.
+ *
+ * Before the audio signal can be converted to the time domain, the
+ * coefficients have to be rescaled and inverse quantized.
+ * A subframe is therefore split into several scale factor bands that get
+ * scaled individually.
+ * Scale factors are submitted for every frame but they might be shared
+ * between the subframes of a channel. Scale factors are initially DPCM coded.
+ * Once scale factors are shared, the differences are transmitted as run
+ * level codes.
+ * Every subframe length and offset combination in the frame layout shares a
+ * common quantization factor that can be adjusted for every channel by a
+ * modifier.
+ * After the inverse quantization, the coefficients get processed by an IMDCT.
+ * The resulting values are then windowed with a sine window and the first half
+ * of the values are added to the second half of the output from the previous
+ * subframe in order to reconstruct the output samples.
*/
#include "avcodec.h"
@@ -74,7 +123,7 @@ static int wma_get_samples_per_frame(int
else
samples_per_frame = 8192;
- /* WMA voice code if (decode_flags & 0x800) {
+ /** WMA voice code if (decode_flags & 0x800) {
tmp = ((decode_flags & 6) >> 1) | ((decode_flags & 0x600) >> 7);
samples_per_frame = (tmp+1)*160;
} else { */
@@ -141,7 +190,7 @@ static av_cold int wma_decode_init(AVCod
s->avctx = avctx;
dsputil_init(&s->dsp, avctx);
- /* FIXME: is this really the right thing to do for 24 bits? */
+ /** FIXME: is this really the right thing to do for 24 bits? */
s->sample_bit_depth = 16; // avctx->bits_per_sample;
if (avctx->extradata_size >= 18) {
s->decode_flags = AV_RL16(edata_ptr+14);
@@ -578,6 +627,7 @@ static int wma_decode_channel_transform(
memset(s->chgroup[i].decorrelation_matrix,0,sizeof(float) *s->num_channels * s->num_channels);
}
+ /** in the one channel case channel transforms are pointless */
if(s->num_channels == 1 ){
s->num_chgroups = 0;
s->chgroup[0].num_channels = 1;
@@ -603,7 +653,7 @@ static int wma_decode_channel_transform(
chgroup->no_rotation = 0;
chgroup->transform = 0;
- /* decode channel mask */
+ /** decode channel mask */
memset(chgroup->use_channel,0,sizeof(chgroup->use_channel));
if(remaining_channels > 2){
@@ -624,11 +674,7 @@ static int wma_decode_channel_transform(
}
}
- /** done decode channel mask */
-
- /* decide x form type
- FIXME: port this to float, all rotations should lie
- on the unit circle */
+ /** decode transform type */
if(chgroup->num_channels == 1){
chgroup->no_rotation = 1;
chgroup->transform = 2;
@@ -647,7 +693,7 @@ static int wma_decode_channel_transform(
}else{
chgroup->no_rotation = 1;
chgroup->transform = 1;
- chgroup->decorrelation_matrix[0] = 0.70703125; // FIXME: cos(pi/4)
+ chgroup->decorrelation_matrix[0] = 0.70703125; /** cos(pi/4) */
chgroup->decorrelation_matrix[1] = -0.70703125;
chgroup->decorrelation_matrix[2] = 0.70703125;
chgroup->decorrelation_matrix[3] = 0.70703125;
@@ -678,9 +724,8 @@ static int wma_decode_channel_transform(
}
}
- /** done decide x form type */
-
- if(!chgroup->no_rotation){ /** decode channel transform */
+ /** decode additional transform parameters */
+ if(!chgroup->no_rotation){
int n_offset = chgroup->num_channels * (chgroup->num_channels - 1) / 2;
int i;
for(i=0;i<n_offset;i++){
@@ -690,16 +735,15 @@ static int wma_decode_channel_transform(
chgroup->positive[i] = get_bits1(&s->gb);
}
- /* decode transform on / off */
+ /** decode transform on / off */
if(chgroup->num_channels <= 1 || ((chgroup->no_rotation != 1 || chgroup->transform == 2) && chgroup->no_rotation)){
- // done
int i;
for(i=0;i<s->num_bands;i++)
chgroup->transform_band[i] = 1;
}else{
+ /** transform can be enabled for individual bands */
if(get_bits1(&s->gb) == 0){
int i;
- // transform works on individual scale factor bands
for(i=0;i< s->num_bands;i++){
chgroup->transform_band[i] = get_bits1(&s->gb);
}
@@ -709,7 +753,6 @@ static int wma_decode_channel_transform(
chgroup->transform_band[i] = 1;
}
}
- /** done decode transform on / off */
remaining_channels -= chgroup->num_channels;
}
}
@@ -723,7 +766,7 @@ static int wma_decode_channel_transform(
*/
static unsigned int wma_get_large_val(WMA3DecodeContext* s)
{
- /* consumes up to 34 bits */
+ /** consumes up to 34 bits */
int n_bits = 8;
/** decode length */
if(get_bits1(&s->gb)){
@@ -772,20 +815,19 @@ static int wma_decode_coeffs(WMA3DecodeC
level = ff_wma3_coef0_level;
}
- /* for subframe_len 128 the first zero coefficient will switch to the run length mode */
+ /** for subframe_len 128 the first zero coefficient will switch to the run level mode */
if(s->subframe_len == 128){
zero_init = num_zeros = 1;
rl_switchmask = 1;
}
- /* read coefficients (consumes up to 167 bits per iteration for
+ /** decode vector coefficients (consumes up to 167 bits per iteration for
4 vector coded large values) */
while(!rl_mode && cur_coeff + 3 < s->subframe_len){
int vals[4];
int i;
unsigned int idx;
- /* read 4 values at once */
idx = get_vlc2(&s->gb, s->vec4_vlc.table, VLCBITS, ((FF_WMA3_HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS));
if ( idx == FF_WMA3_HUFF_VEC4_SIZE - 1 ){
@@ -812,12 +854,15 @@ static int wma_decode_coeffs(WMA3DecodeC
vals[3] = ff_wma3_symbol_to_vec4[idx] & 0xF;
}
+ /** decode sign */
for(i=0;i<4;i++){
if(vals[i]){
int sign = get_bits1(&s->gb) - 1;
ci->coeffs[cur_coeff] = (vals[i]^sign) - sign;
num_zeros = zero_init;
}else{
+ /** switch to run level mode when subframe_len / 128 zeros
+ were found in a row */
rl_mode |= (num_zeros & rl_switchmask);
++num_zeros;
}
@@ -825,6 +870,7 @@ static int wma_decode_coeffs(WMA3DecodeC
}
}
+ /** decode run level coded coefficients */
if(rl_mode){
unsigned int coeff_mask = s->subframe_len - 1;
while(cur_coeff < s->subframe_len){
@@ -852,6 +898,7 @@ static int wma_decode_coeffs(WMA3DecodeC
cur_coeff += get_bits(&s->gb,2) + 1;
}
}
+ /** decode sign */
sign = get_bits1(&s->gb) - 1;
ci->coeffs[cur_coeff & coeff_mask] = (val^sign) - sign;
++cur_coeff;
@@ -871,7 +918,7 @@ static int wma_decode_scale_factors(WMA3
int i;
const int idx0 = av_log2(s->samples_per_frame/s->subframe_len);
- /* should never consume more than 5344 bits
+ /** should never consume more than 5344 bits
* MAX_CHANNELS * (1 + MAX_BANDS * 23)
*/
@@ -902,9 +949,10 @@ static int wma_decode_scale_factors(WMA3
if(s->channel[c].transmit_sf){
int b;
- if(!s->channel[c].reuse_sf){ //DPCM coded
+ if(!s->channel[c].reuse_sf){
int i;
int val;
+ /** decode DPCM coded scale factors */
s->channel[c].scale_factor_step = get_bits(&s->gb,2) + 1;
val = get_vlc2(&s->gb, s->sf_vlc.table, SCALEVLCBITS, ((FF_WMA3_HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS));
s->channel[c].scale_factors[0] = 45 / s->channel[c].scale_factor_step + val - 60;
@@ -912,8 +960,10 @@ static int wma_decode_scale_factors(WMA3
val = get_vlc2(&s->gb, s->sf_vlc.table, SCALEVLCBITS, ((FF_WMA3_HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS));
s->channel[c].scale_factors[i] = s->channel[c].scale_factors[i-1] + val - 60;
}
- }else{ // rl-coded
+ }else{
int i;
+ /** run level decode differences to the resampled factors */
+
memcpy(s->channel[c].scale_factors,s->channel[c].resampled_scale_factors,
4 * s->num_bands);
@@ -1019,9 +1069,10 @@ static void wma_inverse_channel_transfor
(s->chgroup[i].no_rotation == 1) &&
(s->chgroup[i].transform == 1)){
int b;
+ /** M/S stereo decoding */
for(b = 0; b < s->num_bands;b++){
int y;
- if(s->chgroup[i].transform_band[b] == 1){ // M/S stereo
+ if(s->chgroup[i].transform_band[b] == 1){
for(y=s->cur_sfb_offsets[b];y<FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);y++){
float v1 = s->channel[0].coeffs[y];
float v2 = s->channel[1].coeffs[y];
@@ -1041,12 +1092,15 @@ static void wma_inverse_channel_transfor
int cnt = 0;
float* ch_data[MAX_CHANNELS];
float sums[MAX_CHANNELS * MAX_CHANNELS];
+
+ /** multichannel decorrelation */
if(!s->chgroup[i].no_rotation)
wma_calc_decorrelation_matrix(s,&s->chgroup[i]);
+ /** get the channels that use the transform */
for(x=0;x<s->channels_for_cur_subframe;x++){
int chan = s->channel_indexes_for_cur_subframe[x];
- if(s->chgroup[i].use_channel[chan] == 1){ // assign ptrs
+ if(s->chgroup[i].use_channel[chan] == 1){
ch_data[cnt] = s->channel[chan].coeffs;
++cnt;
}
@@ -1055,7 +1109,7 @@ static void wma_inverse_channel_transfor
for(b = 0; b < s->num_bands;b++){
int y;
if(s->chgroup[i].transform_band[b] == 1){
- // multiply values with decorrelation_matrix
+ /** multiply values with the decorrelation_matrix */
for(y=s->cur_sfb_offsets[b];y<FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);y++){
float* matrix = s->chgroup[i].decorrelation_matrix;
int m;
@@ -1074,7 +1128,7 @@ static void wma_inverse_channel_transfor
++ch_data[m];
}
}
- }else{ /** skip band */
+ }else{ /** skip band */
for(y=0;y<s->chgroup[i].num_channels;y++)
ch_data[y] += s->cur_sfb_offsets[b+1] - s->cur_sfb_offsets[b];
}
@@ -1168,7 +1222,7 @@ static int wma_decode_subframe(WMA3Decod
int c = s->channel_indexes_for_cur_subframe[i];
/** calculate number of scale factor bands and their offsets */
- /** FIXME move out of the loop */
+ /* FIXME move out of the loop */
if(i == 0){
if(s->channel[c].num_subframes <= 1){
s->num_bands = s->num_sfb[0];
@@ -1238,7 +1292,7 @@ static int wma_decode_subframe(WMA3Decod
int quant;
int sign = 1;
int large_quant = 0;
- if((get_bits1(&s->gb))){ /** FIXME: might influence how often getvec4 may be called */
+ if((get_bits1(&s->gb))){ /** FIXME: might change run level mode decision */
av_log(s->avctx,AV_LOG_ERROR,"unsupported quant step coding\n");
return 0;
}
@@ -1300,6 +1354,7 @@ static int wma_decode_subframe(WMA3Decod
av_log(s->avctx,AV_LOG_DEBUG,"BITSTREAM: subframe length was %i\n",get_bits_count(&s->gb) - s->subframe_offset);
if(transmit_coeffs){
+ /** reconstruct the per channel data */
wma_inverse_channel_transform(s);
for(i=0;i<s->channels_for_cur_subframe;i++){
int c = s->channel_indexes_for_cur_subframe[i];
@@ -1308,7 +1363,7 @@ static int wma_decode_subframe(WMA3Decod
if(c == s->lfe_channel)
memset(&s->tmp[s->cur_subwoofer_cutoff],0,sizeof(float) * (subframe_len - s->cur_subwoofer_cutoff));
- /** inverse quantization */
+ /** inverse quantization and rescaling */
for(b=0;b<s->num_bands;b++){
int start = s->cur_sfb_offsets[b];
int end = s->cur_sfb_offsets[b+1];
@@ -1329,7 +1384,8 @@ static int wma_decode_subframe(WMA3Decod
}
dst = &s->channel[c].out[s->samples_per_frame/2 + s->channel[c].subframe_offset[s->channel[c].cur_subframe]];
- ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len)-BLOCK_MIN_BITS], dst, s->tmp); // DCTIV with reverse
+ /** apply imdct (ff_imdct_half == DCTIV with reverse) */
+ ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len)-BLOCK_MIN_BITS], dst, s->tmp);
}
}else{
for(i=0;i<s->channels_for_cur_subframe;i++){
@@ -1340,6 +1396,7 @@ static int wma_decode_subframe(WMA3Decod
}
}
+ /** window and overlap-add */
wma_window(s);
/** handled one subframe */
@@ -1429,7 +1486,7 @@ static int wma_decode_frame(WMA3DecodeCo
s->channel[i].reuse_sf = 0;
}
- /** parse all subframes */
+ /** decode all subframes */
while(!s->parsed_all_subframes){
if(!wma_decode_subframe(s)){
s->packet_loss = 1;
@@ -1462,7 +1519,7 @@ static int wma_decode_frame(WMA3DecodeCo
s->samples += s->num_channels * s->samples_per_frame;
if(len != (get_bits_count(gb) - s->frame_offset) + 2){
- /* FIXME: not sure if this is always an error */
+ /** FIXME: not sure if this is always an error */
av_log(s->avctx,AV_LOG_ERROR,"frame[%i] would have to skip %i bits\n",s->frame_num,len - (get_bits_count(gb) - s->frame_offset) - 1);
s->packet_loss = 1;
return 0;
More information about the FFmpeg-soc
mailing list