[FFmpeg-devel] [PATCH] HE-AACv1 second revision
Alex Converse
alex.converse
Tue Feb 9 16:03:07 CET 2010
On Mon, Feb 8, 2010 at 9:16 AM, Michael Niedermayer <michaelni at gmx.at> wrote:
> On Sat, Jan 30, 2010 at 05:33:00PM -0500, Alex Converse wrote:
>> On Sat, Jan 30, 2010 at 5:32 PM, Alex Converse <alex.converse at gmail.com> wrote:
>> > Notes:
>> > *There are still several lroundf() calls that take all integer inputs.
>> > If anyone has advice on how to do them in an integer only fashion. I
>> > would love to hear it.
>
> could you tell me what range the input values to the remaining 4 equations
> have?
+ num_bands_0 = lrintf(half_bands * log2f(sbr->k[1] /
(float)sbr->k[0])) << 1;
half_bands = {4, 5, 6}
k[1] = {1..64}
k[0] = {1..32}
k[1] / k[0] <= 2.2449f
k[1] - k[0] <= 48
+ float invwarp = spectrum->bs_alter_scale ? 0.76923076923076923077
+ : 1.0; //
bs_alter_scale = {0,1}
+ int num_bands_1 = lrintf(half_bands * invwarp *
+ log2f(sbr->k[2] / (float)sbr->k[1])) << 1;
k[1] = {2..64}
k[2] = {3..64}
k[2] / k[1] > 1.12245
k[2] - k[1] <= 48
sbr->n_q = FFMAX(1, lrintf(sbr->spectrum_params[1].bs_noise_bands *
log2f(sbr->k[2] / (float)sbr->k[4])));
// 0 <= bs_noise_bands <= 3
k[2] = {1..64}
k[4] = {1..32}
static void make_bands(int16_t* bands, int start, int stop, int num_bands)
{
int k, previous, present;
float base, prod;
base = powf((float)stop / start, 1.0f / num_bands);
prod = start;
previous = start;
for (k = 0; k < num_bands-1; k++) {
prod *= base;
present = lrintf(prod);
bands[k] = present - previous;
previous = present;
}
bands[num_bands-1] = stop - previous;
}
start = {1..64}
stop = {1..64}
num_bands = {1..48}
>
> a few comments (no full review) below
>
> [...]
>> +/**
>> + * Analysis QMF Bank (14496-3 sp04 p206)
>> + *
>> + * @param ? x ? ? ? pointer to the beginning of the first sample window
>> + * @param ? W ? ? ? array of complex-valued samples split into subbands
>> + */
>> +static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *fft, const float *in, float *x,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? FFTComplex u[64], float W[2][32][32][2])
>> +{
>> + ? ?int i, k, l;
>> + ? ?const uint16_t *revtab = fft->revtab;
>> + ? ?memcpy(W[0], W[1], sizeof(W[0]));
>> + ? ?memcpy(x ? ?, x+1024, (320-32)*sizeof(x[0]));
>> + ? ?memcpy(x+288, in ? ?, ? ? 1024*sizeof(x[0]));
>> + ? ?x += 319;
>> + ? ?for (l = 0; l < 32; l++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? // are not supported
>> + ? ? ? ?float z[320];
>> + ? ? ? ?for (i = 0; i < 320; i++)
>> + ? ? ? ? ? ?z[i] = x[-i] * sbr_qmf_window_ds[i];
>> + ? ? ? ?for (i = 0; i < 64; i++) {
>> + ? ? ? ? ? ?float f = z[i] + z[i + 64] + z[i + 128] + z[i + 192] + z[i + 256];
>
>
>> + ? ? ? ? ? ?u[revtab[i]].re = f * analysis_cos_pre[i];
>> + ? ? ? ? ? ?u[revtab[i]].im = f * analysis_sin_pre[i];
>> + ? ? ? ?}
>> + ? ? ? ?ff_fft_calc(fft, u);
>
> cant this be done with a fft /dct /whatever of half that size?
I will look into that again.
>
> [...]
>> +/// High Frequency Generator (14496-3 sp04 p215)
>> +static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr,
>> + ? ? ? ? ? ? ? ? ? ? ?float X_high[64][40][2], float X_low[32][40][2], float (*alpha0)[2],
>> + ? ? ? ? ? ? ? ? ? ? ?float (*alpha1)[2], float bw_array[2][5], uint8_t *t_env,
>> + ? ? ? ? ? ? ? ? ? ? ?int bs_num_env)
>> +{
>> + ? ?int i, x, l;
>> + ? ?int k = sbr->k[4];
>> + ? ?for (i = 0; i < sbr->num_patches; i++) {
>> + ? ? ? ?for (x = 0; x < sbr->patch_num_subbands[i]; x++, k++) {
>> + ? ? ? ? ? ?const int g = find_freq_subband(sbr->f_tablenoise, sbr->n_q + 1, k);
>> + ? ? ? ? ? ?const int p = sbr->patch_start_subband[i] + x;
>> +
>> + ? ? ? ? ? ?if (g < 0) {
>> + ? ? ? ? ? ? ? ?av_log(ac->avccontext, AV_LOG_ERROR,
>> + ? ? ? ? ? ? ? ? ? ? ? "ERROR : no subband found for frequency %d\n", k);
>> + ? ? ? ? ? ? ? ?return -1;
>> + ? ? ? ? ? ?}
>> +
>
>> + ? ? ? ? ? ?for (l = t_env[0] << 1; l < t_env[bs_num_env] << 1; l++) {
>
> i would suggest to avoid using l as variable because it looks so similar to 1
> makes the code quite hard to read IMHO
>
'l' is consistently used in the spec as that index. I like using the
same index variables because I have transposed some matrices for good
reason. With good fonts 'l' and '1' should be sufficiently distinct.
Perhaps replacing the "<< 1"s with "* 2" may improve readability
enough?
>
> [...]
>> +///< window coefficients for analysis/synthesis QMF banks
>> +static DECLARE_ALIGNED_16(float, sbr_qmf_window_ds)[320];
>> +static const DECLARE_ALIGNED_16(float, sbr_qmf_window_us)[640] = {
>
> seems symmetric, thus maybe storing just half of it would work and it might
> (or might not) allow some optimizations
>
The down sampled windows allow me to use existing dsp functions that
are all single stride.
More information about the ffmpeg-devel
mailing list