[FFmpeg-devel] [PATCH 2/2] lavc/aacsbr: sbr_dequant optimization

Ganesh Ajjanagadde gajjanag at mit.edu
Sat Dec 19 18:43:03 CET 2015


On Sat, Dec 19, 2015 at 4:21 AM, Andreas Cadhalpun
<andreas.cadhalpun at googlemail.com> wrote:
> On 18.12.2015 16:52, Ganesh Ajjanagadde wrote:
>> On Tue, Dec 15, 2015 at 9:53 PM, Ganesh Ajjanagadde
>> <gajjanagadde at gmail.com> wrote:
>>> This uses ff_exp2fi to get a speedup (~ 6x).
>>>
>>> sample benchmark (Haswell, GNU/Linux):
>>> old:
>>>   19102 decicycles in sbr_dequant,    1023 runs,      1 skips
>>>   19002 decicycles in sbr_dequant,    2045 runs,      3 skips
>>>   17638 decicycles in sbr_dequant,    4093 runs,      3 skips
>>>   15825 decicycles in sbr_dequant,    8189 runs,      3 skips
>>>   16404 decicycles in sbr_dequant,   16379 runs,      5 skips
>>>
>>> new:
>>>    3063 decicycles in sbr_dequant,    1024 runs,      0 skips
>>>    3049 decicycles in sbr_dequant,    2048 runs,      0 skips
>>>    2968 decicycles in sbr_dequant,    4096 runs,      0 skips
>>>    2818 decicycles in sbr_dequant,    8191 runs,      1 skips
>>>    2853 decicycles in sbr_dequant,   16383 runs,      1 skips
>>>
>>> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde at gmail.com>
>>> ---
>>>  libavcodec/aacsbr.c | 34 ++++++++++++++++++++++------------
>>>  1 file changed, 22 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
>>> index d1e3a91..15956e3 100644
>>> --- a/libavcodec/aacsbr.c
>>> +++ b/libavcodec/aacsbr.c
>>> @@ -33,6 +33,7 @@
>>>  #include "aacsbrdata.h"
>>>  #include "aacsbr_tablegen.h"
>>>  #include "fft.h"
>>> +#include "internal.h"
>>>  #include "aacps.h"
>>>  #include "sbrdsp.h"
>>>  #include "libavutil/internal.h"
>>> @@ -73,15 +74,22 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>>>  {
>>>      int k, e;
>>>      int ch;
>>> -
>>> +    static const double exp2_tab[2] = {1, M_SQRT2};
>>>      if (id_aac == TYPE_CPE && sbr->bs_coupling) {
>>> -        float alpha      = sbr->data[0].bs_amp_res ?  1.0f :  0.5f;
>>> -        float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;
>>> +        int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
>>>          for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
>>>              for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
>>> -                float temp1 = exp2f(sbr->data[0].env_facs_q[e][k] * alpha + 7.0f);
>>> -                float temp2 = exp2f((pan_offset - sbr->data[1].env_facs_q[e][k]) * alpha);
>>> -                float fac;
>>> +                float temp1, temp2, fac;
>>> +                if (sbr->data[0].bs_amp_res) {
>>> +                    temp1 = ff_exp2fi(sbr->data[0].env_facs_q[e][k] + 7);
>>> +                    temp2 = ff_exp2fi(pan_offset - sbr->data[1].env_facs_q[e][k]);
>>> +                }
>>> +                else {
>>> +                    temp1 = ff_exp2fi((sbr->data[0].env_facs_q[e][k]>>1) + 7) *
>>> +                            exp2_tab[sbr->data[0].env_facs_q[e][k] & 1];
>>> +                    temp2 = ff_exp2fi((pan_offset - sbr->data[1].env_facs_q[e][k])>>1) *
>>> +                            exp2_tab[(pan_offset - sbr->data[1].env_facs_q[e][k]) & 1];
>>> +                }
>>>                  if (temp1 > 1E20) {
>>>                      av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
>>>                      temp1 = 1;
>>> @@ -93,8 +101,8 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>>>          }
>>>          for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
>>>              for (k = 0; k < sbr->n_q; k++) {
>>> -                float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
>>> -                float temp2 = exp2f(12 - sbr->data[1].noise_facs_q[e][k]);
>>> +                float temp1 = ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
>>> +                float temp2 = ff_exp2fi(12 - sbr->data[1].noise_facs_q[e][k]);
>>>                  float fac;
>>>                  av_assert0(temp1 <= 1E20);
>>>                  fac = temp1 / (1.0f + temp2);
>>> @@ -104,11 +112,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>>>          }
>>>      } else { // SCE or one non-coupled CPE
>>>          for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
>>> -            float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
>>>              for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
>>>                  for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
>>> -                    sbr->data[ch].env_facs[e][k] =
>>> -                        exp2f(alpha * sbr->data[ch].env_facs_q[e][k] + 6.0f);
>>> +                    if (sbr->data[ch].bs_amp_res)
>>> +                        sbr->data[ch].env_facs[e][k] = ff_exp2fi(sbr->data[ch].env_facs_q[e][k] + 6);
>>> +                    else
>>> +                        sbr->data[ch].env_facs[e][k] = ff_exp2fi((sbr->data[ch].env_facs_q[e][k]>>1) + 6)
>>> +                                                       * exp2_tab[sbr->data[ch].env_facs_q[e][k] & 1];
>>>                      if (sbr->data[ch].env_facs[e][k] > 1E20) {
>>>                          av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
>>>                          sbr->data[ch].env_facs[e][k] = 1;
>>> @@ -118,7 +128,7 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
>>>              for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
>>>                  for (k = 0; k < sbr->n_q; k++)
>>>                      sbr->data[ch].noise_facs[e][k] =
>>> -                        exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
>>> +                        ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
>>>          }
>>>      }
>>>  }
>>> --
>>> 2.6.4
>>>
>>
>> ping for aac maintainers: Rostislav? thanks.
>
> I'm not an AAC maintainer, but the change looks correct and the speedup is nice,
> so this patch should be OK.
>
> One caveat is that NOISE_FLOOR_OFFSET is defined as FIXR(6.0f) and is thus a float here.
> Since this patch makes that unnecessary, it's probably a good idea to change that
> to simply 6.

Good point: we don't want the overhead or imprecision of a possible
float-to-int conversion, though I think a good compiler can remove it
since 6.0 is exactly representable.
Pushed, thanks.

>
> Best regards,
> Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


More information about the ffmpeg-devel mailing list