[FFmpeg-cvslog] ac3enc_fixed: drop unnecessary fixed-point DSP code
Lynne
git at videolan.org
Thu Jan 14 03:48:38 EET 2021
ffmpeg | branch: master | Lynne <dev at lynne.ee> | Sat Jan 9 03:19:18 2021 +0100| [9e05421dbe0c733dca2a39f8399db86acc7e82bc] | committer: Lynne
ac3enc_fixed: drop unnecessary fixed-point DSP code
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9e05421dbe0c733dca2a39f8399db86acc7e82bc
---
libavcodec/ac3dsp.c | 60 ---------
libavcodec/ac3dsp.h | 47 -------
libavcodec/ac3tab.c | 38 ------
libavcodec/ac3tab.h | 1 -
libavcodec/arm/ac3dsp_init_arm.c | 9 --
libavcodec/x86/ac3dsp.asm | 258 ---------------------------------------
libavcodec/x86/ac3dsp_init.c | 52 +-------
7 files changed, 1 insertion(+), 464 deletions(-)
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 382f87c05f..85c721dd3b 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -46,49 +46,6 @@ static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
}
}
-static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
-{
- int i, v = 0;
- for (i = 0; i < len; i++)
- v |= abs(src[i]);
- return v;
-}
-
-static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
- unsigned int shift)
-{
- uint32_t *src32 = (uint32_t *)src;
- const uint32_t mask = ~(((1 << shift) - 1) << 16);
- int i;
- len >>= 1;
- for (i = 0; i < len; i += 8) {
- src32[i ] = (src32[i ] << shift) & mask;
- src32[i+1] = (src32[i+1] << shift) & mask;
- src32[i+2] = (src32[i+2] << shift) & mask;
- src32[i+3] = (src32[i+3] << shift) & mask;
- src32[i+4] = (src32[i+4] << shift) & mask;
- src32[i+5] = (src32[i+5] << shift) & mask;
- src32[i+6] = (src32[i+6] << shift) & mask;
- src32[i+7] = (src32[i+7] << shift) & mask;
- }
-}
-
-static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
- unsigned int shift)
-{
- do {
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- *src++ >>= shift;
- len -= 8;
- } while (len > 0);
-}
-
static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len)
{
const float scale = 1 << 24;
@@ -376,19 +333,6 @@ void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matr
ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len);
}
-static void apply_window_int16_c(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len)
-{
- int i;
- int len2 = len >> 1;
-
- for (i = 0; i < len2; i++) {
- int16_t w = window[i];
- output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
- output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
- }
-}
-
void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
int out_ch, int in_ch, int len)
{
@@ -424,9 +368,6 @@ void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
{
c->ac3_exponent_min = ac3_exponent_min_c;
- c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
- c->ac3_lshift_int16 = ac3_lshift_int16_c;
- c->ac3_rshift_int32 = ac3_rshift_int32_c;
c->float_to_fixed24 = float_to_fixed24_c;
c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
c->update_bap_counts = ac3_update_bap_counts_c;
@@ -438,7 +379,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
c->out_channels = 0;
c->downmix = NULL;
c->downmix_fixed = NULL;
- c->apply_window_int16 = apply_window_int16_c;
if (ARCH_ARM)
ff_ac3dsp_init_arm(c, bit_exact);
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index 161de4cb86..a23b11526e 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -42,39 +42,6 @@ typedef struct AC3DSPContext {
*/
void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
- /**
- * Calculate the maximum MSB of the absolute value of each element in an
- * array of int16_t.
- * @param src input array
- * constraints: align 16. values must be in range [-32767,32767]
- * @param len number of values in the array
- * constraints: multiple of 16 greater than 0
- * @return a value with the same MSB as max(abs(src[]))
- */
- int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
-
- /**
- * Left-shift each value in an array of int16_t by a specified amount.
- * @param src input array
- * constraints: align 16
- * @param len number of values in the array
- * constraints: multiple of 32 greater than 0
- * @param shift left shift amount
- * constraints: range [0,15]
- */
- void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift);
-
- /**
- * Right-shift each value in an array of int32_t by a specified amount.
- * @param src input array
- * constraints: align 16
- * @param len number of values in the array
- * constraints: multiple of 16 greater than 0
- * @param shift right shift amount
- * constraints: range [0,31]
- */
- void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift);
-
/**
* Convert an array of float in range [-1.0,1.0] to int32_t with range
* [-(1<<24),(1<<24)]
@@ -136,20 +103,6 @@ typedef struct AC3DSPContext {
int in_channels;
void (*downmix)(float **samples, float **matrix, int len);
void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len);
-
- /**
- * Apply symmetric window in 16-bit fixed-point.
- * @param output destination array
- * constraints: 16-byte aligned
- * @param input source array
- * constraints: 16-byte aligned
- * @param window window array
- * constraints: 16-byte aligned, at least len/2 elements
- * @param len full window length
- * constraints: multiple of ? greater than zero
- */
- void (*apply_window_int16)(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
} AC3DSPContext;
void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact);
diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c
index d018110331..99307218cc 100644
--- a/libavcodec/ac3tab.c
+++ b/libavcodec/ac3tab.c
@@ -147,44 +147,6 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1
};
-/* AC-3 MDCT window */
-
-/* MDCT window */
-DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = {
- 4, 7, 12, 16, 21, 28, 34, 42,
- 51, 61, 72, 84, 97, 111, 127, 145,
- 164, 184, 207, 231, 257, 285, 315, 347,
- 382, 419, 458, 500, 544, 591, 641, 694,
- 750, 810, 872, 937, 1007, 1079, 1155, 1235,
- 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016,
- 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076,
- 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444,
- 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127,
- 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112,
- 8380, 8652, 8927, 9207, 9491, 9778,10069,10363,
-10660,10960,11264,11570,11879,12190,12504,12820,
-13138,13458,13780,14103,14427,14753,15079,15407,
-15735,16063,16392,16720,17049,17377,17705,18032,
-18358,18683,19007,19330,19651,19970,20287,20602,
-20914,21225,21532,21837,22139,22438,22733,23025,
-23314,23599,23880,24157,24430,24699,24964,25225,
-25481,25732,25979,26221,26459,26691,26919,27142,
-27359,27572,27780,27983,28180,28373,28560,28742,
-28919,29091,29258,29420,29577,29729,29876,30018,
-30155,30288,30415,30538,30657,30771,30880,30985,
-31086,31182,31274,31363,31447,31528,31605,31678,
-31747,31814,31877,31936,31993,32046,32097,32145,
-32190,32232,32272,32310,32345,32378,32409,32438,
-32465,32490,32513,32535,32556,32574,32592,32608,
-32623,32636,32649,32661,32671,32681,32690,32698,
-32705,32712,32718,32724,32729,32733,32737,32741,
-32744,32747,32750,32752,32754,32756,32757,32759,
-32760,32761,32762,32763,32764,32764,32765,32765,
-32766,32766,32766,32766,32767,32767,32767,32767,
-32767,32767,32767,32767,32767,32767,32767,32767,
-32767,32767,32767,32767,32767,32767,32767,32767,
-};
-
const uint8_t ff_ac3_log_add_tab[260]= {
0x40,0x3f,0x3e,0x3d,0x3c,0x3b,0x3a,0x39,0x38,0x37,
0x36,0x35,0x34,0x34,0x33,0x32,0x31,0x30,0x2f,0x2f,
diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h
index 1d1264e3fc..a0036a301b 100644
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h
@@ -37,7 +37,6 @@ extern const int ff_ac3_sample_rate_tab[];
extern const uint16_t ff_ac3_bitrate_tab[19];
extern const uint8_t ff_ac3_rematrix_band_tab[5];
extern const uint8_t ff_eac3_default_cpl_band_struct[18];
-extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2];
extern const uint8_t ff_ac3_log_add_tab[260];
extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3];
extern const uint8_t ff_ac3_bap_tab[64];
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index a3c32ff407..9217a7d0c2 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -26,13 +26,8 @@
#include "config.h"
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len);
-void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift);
-void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
-void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
- const int16_t *window, unsigned n);
void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
const int32_t *coef0,
const int32_t *coef1,
@@ -61,12 +56,8 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
if (have_neon(cpu_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon;
- c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon;
- c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
- c->apply_window_int16 = ff_apply_window_int16_neon;
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
}
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 675ade3101..4ddaa94320 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
cextern pd_1
pd_151: times 4 dd 151
-; used in ff_apply_window_int16()
-pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
-pd_16384: times 4 dd 16384
-
SECTION .text
;-----------------------------------------------------------------------------
@@ -81,133 +77,6 @@ AC3_EXPONENT_MIN
%endif
%undef LOOP_ALIGN
-;-----------------------------------------------------------------------------
-; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
-;
-; This function uses 2 different methods to calculate a valid result.
-; 1) logical 'or' of abs of each element
-; This is used for ssse3 because of the pabsw instruction.
-; It is also used for mmx because of the lack of min/max instructions.
-; 2) calculate min/max for the array, then or(abs(min),abs(max))
-; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
-;-----------------------------------------------------------------------------
-
-; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word
-%macro OR_WORDS_HORIZ 2 ; src, tmp
-%if cpuflag(sse2)
- movhlps %2, %1
- por %1, %2
- pshuflw %2, %1, q0032
- por %1, %2
- pshuflw %2, %1, q0001
- por %1, %2
-%elif cpuflag(mmxext)
- pshufw %2, %1, q0032
- por %1, %2
- pshufw %2, %1, q0001
- por %1, %2
-%else ; mmx
- movq %2, %1
- psrlq %2, 32
- por %1, %2
- movq %2, %1
- psrlq %2, 16
- por %1, %2
-%endif
-%endmacro
-
-%macro AC3_MAX_MSB_ABS_INT16 1
-cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
- pxor m2, m2
- pxor m3, m3
-.loop:
-%ifidn %1, min_max
- mova m0, [srcq]
- mova m1, [srcq+mmsize]
- pminsw m2, m0
- pminsw m2, m1
- pmaxsw m3, m0
- pmaxsw m3, m1
-%else ; or_abs
-%if notcpuflag(ssse3)
- mova m0, [srcq]
- mova m1, [srcq+mmsize]
- ABS2 m0, m1, m3, m4
-%else ; ssse3
- ; using memory args is faster for ssse3
- pabsw m0, [srcq]
- pabsw m1, [srcq+mmsize]
-%endif
- por m2, m0
- por m2, m1
-%endif
- add srcq, mmsize*2
- sub lend, mmsize
- ja .loop
-%ifidn %1, min_max
- ABS2 m2, m3, m0, m1
- por m2, m3
-%endif
- OR_WORDS_HORIZ m2, m0
- movd eax, m2
- and eax, 0xFFFF
- RET
-%endmacro
-
-INIT_MMX mmx
-AC3_MAX_MSB_ABS_INT16 or_abs
-INIT_MMX mmxext
-AC3_MAX_MSB_ABS_INT16 min_max
-INIT_XMM sse2
-AC3_MAX_MSB_ABS_INT16 min_max
-INIT_XMM ssse3
-AC3_MAX_MSB_ABS_INT16 or_abs
-
-;-----------------------------------------------------------------------------
-; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
-;-----------------------------------------------------------------------------
-
-%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set
-cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
- movd m0, shiftd
-.loop:
- mova m1, [srcq ]
- mova m2, [srcq+mmsize ]
- mova m3, [srcq+mmsize*2]
- mova m4, [srcq+mmsize*3]
- %3 m1, m0
- %3 m2, m0
- %3 m3, m0
- %3 m4, m0
- mova [srcq ], m1
- mova [srcq+mmsize ], m2
- mova [srcq+mmsize*2], m3
- mova [srcq+mmsize*3], m4
- add srcq, mmsize*4
- sub lend, mmsize*32/%2
- ja .loop
-.end:
- REP_RET
-%endmacro
-
-;-----------------------------------------------------------------------------
-; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
-;-----------------------------------------------------------------------------
-
-INIT_MMX mmx
-AC3_SHIFT l, 16, psllw
-INIT_XMM sse2
-AC3_SHIFT l, 16, psllw
-
-;-----------------------------------------------------------------------------
-; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
-;-----------------------------------------------------------------------------
-
-INIT_MMX mmx
-AC3_SHIFT r, 32, psrad
-INIT_XMM sse2
-AC3_SHIFT r, 32, psrad
-
;-----------------------------------------------------------------------------
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
;-----------------------------------------------------------------------------
@@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS
INIT_XMM ssse3
AC3_EXTRACT_EXPONENTS
%endif
-
-;-----------------------------------------------------------------------------
-; void ff_apply_window_int16(int16_t *output, const int16_t *input,
-; const int16_t *window, unsigned int len)
-;-----------------------------------------------------------------------------
-
-%macro REVERSE_WORDS 1-2
-%if cpuflag(ssse3) && notcpuflag(atom)
- pshufb %1, %2
-%elif cpuflag(sse2)
- pshuflw %1, %1, 0x1B
- pshufhw %1, %1, 0x1B
- pshufd %1, %1, 0x4E
-%elif cpuflag(mmxext)
- pshufw %1, %1, 0x1B
-%endif
-%endmacro
-
-%macro MUL16FIXED 3
-%if cpuflag(ssse3) ; dst, src, unused
-; dst = ((dst * src) + (1<<14)) >> 15
- pmulhrsw %1, %2
-%elif cpuflag(mmxext) ; dst, src, temp
-; dst = (dst * src) >> 15
-; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
-; in from the pmullw result.
- mova %3, %1
- pmulhw %1, %2
- pmullw %3, %2
- psrlw %3, 15
- psllw %1, 1
- por %1, %3
-%endif
-%endmacro
-
-%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
-%if %1
-cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
-%else
-cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
-%endif
- lea offset2q, [offsetq-mmsize]
-%if cpuflag(ssse3) && notcpuflag(atom)
- mova m5, [pb_revwords]
- ALIGN 16
-%elif %1
- mova m5, [pd_16384]
-%endif
-.loop:
-%if cpuflag(ssse3)
- ; This version does the 16x16->16 multiplication in-place without expanding
- ; to 32-bit. The ssse3 version is bit-identical.
- mova m0, [windowq+offset2q]
- mova m1, [ inputq+offset2q]
- pmulhrsw m1, m0
- REVERSE_WORDS m0, m5
- pmulhrsw m0, [ inputq+offsetq ]
- mova [outputq+offset2q], m1
- mova [outputq+offsetq ], m0
-%elif %1
- ; This version expands 16-bit to 32-bit, multiplies by the window,
- ; adds 16384 for rounding, right shifts 15, then repacks back to words to
- ; save to the output. The window is reversed for the second half.
- mova m3, [windowq+offset2q]
- mova m4, [ inputq+offset2q]
- pxor m0, m0
- punpcklwd m0, m3
- punpcklwd m1, m4
- pmaddwd m0, m1
- paddd m0, m5
- psrad m0, 15
- pxor m2, m2
- punpckhwd m2, m3
- punpckhwd m1, m4
- pmaddwd m2, m1
- paddd m2, m5
- psrad m2, 15
- packssdw m0, m2
- mova [outputq+offset2q], m0
- REVERSE_WORDS m3
- mova m4, [ inputq+offsetq]
- pxor m0, m0
- punpcklwd m0, m3
- punpcklwd m1, m4
- pmaddwd m0, m1
- paddd m0, m5
- psrad m0, 15
- pxor m2, m2
- punpckhwd m2, m3
- punpckhwd m1, m4
- pmaddwd m2, m1
- paddd m2, m5
- psrad m2, 15
- packssdw m0, m2
- mova [outputq+offsetq], m0
-%else
- ; This version does the 16x16->16 multiplication in-place without expanding
- ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
- ; therefore are not bit-identical to the C version.
- mova m0, [windowq+offset2q]
- mova m1, [ inputq+offset2q]
- mova m2, [ inputq+offsetq ]
- MUL16FIXED m1, m0, m3
- REVERSE_WORDS m0
- MUL16FIXED m2, m0, m3
- mova [outputq+offset2q], m1
- mova [outputq+offsetq ], m2
-%endif
- add offsetd, mmsize
- sub offset2d, mmsize
- jae .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-APPLY_WINDOW_INT16 0
-INIT_XMM sse2
-APPLY_WINDOW_INT16 0
-
-INIT_MMX mmxext
-APPLY_WINDOW_INT16 1
-INIT_XMM sse2
-APPLY_WINDOW_INT16 1
-INIT_XMM ssse3
-APPLY_WINDOW_INT16 1
-INIT_XMM ssse3, atom
-APPLY_WINDOW_INT16 1
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index 2e7e2fb6da..2ae762af46 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -30,17 +30,6 @@ void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs
void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
-int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
-int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
-int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
-int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
-
-void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
-void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
-
-void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
-void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
-
void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len);
void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
@@ -50,28 +39,12 @@ int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
-void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
- const int16_t *window, unsigned int len);
-
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
- c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
- c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
}
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
if (!bit_exact) {
@@ -80,43 +53,20 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
- if (bit_exact) {
- c->apply_window_int16 = ff_apply_window_int16_mmxext;
- } else {
- c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
- }
}
if (EXTERNAL_SSE(cpu_flags)) {
c->float_to_fixed24 = ff_float_to_fixed24_sse;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
c->extract_exponents = ff_ac3_extract_exponents_sse2;
- if (bit_exact) {
- c->apply_window_int16 = ff_apply_window_int16_sse2;
- }
- }
-
- if (EXTERNAL_SSE2_FAST(cpu_flags)) {
- c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
- c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
- if (!bit_exact) {
- c->apply_window_int16 = ff_apply_window_int16_round_sse2;
- }
}
if (EXTERNAL_SSSE3(cpu_flags)) {
- c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
- if (cpu_flags & AV_CPU_FLAG_ATOM) {
- c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
- } else {
+ if (!(cpu_flags & AV_CPU_FLAG_ATOM))
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
- c->apply_window_int16 = ff_apply_window_int16_ssse3;
- }
}
}
More information about the ffmpeg-cvslog mailing list