[MPlayer-dev-eng] [PATCH] Make mp3lib SIMD optimizations work on AMD64, Part 5
Zuxy Meng
zuxy.meng at gmail.com
Fri May 25 17:47:24 CEST 2007
Hi,
2007/5/25, Guillaume Poirier <gpoirier at mplayerhq.hu>:
> Hi,
>
> Reimar Döffinger wrote:
> > Hello,
> > On Fri, May 25, 2007 at 02:14:49PM +0800, Zuxy Meng wrote:
> >> We've reached Part 5. This is again a big diff, replacing many 'long'
> >> to 'int', and several 'real' to 'short'. Among those, many are
> >> necessary because under LP64 environment, sizeof(long) != sizeof(int)
> >> and sizeof(long) != sizeof(float); others are optimizations simply
> >> because we really don't need such a long type, or the declaration
> >> didn't match its use (declared as float, used as short).
> >
> > These are separate issues and should be fixed in different patches. Also
> > if you changed long to int because the code assumes 32 bits, it would
> > make more sense to change it to int32_t. Replacing long with int without
> > thinking can make the code even slower on 64 bit systems.
>
> [..]
>
> >
> > This is unrelated and also completely useless since a static function
> > that is used in only one place will be inlined anyway.
> > In addition it is wrong since "static void make_decode_tables()" !=
> > "static void make_decode_tables(void)"
>
>
> Thanks for your quality review Reimar! I realize I'm quite a lousy
> reviewer, but I'm trying to do my best! :-(
But you're always a prompt tester and more importantly, a nice guy
who's easy to get along with:-)
Anway I've updated the patch according to suggestions from you and
Reimar, which should be cleaner.
In this patch, long->int conversions are carried out because any of
the following:
1. Assumed 32-bit access either by inline assembly or by bswap_32
macros. (e.g. variables in decode_i586.c, costab_mmx in decode_MMX.c
and newhead in sr1.c)
2. Value that never exceed 32 bits. (such as variables representing
sampling frequency and frame size, variables initialized to known
32-bit integers, and variables that are assigned by an 'int' and are
used to pass an 'int' parameter.
--
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: mp3lib/decode_i586.c
===================================================================
--- mp3lib/decode_i586.c ?????? 23382??
+++ mp3lib/decode_i586.c ????????????
@@ -33,9 +33,9 @@
#include "mangle.h"
#define real float /* ugly - but only way */
-static long attribute_used buffs[1088]={0};
-static long attribute_used bo=1;
-static long attribute_used saved_ebp=0;
+static int attribute_used buffs[1088]={0};
+static int attribute_used bo=1;
+static int attribute_used saved_ebp=0;
int synth_1to1_pent(real *bandPtr, int channel, short *samples)
{
Index: mp3lib/layer3.c
===================================================================
--- mp3lib/layer3.c ?????? 23384??
+++ mp3lib/layer3.c ????????????
@@ -324,7 +324,7 @@
* read additional side information (for MPEG 1 and MPEG 2)
*/
static int III_get_side_info(struct III_sideinfo *si,int stereo,
- int ms_stereo,long sfreq,int single,int lsf)
+ int ms_stereo,int sfreq,int single,int lsf)
{
int ch, gr;
int powdiff = (single == 3) ? 4 : 0;
@@ -1264,7 +1264,7 @@
static real hybridOut[2][SSLIMIT][SBLIMIT];
{ struct gr_info_s *gr_info = &(sideinfo.ch[0].gr[gr]);
- long part2bits;
+ int part2bits;
if(fr->lsf)
part2bits = III_get_scale_factors_2(scalefacs[0],gr_info,0);
else
@@ -1276,7 +1276,7 @@
if(stereo == 2) {
struct gr_info_s *gr_info = &(sideinfo.ch[1].gr[gr]);
- long part2bits;
+ int part2bits;
if(fr->lsf)
part2bits = III_get_scale_factors_2(scalefacs[1],gr_info,i_stereo);
else
Index: mp3lib/decode_MMX.c
===================================================================
--- mp3lib/decode_MMX.c ?????? 23383??
+++ mp3lib/decode_MMX.c ????????????
@@ -14,7 +14,7 @@
extern void (*dct64_MMX_func)(short*, short*, real*);
static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
-unsigned long __attribute__((aligned(16))) costab_mmx[] =
+unsigned int __attribute__((aligned(16))) costab_mmx[] =
{
1056974725,
1057056395,
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c ?????? 23383??
+++ mp3lib/sr1.c ????????????
@@ -108,11 +107,11 @@
{0,8,16,24,32,40,48,56,64,80,96,112,128,144,160,} }
};
-static long freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 };
+static int freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 };
LOCAL unsigned int getbits(short number_of_bits)
{
- unsigned long rval;
+ unsigned rval;
// if(MP3_frames>=7741) printf("getbits: bits=%d bitsleft=%d wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
if((bitsleft-=number_of_bits)<0) return 0;
if(!number_of_bits) return 0;
@@ -133,12 +132,12 @@
LOCAL unsigned int getbits_fast(short number_of_bits)
{
- unsigned long rval;
+ unsigned rval;
// if(MP3_frames>=7741) printf("getbits_fast: bits=%d bitsleft=%d wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
if((bitsleft-=number_of_bits)<0) return 0;
if(!number_of_bits) return 0;
#if defined(CAN_COMPILE_X86_ASM)
- rval = bswap_16(*((unsigned short *)wordpointer));
+ rval = bswap_16(*((uint16_t *)wordpointer));
#else
/*
* we may not be able to address unaligned 16-bit data on non-x86 cpus.
@@ -167,7 +166,7 @@
return ((rval>>7)&1);
}
-LOCAL void set_pointer(long backstep)
+LOCAL void set_pointer(int backstep)
{
// if(backstep!=512 && backstep>fsizeold)
// printf("\rWarning! backstep (%d>%d) \n",backstep,fsizeold);
@@ -178,10 +177,10 @@
// printf("Backstep %d (bitsleft=%d)\n",backstep,bitsleft);
}
-LOCAL int stream_head_read(unsigned char *hbuf,unsigned long *newhead){
+LOCAL int stream_head_read(unsigned char *hbuf,uint32_t *newhead){
if(mp3_read(hbuf,4) != 4) return FALSE;
#if defined(CAN_COMPILE_X86_ASM)
- *newhead = bswap_32(*((unsigned long *)hbuf));
+ *newhead = bswap_32(*((uint32_t*)hbuf));
#else
/*
* we may not be able to address unaligned 32-bit data on non-x86 cpus.
@@ -196,8 +195,8 @@
return TRUE;
}
-LOCAL int stream_head_shift(unsigned char *hbuf,unsigned long *head){
- *((unsigned long *)hbuf) >>= 8;
+LOCAL int stream_head_shift(unsigned char *hbuf,uint32_t *head){
+ *((unsigned*)hbuf) >>= 8;
if(mp3_read(hbuf+3,1) != 1) return 0;
*head <<= 8;
*head |= hbuf[3];
@@ -208,7 +207,7 @@
* decode a header and write the information
* into the frame structure
*/
-LOCAL int decode_header(struct frame *fr,unsigned long newhead){
+LOCAL int decode_header(struct frame *fr,uint32_t newhead){
// head_check:
if( (newhead & 0xffe00000) != 0xffe00000 ||
@@ -217,8 +216,8 @@
fr->lay = 4-((newhead>>17)&3);
// if(fr->lay!=3) return FALSE;
- if( newhead & ((long)1<<20) ) {
- fr->lsf = (newhead & ((long)1<<19)) ? 0x0 : 0x1;
+ if( newhead & (1<<20) ) {
+ fr->lsf = (newhead & (1<<19)) ? 0x0 : 0x1;
fr->mpeg25 = 0;
} else {
fr->lsf = 1;
@@ -253,7 +252,7 @@
case 2:
MP3_bitrate=tabsel_123[fr->lsf][1][fr->bitrate_index];
MP3_samplerate=freqs[fr->sampling_frequency];
- fr->framesize = (long) MP3_bitrate * 144000;
+ fr->framesize = MP3_bitrate * 144000;
fr->framesize /= MP3_samplerate;
MP3_framesize=fr->framesize;
fr->framesize += fr->padding - 4;
@@ -267,7 +266,7 @@
MP3_bitrate=tabsel_123[fr->lsf][2][fr->bitrate_index];
MP3_samplerate=freqs[fr->sampling_frequency];
- fr->framesize = (long) MP3_bitrate * 144000;
+ fr->framesize = MP3_bitrate * 144000;
fr->framesize /= MP3_samplerate<<(fr->lsf);
MP3_framesize=fr->framesize;
fr->framesize += fr->padding - 4;
@@ -276,7 +275,7 @@
// fr->jsbound = (fr->mode == MPG_MD_JOINT_STEREO) ? (fr->mode_ext<<2)+4 : 32;
MP3_bitrate=tabsel_123[fr->lsf][0][fr->bitrate_index];
MP3_samplerate=freqs[fr->sampling_frequency];
- fr->framesize = (long) MP3_bitrate * 12000;
+ fr->framesize = MP3_bitrate * 12000;
fr->framesize /= MP3_samplerate;
MP3_framesize = ((fr->framesize+fr->padding)<<2);
fr->framesize = MP3_framesize-4;
@@ -314,7 +313,7 @@
* read next frame return number of frames read.
*/
LOCAL int read_frame(struct frame *fr){
- unsigned long newhead;
+ uint32_t newhead;
union {
unsigned char buf[8];
unsigned long dummy; // for alignment
Index: mp3lib/mpg123.h
===================================================================
--- mp3lib/mpg123.h ?????? 23382??
+++ mp3lib/mpg123.h ????????????
@@ -71,7 +71,7 @@
int lay;
int error_protection;
int bitrate_index;
- long sampling_frequency;
+ int sampling_frequency;
int padding;
int extension;
int mode;
@@ -79,7 +79,7 @@
int copyright;
int original;
int emphasis;
- long framesize; /* computed framesize */
+ int framesize; /* computed framesize */
};
Index: mp3lib/tabinit.c
===================================================================
--- mp3lib/tabinit.c ?????? 23382??
+++ mp3lib/tabinit.c ????????????
@@ -8,7 +8,7 @@
static real cos64[32], cos32[16], cos16[8], cos8[4], cos4[2];
real *mp3lib_pnts[]={ cos64,cos32,cos16,cos8,cos4 };
-static long intwinbase[] = {
+static int intwinbase[] = {
0, -1, -1, -1, -1, -1, -1, -2, -2, -2,
-2, -3, -3, -4, -4, -5, -5, -6, -7, -7,
-8, -9, -10, -11, -13, -14, -16, -17, -19, -21,
More information about the MPlayer-dev-eng
mailing list