[MPlayer-dev-eng] [PATCH] Make mp3lib SIMD optimizations work on AMD64, Part 5

Zuxy Meng zuxy.meng at gmail.com
Fri May 25 17:47:24 CEST 2007


Hi,

2007/5/25, Guillaume Poirier <gpoirier at mplayerhq.hu>:
> Hi,
>
> Reimar Döffinger wrote:
> > Hello,
> > On Fri, May 25, 2007 at 02:14:49PM +0800, Zuxy Meng wrote:
> >>  We've reached Part 5. This is again a big diff, replacing many 'long'
> >>  to 'int', and several 'real' to 'short'. Among those, many are
> >>  necessary because under LP64 environment, sizeof(long) != sizeof(int)
> >>  and sizeof(long) != sizeof(float); others are optimizations simply
> >>  because we really don't need such a long type, or the declaration
> >>  didn't match its use (declared as float, used as short).
> >
> > These are separate issues and should be fixed in different patches. Also
> > if you changed long to int because the code assumes 32 bits, it would
> > make more sense to change it to int32_t. Replacing long with int without
> > thinking can make the code even slower on 64 bit systems.
>
> [..]
>
> >
> > This is unrelated and also completely useless since a static function
> > that is used in only one place will be inlined anyway.
> > In addition it is wrong since "static void make_decode_tables()" !=
> > "static void make_decode_tables(void)"
>
>
> Thanks for your quality review Reimar! I realize I'm quite a lousy
> reviewer, but I'm trying to do my best! :-(

But you're always a prompt tester and more importantly, a nice guy
who's easy to get along with:-)

Anway I've updated the patch according to suggestions from you and
Reimar, which should be cleaner.

In this patch, long->int conversions are carried out because any of
the following:
1. Assumed 32-bit access either by inline assembly or by bswap_32
macros. (e.g. variables in decode_i586.c, costab_mmx in decode_MMX.c
and newhead in sr1.c)
2. Value that never exceed 32 bits. (such as variables representing
sampling frequency and frame size, variables initialized to known
32-bit integers, and variables that are assigned by an 'int' and are
used to pass an 'int' parameter.
-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: mp3lib/decode_i586.c
===================================================================
--- mp3lib/decode_i586.c	?????? 23382??
+++ mp3lib/decode_i586.c	????????????
@@ -33,9 +33,9 @@
 #include "mangle.h"
 #define real float /* ugly - but only way */
 
-static long attribute_used buffs[1088]={0};
-static long attribute_used bo=1;
-static long attribute_used saved_ebp=0;
+static int attribute_used buffs[1088]={0};
+static int attribute_used bo=1;
+static int attribute_used saved_ebp=0;
 
 int synth_1to1_pent(real *bandPtr, int channel, short *samples)
 {
Index: mp3lib/layer3.c
===================================================================
--- mp3lib/layer3.c	?????? 23384??
+++ mp3lib/layer3.c	????????????
@@ -324,7 +324,7 @@
  * read additional side information (for MPEG 1 and MPEG 2)
  */
 static int III_get_side_info(struct III_sideinfo *si,int stereo,
- int ms_stereo,long sfreq,int single,int lsf)
+ int ms_stereo,int sfreq,int single,int lsf)
 {
    int ch, gr;
    int powdiff = (single == 3) ? 4 : 0;
@@ -1264,7 +1264,7 @@
     static real hybridOut[2][SSLIMIT][SBLIMIT];
 
     { struct gr_info_s *gr_info = &(sideinfo.ch[0].gr[gr]);
-      long part2bits;
+      int part2bits;
       if(fr->lsf)
         part2bits = III_get_scale_factors_2(scalefacs[0],gr_info,0);
       else
@@ -1276,7 +1276,7 @@
     if(stereo == 2) {
       struct gr_info_s *gr_info = &(sideinfo.ch[1].gr[gr]);
       
-      long part2bits;
+      int part2bits;
       if(fr->lsf) 
         part2bits = III_get_scale_factors_2(scalefacs[1],gr_info,i_stereo);
       else
Index: mp3lib/decode_MMX.c
===================================================================
--- mp3lib/decode_MMX.c	?????? 23383??
+++ mp3lib/decode_MMX.c	????????????
@@ -14,7 +14,7 @@
 extern void (*dct64_MMX_func)(short*, short*, real*);
 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
-unsigned long __attribute__((aligned(16))) costab_mmx[] =
+unsigned int __attribute__((aligned(16))) costab_mmx[] =
 {
 	1056974725,
 	1057056395,
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c	?????? 23383??
+++ mp3lib/sr1.c	????????????
@@ -108,11 +107,11 @@
      {0,8,16,24,32,40,48,56,64,80,96,112,128,144,160,} }
 };
 
-static long freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 };
+static int freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 };
 
 LOCAL unsigned int getbits(short number_of_bits)
 {
-  unsigned long rval;
+  unsigned rval;
 //  if(MP3_frames>=7741) printf("getbits: bits=%d  bitsleft=%d  wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
   if((bitsleft-=number_of_bits)<0) return 0;
   if(!number_of_bits) return 0;
@@ -133,12 +132,12 @@
 
 LOCAL unsigned int getbits_fast(short number_of_bits)
 {
-  unsigned long rval;
+  unsigned rval;
 //  if(MP3_frames>=7741) printf("getbits_fast: bits=%d  bitsleft=%d  wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
   if((bitsleft-=number_of_bits)<0) return 0;
   if(!number_of_bits) return 0;
 #if defined(CAN_COMPILE_X86_ASM)
-  rval = bswap_16(*((unsigned short *)wordpointer));
+  rval = bswap_16(*((uint16_t *)wordpointer));
 #else
   /*
    * we may not be able to address unaligned 16-bit data on non-x86 cpus.
@@ -167,7 +166,7 @@
   return ((rval>>7)&1);
 }
 
-LOCAL void set_pointer(long backstep)
+LOCAL void set_pointer(int backstep)
 {
 //  if(backstep!=512 && backstep>fsizeold)
 //    printf("\rWarning! backstep (%d>%d)                                         \n",backstep,fsizeold);
@@ -178,10 +177,10 @@
 //  printf("Backstep %d  (bitsleft=%d)\n",backstep,bitsleft);
 }
 
-LOCAL int stream_head_read(unsigned char *hbuf,unsigned long *newhead){
+LOCAL int stream_head_read(unsigned char *hbuf,uint32_t *newhead){
   if(mp3_read(hbuf,4) != 4) return FALSE;
 #if defined(CAN_COMPILE_X86_ASM)
-  *newhead = bswap_32(*((unsigned long *)hbuf));
+  *newhead = bswap_32(*((uint32_t*)hbuf));
 #else
   /*
    * we may not be able to address unaligned 32-bit data on non-x86 cpus.
@@ -196,8 +195,8 @@
   return TRUE;
 }
 
-LOCAL int stream_head_shift(unsigned char *hbuf,unsigned long *head){
-  *((unsigned long *)hbuf) >>= 8;
+LOCAL int stream_head_shift(unsigned char *hbuf,uint32_t *head){
+  *((unsigned*)hbuf) >>= 8;
   if(mp3_read(hbuf+3,1) != 1) return 0;
   *head <<= 8;
   *head |= hbuf[3];
@@ -208,7 +207,7 @@
  * decode a header and write the information
  * into the frame structure
  */
-LOCAL int decode_header(struct frame *fr,unsigned long newhead){
+LOCAL int decode_header(struct frame *fr,uint32_t newhead){
 
     // head_check:
     if( (newhead & 0xffe00000) != 0xffe00000 ||  
@@ -217,8 +216,8 @@
     fr->lay = 4-((newhead>>17)&3);
 //    if(fr->lay!=3) return FALSE;
 
-    if( newhead & ((long)1<<20) ) {
-      fr->lsf = (newhead & ((long)1<<19)) ? 0x0 : 0x1;
+    if( newhead & (1<<20) ) {
+      fr->lsf = (newhead & (1<<19)) ? 0x0 : 0x1;
       fr->mpeg25 = 0;
     } else {
       fr->lsf = 1;
@@ -253,7 +252,7 @@
   case 2:
     MP3_bitrate=tabsel_123[fr->lsf][1][fr->bitrate_index];
     MP3_samplerate=freqs[fr->sampling_frequency];
-    fr->framesize = (long) MP3_bitrate * 144000;
+    fr->framesize = MP3_bitrate * 144000;
     fr->framesize /= MP3_samplerate;
     MP3_framesize=fr->framesize;
     fr->framesize += fr->padding - 4;
@@ -267,7 +266,7 @@
 
     MP3_bitrate=tabsel_123[fr->lsf][2][fr->bitrate_index];
     MP3_samplerate=freqs[fr->sampling_frequency];
-    fr->framesize  = (long) MP3_bitrate * 144000;
+    fr->framesize  = MP3_bitrate * 144000;
     fr->framesize /= MP3_samplerate<<(fr->lsf);
     MP3_framesize=fr->framesize;
     fr->framesize += fr->padding - 4;
@@ -276,7 +275,7 @@
 //    fr->jsbound = (fr->mode == MPG_MD_JOINT_STEREO) ? (fr->mode_ext<<2)+4 : 32;
     MP3_bitrate=tabsel_123[fr->lsf][0][fr->bitrate_index];
     MP3_samplerate=freqs[fr->sampling_frequency];
-    fr->framesize  = (long) MP3_bitrate * 12000;
+    fr->framesize  = MP3_bitrate * 12000;
     fr->framesize /= MP3_samplerate;
     MP3_framesize  = ((fr->framesize+fr->padding)<<2);
     fr->framesize  = MP3_framesize-4;
@@ -314,7 +313,7 @@
  * read next frame     return number of frames read.
  */
 LOCAL int read_frame(struct frame *fr){
-  unsigned long newhead;
+  uint32_t newhead;
   union {
     unsigned char buf[8];
     unsigned long dummy; // for alignment
Index: mp3lib/mpg123.h
===================================================================
--- mp3lib/mpg123.h	?????? 23382??
+++ mp3lib/mpg123.h	????????????
@@ -71,7 +71,7 @@
     int lay;
     int error_protection;
     int bitrate_index;
-    long sampling_frequency;
+    int sampling_frequency;
     int padding;
     int extension;
     int mode;
@@ -79,7 +79,7 @@
     int copyright;
          int original;
          int emphasis;
-         long framesize; /* computed framesize */
+         int framesize; /* computed framesize */
 };
 
 
Index: mp3lib/tabinit.c
===================================================================
--- mp3lib/tabinit.c	?????? 23382??
+++ mp3lib/tabinit.c	????????????
@@ -8,7 +8,7 @@
 static real cos64[32], cos32[16], cos16[8], cos8[4], cos4[2];
 real *mp3lib_pnts[]={ cos64,cos32,cos16,cos8,cos4 };
 
-static long intwinbase[] = {
+static int intwinbase[] = {
      0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,    -2,    -2,
     -2,    -3,    -3,    -4,    -4,    -5,    -5,    -6,    -7,    -7,
     -8,    -9,   -10,   -11,   -13,   -14,   -16,   -17,   -19,   -21,


More information about the MPlayer-dev-eng mailing list