[MPlayer-dev-eng] xvidix threading

Zuxy Meng zuxy.meng at gmail.com
Wed Dec 26 04:52:40 CET 2007


Hi,

2007/9/16, Howard Chu <hyc at highlandsun.com>:
> (Resending after subscribing...) I've been messing around with my Dvico FusionHDTV tuner card and
> running into performance problems. Playing standard-definition channels is easy enough, but playing
> a full 1920x1080i channel was just too much. Using xvidix/radeon for output...
>
> oprofile showed that the bulk of the execution time is in memcpy, from vidix_draw_slice:
>
> CPU: AMD64 processors, speed 2600 MHz (estimated)
> Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit
> mask) count 50000
> samples  %        image name               app name                 symbol name
> 179887   40.7671  mplayer                  mplayer                  fast_memcpy
> 49657    11.2536  mplayer                  mplayer                  swScale_MMX2
> 29557     6.6984  mplayer                  mplayer                  mmxext_idct
> 20101     4.5554  mplayer                  mplayer                  hcscale_MMX2
> 15602     3.5358  mplayer                  mplayer                  slice_intra_DCT
> 15397     3.4894  mplayer                  mplayer                  MC_put_o_16_mmxext
> 14941     3.3860  mplayer                  mplayer                  mpeg2_slice
> 11475     2.6005  mplayer                  mplayer                  motion_fr_frame_420
> 9799      2.2207  mplayer                  mplayer                  get_non_intra_block
>
> CPU: AMD64 processors, speed 2600 MHz (estimated)
> Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit
> mask) count 50000
> samples  %        image name               app name                 symbol name
> -------------------------------------------------------------------------------
>    142       0.4817  mplayer                  mplayer                  vidix_draw_slice_420
>    29337    99.5183  mplayer                  mplayer                  fast_memcpy
> 29479    22.3475  mplayer                  mplayer                  fast_memcpy_SSE
>    29479    100.000  mplayer                  mplayer                  fast_memcpy_SSE [self]
> -------------------------------------------------------------------------------
>    73        0.9778  mplayer                  mplayer                  mpeg2_slice
>    7393     99.0222  mplayer                  mplayer                  slice_non_intra_DCT
> 7466      5.6598  mplayer                  mplayer                  get_non_intra_block
>    7466     100.000  mplayer                  mplayer                  get_non_intra_block [self]
> -------------------------------------------------------------------------------
>
> I tried scaling it down to 960x540 which was enough to bring the CPU usage down to around 95% or so,
> but it was still maxing out occasionally. Since I've got a dual core processor and the other core
> was idle, I tried moving the draw_slice function into a separate thread. This turned out to work
> pretty well; instead of maxing out at 100% of one core I can now manage higher resolutions without
> any frame losses (using both cores). (I also tried to use the -dr option but it only worked at a few
> choices of image sizes. Not sure what the story is there.)
>
> The attached diff (against current svn) is just for your consideration; for real use it would need a
> command-line switch to toggle it on/off. By the way, it seems to have some sync problems when used
> without the scale filter.
>
> --
>    -- Howard Chu
>    Chief Architect, Symas Corp.  http://www.symas.com
>    Director, Highland Sun        http://highlandsun.com/hyc/
>    Chief Architect, OpenLDAP     http://www.openldap.org/project/
>
>
> Index: vosub_vidix.c
> ===================================================================
> --- vosub_vidix.c       (revision 24537)
> +++ vosub_vidix.c       (working copy)
> @@ -22,6 +22,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <errno.h>
> +#include <pthread.h>
>
>  #include "config.h"
>  #include "mp_msg.h"
> @@ -49,9 +50,14 @@
>  static vidix_playback_t   vidix_play;
>  static vidix_fourcc_t    vidix_fourcc;
>  static vo_functions_t *   vo_server;
> +static vo_functions_t vo_local;
>  static vidix_yuv_t       dstrides;
>  /*static uint32_t (*server_control)(uint32_t request, void *data, ...);*/
>
> +static pthread_mutex_t vidix_mutex = PTHREAD_MUTEX_INITIALIZER;
> +static pthread_cond_t vidix_cond = PTHREAD_COND_INITIALIZER;
> +static pthread_t vidix_thread;
> +
>  int vidix_start(void)
>  {
>     int err;
> @@ -96,7 +102,7 @@
>     dest += dstrides.y*y + x;
>     src = image[0];
>     for(i=0;i<h;i++){
> -        memcpy(dest,src,w);
> +        mem2agpcpy(dest,src,w);
>         src+=stride[0];
>         dest += dstrides.y;
>     }
> @@ -122,6 +128,7 @@
>             src += stride[1];
>            src2+= stride[2];
>        }
> +       i = -1;
>     }
>     else
>     {
> @@ -130,7 +137,7 @@
>                dest += dstrides.v*y/4 + x;
>                src = image[1];
>                for(i=0;i<h/2;i++){
> -                       memcpy(dest,src,w/2);
> +                       mem2agpcpy(dest,src,w/2);
>                        src+=stride[1];
>                        dest+=dstrides.v/2;
>                }
> @@ -140,13 +147,13 @@
>                dest += dstrides.u*y/4 + x;
>                src = image[2];
>                for(i=0;i<h/2;i++){
> -                       memcpy(dest,src,w/2);
> +                       mem2agpcpy(dest,src,w/2);
>                        src+=stride[2];
>                        dest += dstrides.u/2;
>                }
> -               return 0;
> +               i = 0;
>     }
> -    return -1;
> +    return i;
>  }
>
>  static uint32_t vidix_draw_slice_410(uint8_t *image[], int stride[], int w,int h,int x,int y)
> @@ -241,10 +248,38 @@
>     return 0;
>  }
>
> +static uint8_t *vds_image[3];
> +static int vds_stride[3], vds_w, vds_h, vds_x, vds_y;
> +
> +static void vidix_task()
> +{
> +       pthread_mutex_lock(&vidix_mutex);
> +       pthread_cond_signal(&vidix_cond);
> +
> +       for(;;) {
> +               pthread_cond_wait(&vidix_cond, &vidix_mutex);
> +               if (!vds_image[0]) break;
> +               vo_local.draw_slice(vds_image, vds_stride, vds_w, vds_h, vds_x, vds_y);
> +               vds_image[0] = NULL;
> +       }
> +}
> +
>  uint32_t vidix_draw_slice(uint8_t *image[], int stride[], int w,int h,int x,int y)
>  {
> -    mp_msg(MSGT_VO,MSGL_WARN, MSGTR_LIBVO_SUB_VIDIX_DummyVidixdrawsliceWasCalled);
> -    return -1;
> +       pthread_mutex_lock(&vidix_mutex);
> +       vds_image[0] = image[0];
> +       vds_image[1] = image[1];
> +       vds_image[2] = image[2];
> +       vds_stride[0] = stride[0];
> +       vds_stride[1] = stride[1];
> +       vds_stride[2] = stride[2];
> +       vds_w = w;
> +       vds_h = h;
> +       vds_x = x;
> +       vds_y = y;
> +       pthread_cond_signal(&vidix_cond);
> +       pthread_mutex_unlock(&vidix_mutex);
> +       return 0;
>  }
>
>  static uint32_t  vidix_draw_image(mp_image_t *mpi){
> @@ -279,7 +314,7 @@
>  static void draw_alpha(int x0,int y0, int w,int h, unsigned char* src, unsigned char *srca, int stride)
>  {
>     uint32_t apitch,bespitch;
> -    void *lvo_mem;
> +    char *lvo_mem;
>     lvo_mem = vidix_mem + vidix_play.offsets[next_frame] + vidix_play.offset.y;
>     apitch = vidix_play.dest.pitch.y-1;
>     switch(vidix_play.fourcc){
> @@ -557,11 +592,17 @@
>            is_422_planes_eq = sstride == dstrides.y;
>
>            if(src_format == IMGFMT_YV12 || src_format == IMGFMT_I420 || src_format == IMGFMT_IYUV)
> -                vo_server->draw_slice = vidix_draw_slice_420;
> +                vo_local.draw_slice = vidix_draw_slice_420;
>            else if (src_format == IMGFMT_YVU9 || src_format == IMGFMT_IF09)
> -                vo_server->draw_slice = vidix_draw_slice_410;
> -           else vo_server->draw_slice = vidix_draw_slice_packed;
> +                vo_local.draw_slice = vidix_draw_slice_410;
> +           else vo_local.draw_slice = vidix_draw_slice_packed;
>        }
> +       if (!vidix_thread) {
> +               pthread_mutex_lock(&vidix_mutex);
> +               pthread_create(&vidix_thread, NULL, vidix_task, NULL);
> +               pthread_cond_wait(&vidix_cond, &vidix_mutex);
> +               pthread_mutex_unlock(&vidix_mutex);
> +       }
>        return 0;
>  }

Did anybody ever care about this?

-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6



More information about the MPlayer-dev-eng mailing list