[FFmpeg-devel] [PATCH] Patch cleanup for MPEG 1 & 2 optimizations

Mon Apr 7 00:45:37 CEST 2008

On Sun, Apr 06, 2008 at 11:32:49PM +0200, -strites- wrote:
> http://roundup.mplayerhq.hu/roundup/ffmpeg/issue100
> 
> I modified "libavcodec/mpegvideo_common.h" and "libavcodec/mpegvideo.c"
> according to the patch proposed, making "capsule functions" and defining a
> "no_mpeg12" variable taking inspiration from the reply.
> 
> Omitted some patch modifications in "libavcodec/mpegvideo.c" because them
> were breaking the test suite.
> -- 
> Keiji Costantini

> From 2a7195e815b0f3f7da6ec92af2fa9297ef53e594 Mon Sep 17 00:00:00 2001
> From: strites <strites at gmail.com>
> Date: Sun, 6 Apr 2008 15:13:51 +0200
> Subject: [PATCH] cosmetics
> 
> ---
>  libavcodec/mpegvideo_common.h |   50 +++++++++++++++++++++++++++-------------
>  1 files changed, 34 insertions(+), 16 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo_common.h b/libavcodec/mpegvideo_common.h
> index 2be8172..d8cca0c 100644
> --- a/libavcodec/mpegvideo_common.h
> +++ b/libavcodec/mpegvideo_common.h
[...]
> -            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
> -                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
> +            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize,
> +                                17, 17+field_based, src_x,
> +                                src_y<<field_based, s->h_edge_pos,
> +                                s->v_edge_pos);

I think src_x and y should not be split like this over 2 lines.

>              ptr_y = s->edge_emu_buffer;
>              if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
>                  uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
> -                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
> -                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
> -                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
> -                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
> +                ff_emulated_edge_mc(uvbuf ,
> +                                    ptr_cb, s->uvlinesize, 9,
> +                                    9+field_based,
> +                                    uvsrc_x,
> +                                    uvsrc_y<<field_based,
> +                                    s->h_edge_pos>>1,
> +                                    s->v_edge_pos>>1);

Similarly block_w and block_h should not be split like that.

[...]

> From 01c21766e0b1263db5f4dce388c7c4a4d6e6c9ff Mon Sep 17 00:00:00 2001
> From: strites <strites at gmail.com>
> Date: Sun, 6 Apr 2008 19:56:23 +0200
> Subject: [PATCH] Unroll codepath
> 
> ---
>  libavcodec/mpegvideo_common.h |   34 ++++++++++++++++++++++++++--------
>  1 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo_common.h b/libavcodec/mpegvideo_common.h
> index d8cca0c..b1bb279 100644
> --- a/libavcodec/mpegvideo_common.h
> +++ b/libavcodec/mpegvideo_common.h
> @@ -237,13 +237,12 @@ static inline int hpel_motion(MpegEncContext *s,
>      return emu;
>  }
>  
> -/* apply one mpeg motion vector to the three components */
>  static av_always_inline
> -void mpeg_motion(MpegEncContext *s,
> +void mpeg_motion_internal(MpegEncContext *s,
>                   uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
>                   int field_based, int bottom_field, int field_select,
>                   uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
> -                 int motion_x, int motion_y, int h)
> +                 int motion_x, int motion_y, int h, int not_mpeg12)

I think is_mpeg12 / !is_mpeg12 would be clearer than !not_mpeg12 / not_mpeg12.

>  {
>      uint8_t *ptr_y, *ptr_cb, *ptr_cr;
>      int dxy, uvdxy, mx, my, src_x, src_y,
> @@ -264,8 +263,7 @@ if(s->quarter_sample)
>      dxy = ((motion_y & 1) << 1) | (motion_x & 1);
>      src_x = s->mb_x* 16               + (motion_x >> 1);
>      src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
> -

cosmetic

[...]
> @@ -358,18 +356,38 @@ if(s->quarter_sample)
>  
>      pix_op[0][dxy](dest_y, ptr_y, linesize, h);
>  
> -    if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
> +    if(not_mpeg12 && !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
>          pix_op[s->chroma_x_shift][uvdxy]
>                  (dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
>          pix_op[s->chroma_x_shift][uvdxy]
>                  (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
>      }

This is not correct.

[...]
> @@ -362,8 +362,8 @@ if(s->quarter_sample)
>          pix_op[s->chroma_x_shift][uvdxy]
>                  (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
>      }
> -    if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
> -         s->out_format == FMT_H261 && not_mpeg12){
> +    if(not_mpeg12 && (ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
> +         s->out_format == FMT_H261 ){
>          ff_h261_loop_filter(s);
>      }
>  }

I agree that this is better but this should be correctly placed in the
first patch. Not placed at the end and then moved to the begin in a
subsequent patch.

> @@ -639,12 +639,12 @@ static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
>   * @param pic_op qpel motion compensation function (average or put normally)
>   * the motion vectors are taken from s->mv and the MV type from s->mv_type
>   */
> -static inline void MPV_motion(MpegEncContext *s,
> +static inline void MPV_motion_internal(MpegEncContext *s,
>                                uint8_t *dest_y, uint8_t *dest_cb,
>                                uint8_t *dest_cr, int dir,
>                                uint8_t **ref_picture,
>                                op_pixels_func (*pix_op)[4],
> -                              qpel_mc_func (*qpix_op)[16])
> +                              qpel_mc_func (*qpix_op)[16],int not_mpeg12)

There should be a space after the ,

[...]
> @@ -710,7 +710,7 @@ static inline void MPV_motion(MpegEncContext *s,
>              mx += mv[0][0];
>              my += mv[0][1];
>          }
> -        if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
> +        if(!not_mpeg12 && !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
>              chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
>  
>          return;

This is not correct.

[...]
> @@ -1751,7 +1751,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
>      mb_x = s->mb_x;
>      mb_y = s->mb_y;
>  
> -    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
> +    if(not_mpeg12 && s->avctx->debug&FF_DEBUG_DCT_COEFF) {
>         /* save DCT coefficients */
>         int i,j;
>         DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];

This looks like it would break FF_DEBUG_DCT_COEFF with mpeg1/2

[...]
> @@ -1830,7 +1830,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
>              /* motion handling */
>              /* decoding or more than one mb_type (MC was already done otherwise) */
>              if(!s->encoding){
> -                if(lowres_flag){
> +                if(not_mpeg12 && lowres_flag){
>                      h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
>  
>                      if (s->mv_dir & MV_DIR_FORWARD) {

This looks like it would break lowres decoding.

> @@ -1859,8 +1859,8 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
>              }
>  
>              /* skip dequant / idct if we are really late ;) */
> -            if(s->hurry_up>1) goto skip_idct;
> -            if(s->avctx->skip_idct){
> +            if(not_mpeg12 && s->hurry_up>1) goto skip_idct;
> +            if(not_mpeg12 && s->avctx->skip_idct){
>                  if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
>                     ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
>                     || s->avctx->skip_idct >= AVDISCARD_ALL)

These look like they would break skip_idct and hurry_up.

> @@ -1868,8 +1868,8 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
>              }
>  
>              /* add dct residue */
> -            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
> -                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
> +            if((s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
> +                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant)))){

cosmetics

[...]
> @@ -1921,7 +1921,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
>              }
>          } else {
>              /* dct only in intra block */
> -            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
> +            if((s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO))){
>                  put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
>                  put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
>                  put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);

cosmetics

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Democracy is the form of government in which you can choose your dictator
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080407/84f1242e/attachment.pgp>