[FFmpeg-devel] [patch 1/3]Fix bug for POWER LE: /libavcodec/ppc/me_cmp.c

Fri Oct 31 13:03:58 CET 2014

On Fri, Oct 31, 2014 at 06:45:51PM +0800, rongyan wrote:
> Hi,
> There are 3 patches to fix bugs for POWER8 little endian. I will send 3 patches in 3 different email. This is the first, functions sad8_altivec() sse8_altivec(), hadamard8_diff8x8_altivec(), hadamard8_diff16x8_altivec() are fixed.
>  The fate test result after merge these 3 patches can be found on website by searching "ibmcrl", also attached in the below to facilitate the review. The passed test cases change from  1679/2182 to 2202/2235.
>  
>  Thanks.
>   
>  Rong Yan
>   ------------------
>   The world has enough for everyone's need, but not enough for everyone's greed.

>  me_cmp.c |  460 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 408 insertions(+), 52 deletions(-)
> dd8fe6d196e1330633e22dd0395b5c010e5585df  0001-libavcodec-ppc-me_cmp.c-fix-sad8_altivec-sse8_altive.patch
> From a1bd28eb6d775e41defca51fc156a23e4d3f16ed Mon Sep 17 00:00:00 2001
> From: Rong Yan <rongyan236 at gmail.com>
> Date: Fri, 31 Oct 2014 10:18:01 +0000
> Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix sad8_altivec()
>  sse8_altivec()     hadamard8_diff8x8_altivec()    
>  hadamard8_diff16x8_altivec()     for POWER LE
> 
> ---
>  libavcodec/ppc/me_cmp.c | 460 ++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 408 insertions(+), 52 deletions(-)
> 
> diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c
> index 8ff8193..9759b17 100644
> --- a/libavcodec/ppc/me_cmp.c
> +++ b/libavcodec/ppc/me_cmp.c
> @@ -34,16 +34,18 @@
>  #include "libavcodec/mpegvideo.h"
>  #include "libavcodec/me_cmp.h"
>  
> +#include <stdio.h>
>  #if HAVE_ALTIVEC
> -#if HAVE_VSX
> +
> +#if !HAVE_BIGENDIAN
>  static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>                                                          int line_size, int h)
>  {
>      int i, s = 0;
> -    const vector unsigned char zero =
> +    const vector unsigned char __attribute__((aligned(16))) zero =
>          (const vector unsigned char) vec_splat_u8(0);
> -    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> -    vector signed int sumdiffs;
> +    vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> +    vector signed int __attribute__((aligned(16))) sumdiffs;

adding alignment directives should be a sepearte patch

>  
>      for (i = 0; i < h; i++) {
>          /* Read unaligned pixels into our vectors. The vectors are as follows:
> @@ -69,10 +71,11 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>      /* Sum up the four partial sums, and put the result into s. */
>      sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
>      sumdiffs = vec_splat(sumdiffs, 3);
> -    vec_vsx_st(sumdiffs, 0, &s);
> +    //vec_vsx_st(sumdiffs, 0, &s);
> +    vec_ste(sumdiffs, 0, &s);
>      return s;
>  }
> -#else
> +#else /* else of #if !HAVE_BIGENDIAN */
>  static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>                              int line_size, int h)
>  {
> @@ -114,19 +117,19 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>  
>      return s;
>  }

> -#endif /* HAVE_VSX */
> +#endif /* end of #if !HAVE_BIGENDIAN */

this is inconsistent with the rest of the codebase
its also a cosmetic change and cosmetic changes must not be in the
same patch as functional changes

>  
> -#if HAVE_VSX
> +#if !HAVE_BIGENDIAN
>  static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>                                                          int line_size, int h)
>  {
>    int i, s = 0;
> -  const vector unsigned char zero =
> +  const vector unsigned char __attribute__((aligned(16)))zero =
>      (const vector unsigned char) vec_splat_u8(0);
> -  vector unsigned char perm = vec_lvsl(0, pix2);
> -  vector unsigned char pix1v, pix3v, avgv, t5;
> -  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> -  vector signed int sumdiffs;
> +  vector unsigned char __attribute__((aligned(16))) perm = vec_lvsl(0, pix2);
> +  vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, avgv, t5;
> +  vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> +  vector signed int __attribute__((aligned(16))) sumdiffs;
>    uint8_t *pix3 = pix2 + line_size;
>  
>    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
> @@ -162,10 +165,11 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>    /* Sum up the four partial sums, and put the result into s. */
>    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
>    sumdiffs = vec_splat(sumdiffs, 3);
> -  vec_vsx_st(sumdiffs, 0, &s);
> +  //vec_vsx_st(sumdiffs, 0, &s);
> +  vec_ste(sumdiffs, 0, &s);
>    return s;
>  }
> -#else
> +#else /* else of #if !HAVE_BIGENDIAN */
>  static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>                              int line_size, int h)
>  {
> @@ -219,24 +223,24 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>      vec_ste(sumdiffs, 0, &s);
>      return s;
>  }
> -#endif /* HAVE_VSX */
> +#endif /* end of #if !HAVE_BIGENDIAN */
>  
> -#if HAVE_VSX
> +#if !HAVE_BIGENDIAN
>  static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>                               int line_size, int h)
>  {
>    int i, s = 0;
>    uint8_t *pix3 = pix2 + line_size;
> -  const vector unsigned char zero =
> +  const vector unsigned char __attribute__((aligned(16))) zero =
>      (const vector unsigned char) vec_splat_u8(0);
> -  const vector unsigned short two =
> +  const vector unsigned short __attribute__((aligned(16))) two =
>      (const vector unsigned short) vec_splat_u16(2);
> -  vector unsigned char avgv, t5;
> -  vector unsigned char pix1v, pix3v, pix3iv;
> -  vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
> -  vector unsigned short avghv, avglv;
> -  vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> -  vector signed int sumdiffs;
> +  vector unsigned char __attribute__((aligned(16))) avgv, t5;
> +  vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, pix3iv;
> +  vector unsigned short __attribute__((aligned(16))) pix3lv, pix3hv, pix3ilv, pix3ihv;
> +  vector unsigned short __attribute__((aligned(16))) avghv, avglv;
> +  vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> +  vector signed int __attribute__((aligned(16))) sumdiffs;
>  
>    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
>     * iteration becomes pix2 in the next iteration. We can use this
> @@ -305,10 +309,11 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>    /* Sum up the four partial sums, and put the result into s. */
>    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
>    sumdiffs = vec_splat(sumdiffs, 3);
> -  vec_vsx_st(sumdiffs, 0, &s);

> +  //vec_vsx_st(sumdiffs, 0, &s);

what purpose does this line have ?

[...]

> +#if !HAVE_BIGENDIAN
> +/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
> + * It's the sad8_altivec code above w/ squaring added. */
> +static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> +                        int line_size, int h)
[...]
> +    return s;
> +}
> +#else /* else of #if !HAVE_BIGENDIAN */
>  /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
>   * It's the sad8_altivec code above w/ squaring added. */
>  static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> @@ -617,18 +670,17 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
>  
>      return s;
>  }
> +#endif /* end of #if !HAVE_BIGENDIAN */

Why do you keep sending patches which duplicate code?

Maybe someone can translate this to a language you understand
better then english:

Code duplication is not allowed

#if A
 foo
 this
 bar
#else
 foo
 that
 bar
#endif

always must be replaced by

 foo
#if A
 this
#else
 that
#endif
 bar

also the then remaining difference (this vs that) should be avoided
as well by using appropriate macros and inline functions

there is already some duplicated code in there, this was a mistake.
No more duplicated code should be added.

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Old school: Use the lowest level language in which you can solve the problem
            conveniently.
New school: Use the highest level language in which the latest supercomputer
            can solve the problem without the user falling asleep waiting.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141031/d31f5823/attachment.asc>