[FFmpeg-devel] [patch 1/3]Fix bug for POWER LE: /libavcodec/ppc/me_cmp.c
Michael Niedermayer
michaelni at gmx.at
Fri Oct 31 13:27:44 CET 2014
On Fri, Oct 31, 2014 at 01:03:58PM +0100, Michael Niedermayer wrote:
> On Fri, Oct 31, 2014 at 06:45:51PM +0800, rongyan wrote:
> > Hi,
> > There are 3 patches to fix bugs for POWER8 little endian. I will send 3 patches in 3 different email. This is the first, functions sad8_altivec() sse8_altivec(), hadamard8_diff8x8_altivec(), hadamard8_diff16x8_altivec() are fixed.
> > The fate test result after merge these 3 patches can be found on website by searching "ibmcrl", also attached in the below to facilitate the review. The passed test cases change from 1679/2182 to 2202/2235.
> >
> > Thanks.
> >
> > Rong Yan
> > ------------------
> > The world has enough for everyone's need, but not enough for everyone's greed.
>
>
> > me_cmp.c | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 408 insertions(+), 52 deletions(-)
> > dd8fe6d196e1330633e22dd0395b5c010e5585df 0001-libavcodec-ppc-me_cmp.c-fix-sad8_altivec-sse8_altive.patch
> > From a1bd28eb6d775e41defca51fc156a23e4d3f16ed Mon Sep 17 00:00:00 2001
> > From: Rong Yan <rongyan236 at gmail.com>
> > Date: Fri, 31 Oct 2014 10:18:01 +0000
> > Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix sad8_altivec()
> > sse8_altivec() hadamard8_diff8x8_altivec()
> > hadamard8_diff16x8_altivec() for POWER LE
> >
> > ---
> > libavcodec/ppc/me_cmp.c | 460 ++++++++++++++++++++++++++++++++++++++++++------
> > 1 file changed, 408 insertions(+), 52 deletions(-)
> >
> > diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c
> > index 8ff8193..9759b17 100644
> > --- a/libavcodec/ppc/me_cmp.c
> > +++ b/libavcodec/ppc/me_cmp.c
> > @@ -34,16 +34,18 @@
> > #include "libavcodec/mpegvideo.h"
> > #include "libavcodec/me_cmp.h"
> >
> > +#include <stdio.h>
> > #if HAVE_ALTIVEC
> > -#if HAVE_VSX
> > +
> > +#if !HAVE_BIGENDIAN
> > static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > int line_size, int h)
> > {
> > int i, s = 0;
> > - const vector unsigned char zero =
> > + const vector unsigned char __attribute__((aligned(16))) zero =
> > (const vector unsigned char) vec_splat_u8(0);
> > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > - vector signed int sumdiffs;
> > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> > + vector signed int __attribute__((aligned(16))) sumdiffs;
>
> adding alignment directives should be a sepearte patch
>
>
> >
> > for (i = 0; i < h; i++) {
> > /* Read unaligned pixels into our vectors. The vectors are as follows:
> > @@ -69,10 +71,11 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > /* Sum up the four partial sums, and put the result into s. */
> > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> > sumdiffs = vec_splat(sumdiffs, 3);
> > - vec_vsx_st(sumdiffs, 0, &s);
> > + //vec_vsx_st(sumdiffs, 0, &s);
> > + vec_ste(sumdiffs, 0, &s);
> > return s;
> > }
> > -#else
> > +#else /* else of #if !HAVE_BIGENDIAN */
> > static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > int line_size, int h)
> > {
> > @@ -114,19 +117,19 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> >
> > return s;
> > }
>
> > -#endif /* HAVE_VSX */
> > +#endif /* end of #if !HAVE_BIGENDIAN */
>
> this is inconsistent with the rest of the codebase
> its also a cosmetic change and cosmetic changes must not be in the
> same patch as functional changes
>
>
> >
> > -#if HAVE_VSX
> > +#if !HAVE_BIGENDIAN
> > static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > int line_size, int h)
> > {
> > int i, s = 0;
> > - const vector unsigned char zero =
> > + const vector unsigned char __attribute__((aligned(16)))zero =
> > (const vector unsigned char) vec_splat_u8(0);
> > - vector unsigned char perm = vec_lvsl(0, pix2);
> > - vector unsigned char pix1v, pix3v, avgv, t5;
> > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > - vector signed int sumdiffs;
> > + vector unsigned char __attribute__((aligned(16))) perm = vec_lvsl(0, pix2);
> > + vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, avgv, t5;
> > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> > + vector signed int __attribute__((aligned(16))) sumdiffs;
> > uint8_t *pix3 = pix2 + line_size;
> >
> > /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
> > @@ -162,10 +165,11 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > /* Sum up the four partial sums, and put the result into s. */
> > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> > sumdiffs = vec_splat(sumdiffs, 3);
> > - vec_vsx_st(sumdiffs, 0, &s);
> > + //vec_vsx_st(sumdiffs, 0, &s);
> > + vec_ste(sumdiffs, 0, &s);
> > return s;
> > }
> > -#else
> > +#else /* else of #if !HAVE_BIGENDIAN */
> > static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > int line_size, int h)
> > {
> > @@ -219,24 +223,24 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > vec_ste(sumdiffs, 0, &s);
> > return s;
> > }
> > -#endif /* HAVE_VSX */
> > +#endif /* end of #if !HAVE_BIGENDIAN */
> >
> > -#if HAVE_VSX
> > +#if !HAVE_BIGENDIAN
> > static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > int line_size, int h)
> > {
> > int i, s = 0;
> > uint8_t *pix3 = pix2 + line_size;
> > - const vector unsigned char zero =
> > + const vector unsigned char __attribute__((aligned(16))) zero =
> > (const vector unsigned char) vec_splat_u8(0);
> > - const vector unsigned short two =
> > + const vector unsigned short __attribute__((aligned(16))) two =
> > (const vector unsigned short) vec_splat_u16(2);
> > - vector unsigned char avgv, t5;
> > - vector unsigned char pix1v, pix3v, pix3iv;
> > - vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
> > - vector unsigned short avghv, avglv;
> > - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> > - vector signed int sumdiffs;
> > + vector unsigned char __attribute__((aligned(16))) avgv, t5;
> > + vector unsigned char __attribute__((aligned(16))) pix1v, pix3v, pix3iv;
> > + vector unsigned short __attribute__((aligned(16))) pix3lv, pix3hv, pix3ilv, pix3ihv;
> > + vector unsigned short __attribute__((aligned(16))) avghv, avglv;
> > + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> > + vector signed int __attribute__((aligned(16))) sumdiffs;
> >
> > /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
> > * iteration becomes pix2 in the next iteration. We can use this
> > @@ -305,10 +309,11 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > /* Sum up the four partial sums, and put the result into s. */
> > sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> > sumdiffs = vec_splat(sumdiffs, 3);
> > - vec_vsx_st(sumdiffs, 0, &s);
>
> > + //vec_vsx_st(sumdiffs, 0, &s);
>
> what purpose does this line have ?
>
>
> [...]
>
> > +#if !HAVE_BIGENDIAN
> > +/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
> > + * It's the sad8_altivec code above w/ squaring added. */
> > +static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > + int line_size, int h)
> [...]
> > + return s;
> > +}
> > +#else /* else of #if !HAVE_BIGENDIAN */
> > /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
> > * It's the sad8_altivec code above w/ squaring added. */
> > static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> > @@ -617,18 +670,17 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> >
> > return s;
> > }
> > +#endif /* end of #if !HAVE_BIGENDIAN */
>
> Why do you keep sending patches which duplicate code?
>
> Maybe someone can translate this to a language you understand
> better then english:
>
> Code duplication is not allowed
>
> #if A
> foo
> this
> bar
> #else
> foo
> that
> bar
> #endif
>
> always must be replaced by
>
> foo
> #if A
> this
> #else
> that
> #endif
> bar
>
>
> also the then remaining difference (this vs that) should be avoided
> as well by using appropriate macros and inline functions
>
> there is already some duplicated code in there, this was a mistake.
> No more duplicated code should be added.
For reference the difference between the if and else
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
* It's the sad8_altivec code above w/ squaring added. */
static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{
int i, s;
- const vector unsigned int zero =
+ const vector unsigned int __attribute__((aligned(16))) zero =
(const vector unsigned int) vec_splat_u32(0);
- const vector unsigned char permclear =
+ const vector unsigned char __attribute__((aligned(16))) permclear =
(vector unsigned char)
{ 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
- vector unsigned char perm1 = vec_lvsl(0, pix1);
- vector unsigned char perm2 = vec_lvsl(0, pix2);
- vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
- vector signed int sumsqr;
+ vector unsigned int __attribute__((aligned(16))) sum = (vector unsigned int) vec_splat_u32(0);
+ vector signed int __attribute__((aligned(16))) sumsqr;
for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2.
* Since we're reading 16 pixels, and actually only want 8,
* mask out the last 8 pixels. The 0s don't change the sum. */
- vector unsigned char pix1l = vec_ld(0, pix1);
- vector unsigned char pix1r = vec_ld(7, pix1);
- vector unsigned char pix2l = vec_ld(0, pix2);
- vector unsigned char pix2r = vec_ld(7, pix2);
- vector unsigned char t1 = vec_and(vec_perm(pix1l, pix1r, perm1),
- permclear);
- vector unsigned char t2 = vec_and(vec_perm(pix2l, pix2r, perm2),
- permclear);
+ vector unsigned char t1 = vec_and(vec_vsx_ld(0, pix1), permclear);
+ vector unsigned char t2 = vec_and(vec_vsx_ld(0, pix2), permclear);
/* Since we want to use unsigned chars, we can take advantage
* of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
@@ -613,22 +614,21 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
/* Sum up the four partial sums, and put the result into s. */
sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
sumsqr = vec_splat(sumsqr, 3);
+ //vec_vsx_st(sumsqr, 0, &s);
vec_ste(sumsqr, 0, &s);
-
return s;
}
this is just a difference in alignment, why is the alignment
difference needed ?
and a difference of how data is loaded, which belongs in a seperate
macro
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Those who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141031/965f3ae7/attachment.asc>
More information about the ffmpeg-devel
mailing list