[FFmpeg-devel] [patch 1/3]Fix bug for POWER LE:/libavcodec/ppc/me_cmp.c
Michael Niedermayer
michaelni at gmx.at
Tue Nov 4 12:22:49 CET 2014
On Tue, Nov 04, 2014 at 03:05:24PM +0800, rongyan wrote:
> Hi,
> There are 3 patches to fix bugs for POWER8 little endian. I will send 3 patches in 3 different email. This is the first, functions
> hadamard8_diff8x8_altivec() hadamard8_diff16x8_altivec()
> sad16_x2_altivec()
> sad16_y2_altivec()
> sad16_xy2_altivec()
> sad16_altivec()
> sad8_altivec()
> sse16_altivec()
> sse8_altivec()
> are fixed.
> The fate test result after merge these 3 patches can be found on website by searching "ibmcrl", also attached in the below to facilitate the review. The passed test cases change from 1679/2182 to 2202/2235.
>
> Thanks for review and any comments please feel free to let me know.
>
> Rong Yan
> ------------------
> The world has enough for everyone's need, but not enough for everyone's greed.
> me_cmp.c | 497 ++++++++++++++++++++++++---------------------------------------
> 1 file changed, 196 insertions(+), 301 deletions(-)
> d18753b0bc6ac53409a3070a61795e079282b6ee 0001-libavcodec-ppc-me_cmp.c-fix-hadamard8_diff8x8_altive.patch
> From 40e4b6d2871772e17c86986cfeca16dcdb24ded8 Mon Sep 17 00:00:00 2001
> From: Rong Yan <rongyan236 at gmail.com>
> Date: Tue, 4 Nov 2014 06:23:11 +0000
> Subject: [PATCH 1/3] libavcodec/ppc/me_cmp.c : fix hadamard8_diff8x8_altivec()
> hadamard8_diff16x8_altivec() sad16_x2_altivec() sad16_y2_altivec()
> sad16_xy2_altivec() sad16_altivec() sad8_altivec() sse16_altivec()
> sse8_altivec() for POWER LE
>
> ---
> libavcodec/ppc/me_cmp.c | 497 +++++++++++++++++++-----------------------------
> 1 file changed, 196 insertions(+), 301 deletions(-)
>
> diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c
> index 8ff8193..133c3bb 100644
> --- a/libavcodec/ppc/me_cmp.c
> +++ b/libavcodec/ppc/me_cmp.c
> @@ -35,64 +35,39 @@
> #include "libavcodec/me_cmp.h"
>
> #if HAVE_ALTIVEC
> -#if HAVE_VSX
> -static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> - int line_size, int h)
> -{
> - int i, s = 0;
> - const vector unsigned char zero =
> - (const vector unsigned char) vec_splat_u8(0);
> - vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> - vector signed int sumdiffs;
> -
> - for (i = 0; i < h; i++) {
> - /* Read unaligned pixels into our vectors. The vectors are as follows:
> - * pix1v: pix1[0] - pix1[15]
> - * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
> - vector unsigned char pix1v = vec_vsx_ld(0, pix1);
> - vector unsigned char pix2v = vec_vsx_ld(0, pix2);
> - vector unsigned char pix2iv = vec_vsx_ld(1, pix2);
> -
> - /* Calculate the average vector. */
> - vector unsigned char avgv = vec_avg(pix2v, pix2iv);
> -
> - /* Calculate a sum of abs differences vector. */
> - vector unsigned char t5 = vec_sub(vec_max(pix1v, avgv),
> - vec_min(pix1v, avgv));
> -
> - /* Add each 4 pixel group together and put 4 results into sad. */
> - sad = vec_sum4s(t5, sad);
> -
> - pix1 += line_size;
> - pix2 += line_size;
> - }
> - /* Sum up the four partial sums, and put the result into s. */
> - sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
> - sumdiffs = vec_splat(sumdiffs, 3);
> - vec_vsx_st(sumdiffs, 0, &s);
> - return s;
> -}
> -#else
> static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> int line_size, int h)
> {
> int i, s = 0;
> +#if HAVE_BIGENDIAN
> const vector unsigned char zero =
> (const vector unsigned char) vec_splat_u8(0);
> vector unsigned char perm1 = vec_lvsl(0, pix2);
> vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
> vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
> vector signed int sumdiffs;
> +#else /* HAVE_BIGENDIAN */
> + const vector unsigned char __attribute__((aligned(16))) zero =
> + (const vector unsigned char) vec_splat_u8(0);
> + vector unsigned int __attribute__((aligned(16))) sad = (vector unsigned int) vec_splat_u32(0);
> + vector signed int __attribute__((aligned(16))) sumdiffs;
> +#endif /* HAVE_BIGENDIAN */
this is much better!
but please use
#if HAVE_BIGENDIAN
#define DECLARE_ALIGNED_LE(n,t,v) t v
#else
#define DECLARE_ALIGNED_LE(n,t,v) DECLARE_ALIGNED(n,t,v)
#endif
...
DECLARE_ALIGNED_LE(16, const vector unsigned char, zero) =
(const vector unsigned char) vec_splat_u8(0);
#if HAVE_BIGENDIAN
vector unsigned char perm1 = vec_lvsl(0, pix2);
vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
#endif /* HAVE_BIGENDIAN */
DECLARE_ALIGNED_LE(16, vector unsigned int, sad) = (vector unsigned int) vec_splat_u32(0);
DECLARE_ALIGNED_LE(16, vector signed int, sumdiffs);
>
> for (i = 0; i < h; i++) {
> /* Read unaligned pixels into our vectors. The vectors are as follows:
> * pix1v: pix1[0] - pix1[15]
> * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
> +#if HAVE_BIGENDIAN
> vector unsigned char pix1v = vec_ld(0, pix1);
> vector unsigned char pix2l = vec_ld(0, pix2);
> vector unsigned char pix2r = vec_ld(16, pix2);
> vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1);
> vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
> +#else /* HAVE_BIGENDIAN */
> + vector unsigned char pix1v = vec_vsx_ld(0, pix1);
> + vector unsigned char pix2v = vec_vsx_ld(0, pix2);
> + vector unsigned char pix2iv = vec_vsx_ld(1, pix2);
> +#endif /* HAVE_BIGENDIAN */
>
> /* Calculate the average vector. */
> vector unsigned char avgv = vec_avg(pix2v, pix2iv);
please use:
#if HAVE_BIGENDIAN
#define VEC_LD(a, b) vec_ld(a, b)
#else
#define VEC_LD(a, b) vec_vsx_ld(a, b)
#endif
...
vector unsigned char pix1v = VEC_LD(0, pix1);
vector unsigned char pix2l = VEC_LD(0, pix2);
vector unsigned char pix2r = VEC_LD(16, pix2);
#if HAVE_BIGENDIAN
vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm1);
vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
#endif /* HAVE_BIGENDIAN */
[...]
> +#if HAVE_BIGENDIAN
> pix1v = vec_ld(0, pix1);
>
> pix2l = vec_ld(0, pix3);
> @@ -373,6 +262,16 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
> pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
> pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
> pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);
> +#else /* HAVE_BIGENDIAN */
> + pix1v = vec_vsx_ld(0, pix1);
> + pix3v = vec_vsx_ld(0, pix3);
> + pix3iv = vec_vsx_ld(1, pix3);
> +
> + pix3hv = (vector unsigned short) vec_mergeh(pix3v, zero);
> + pix3lv = (vector unsigned short) vec_mergel(pix3v, zero);
> + pix3ihv = (vector unsigned short) vec_mergeh(pix3iv, zero);
> + pix3ilv = (vector unsigned short) vec_mergel(pix3iv, zero);
> +#endif /* HAVE_BIGENDIAN */
please use:
#if HAVE_BIGENDIAN
#define VEC_MERGEH(a, b) vec_mergeh(a,b)
#define VEC_MERGEL(a, b) vec_mergel(a,b)
#else
#define VEC_MERGEH(a, b) vec_mergeh(b,a)
#define VEC_MERGEL(a, b) vec_mergel(b,a)
#endif
...
pix3hv = (vector unsigned short) VEC_MERGEH(pix3v, zero);
pix3lv = (vector unsigned short) VEC_MERGEL(pix3v, zero);
pix3ihv = (vector unsigned short) VEC_MERGEH(pix3iv, zero);
pix3ilv = (vector unsigned short) VEC_MERGEL(pix3iv, zero);
[...]
> +#if HAVE_BIGENDIAN
> vec_ste(sumsqr, 0, &s);
> +#else /* HAVE_BIGENDIAN */
> + vec_vsx_st(sumsqr, 0, &s);
> +#endif /* HAVE_BIGENDIAN */
please use:
#if HAVE_BIGENDIAN
#define VEC_STE(a,b,c) vec_ste(a,b,c)
#else
#define VEC_STE(a,b,c) vec_vsx_st(a,b,c)
#endif
...
VEC_STE(sumsqr, 0, &s);
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Opposition brings concord. Out of discord comes the fairest harmony.
-- Heraclitus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141104/8c1dc291/attachment.asc>
More information about the ffmpeg-devel
mailing list