[FFmpeg-devel] [PATCH 1/6] x86: huffyuvdsp: port mmx add_bytes to yasm

Michael Niedermayer michaelni at gmx.at
Thu May 29 18:33:24 CEST 2014


On Thu, May 29, 2014 at 04:18:52PM +0200, Christophe Gisquet wrote:
> 2014-05-29 16:07 GMT+02:00 Christophe Gisquet <christophe.gisquet at gmail.com>:
> > AAAAND ignore it. Hunk incorrectly split.
> 
> Hopefully correct hunks.
> 
> -- 
> Christophe

>  huffyuvdsp.c             |    2 +-
>  huffyuvdsp.h             |    2 +-
>  ppc/huffyuvdsp_altivec.c |    2 +-
>  x86/huffyuvdsp.asm       |   38 ++++++++++++++++++++++++++++++++++++++
>  x86/huffyuvdsp_init.c    |    9 +++++++--
>  x86/huffyuvdsp_mmx.c     |   32 +-------------------------------
>  6 files changed, 49 insertions(+), 36 deletions(-)
> 8b01bf088b8f4184d66ed87f547cb77353e9f905  0001-x86-huffyuvdsp-port-add_bytes-to-yasm.patch
> From 39206710ca46e725216a607c35a65b11d6bf6412 Mon Sep 17 00:00:00 2001
> From: Christophe Gisquet <christophe.gisquet at gmail.com>
> Date: Wed, 28 May 2014 15:52:24 +0200
> Subject: [PATCH 1/4] x86: huffyuvdsp: port add_bytes to yasm
> 
>           C   MMX  SSE2
> Cycles: 2972  587  302
> ---
>  libavcodec/huffyuvdsp.c             |  2 +-
>  libavcodec/huffyuvdsp.h             |  2 +-
>  libavcodec/ppc/huffyuvdsp_altivec.c |  2 +-
>  libavcodec/x86/huffyuvdsp.asm       | 38 +++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/huffyuvdsp_init.c    |  9 +++++++--
>  libavcodec/x86/huffyuvdsp_mmx.c     | 32 +------------------------------
>  6 files changed, 49 insertions(+), 36 deletions(-)
> 
> diff --git a/libavcodec/huffyuvdsp.c b/libavcodec/huffyuvdsp.c
> index cbc09cf..3d51552 100644
> --- a/libavcodec/huffyuvdsp.c
> +++ b/libavcodec/huffyuvdsp.c
> @@ -27,7 +27,7 @@
>  #define pb_7f (~0UL / 255 * 0x7f)
>  #define pb_80 (~0UL / 255 * 0x80)
>  
> -static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
> +static void add_bytes_c(uint8_t *dst, uint8_t *src, intptr_t w)
>  {
>      long i;
>  
> diff --git a/libavcodec/huffyuvdsp.h b/libavcodec/huffyuvdsp.h
> index fd66f0a..c52dd69 100644
> --- a/libavcodec/huffyuvdsp.h
> +++ b/libavcodec/huffyuvdsp.h
> @@ -35,7 +35,7 @@
>  
>  typedef struct HuffYUVDSPContext {
>      void (*add_bytes)(uint8_t *dst /* align 16 */, uint8_t *src /* align 16 */,
> -                      int w);
> +                      intptr_t w);
>      void (*add_hfyu_median_pred)(uint8_t *dst, const uint8_t *top,
>                                   const uint8_t *diff, int w,
>                                   int *left, int *left_top);
> diff --git a/libavcodec/ppc/huffyuvdsp_altivec.c b/libavcodec/ppc/huffyuvdsp_altivec.c
> index ff2bd87..0052dae 100644
> --- a/libavcodec/ppc/huffyuvdsp_altivec.c
> +++ b/libavcodec/ppc/huffyuvdsp_altivec.c
> @@ -31,7 +31,7 @@
>  #include "libavcodec/huffyuvdsp.h"
>  
>  #if HAVE_ALTIVEC
> -static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
> +static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w)
>  {
>      register int i;
>      register vector unsigned char vdst, vsrc;
> diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
> index f183ebe..881299c 100644
> --- a/libavcodec/x86/huffyuvdsp.asm
> +++ b/libavcodec/x86/huffyuvdsp.asm
> @@ -163,3 +163,41 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
>      ADD_HFYU_LEFT_LOOP 0, 1
>  .src_unaligned:
>      ADD_HFYU_LEFT_LOOP 0, 0
> +
> +%macro ADD_BYTES 0
> +cglobal add_bytes, 3,4,4, dst, src, w, size
> +    mov  sizeq, wq
> +    and  sizeq, -2*mmsize
> +    jz  .2
> +    add   dstq, sizeq
> +    add   srcq, sizeq
> +    neg  sizeq
> +.1:
> +    mova    m0, [dstq + sizeq]
> +    mova    m1, [srcq + sizeq]
> +    mova    m2, [dstq + sizeq + mmsize]
> +    mova    m3, [srcq + sizeq + mmsize]
> +    paddb   m1, m0
> +    paddb   m3, m2
> +    mova   [dstq + sizeq], m1
> +    mova   [dstq + sizeq + mmsize], m3
> +    add  sizeq, 2*mmsize
> +    jl .1

> +.2:
> +    and     wq, 2*mmsize-1
> +    jz    .end
> +    add   dstq, wq
> +    add   srcq, wq
> +    neg     wq
> +    mov  sizeb, [srcq + wq]
> +    add [dstq + wq], sizeb
> +    inc     wq
> +    jmp .2

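This looks rather odd; I suspect this doesn't work: the tail loop jumps back
to .2, so every iteration re-masks the already negated wq and adds it to
dstq/srcq again instead of just stepping to the next byte.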

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

What does censorship reveal? It reveals fear. -- Julian Assange
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140529/a4727ace/attachment.asc>


More information about the ffmpeg-devel mailing list