[FFmpeg-devel] [FFmpeg-Devel][PATCH 1/5] postproc: Replaced inline asm for prefetching with prefetch macros
Michael Niedermayer
michaelni at gmx.at
Wed Apr 1 22:59:34 CEST 2015
On Wed, Apr 01, 2015 at 02:36:01PM -0400, Tucker DiNapoli wrote:
> These patches are updates to patches previously posted to the mailing lists,
> with some bugs fixed and the reasoning behind some changes expanded on.
>
> This adds macros in postprocess.c that use inline asm on x86,
> __builtin_prefetch when building with a recent enough GCC-compatible compiler,
> and expand to nothing otherwise. The inline asm in postprocess_template.c was
> replaced by these macros.
> ---
> libpostproc/postprocess.c | 10 ++++++
> libpostproc/postprocess_template.c | 63 +++++---------------------------------
> 2 files changed, 18 insertions(+), 55 deletions(-)
>
> diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
> index 9d89782..f8d28ba 100644
> --- a/libpostproc/postprocess.c
> +++ b/libpostproc/postprocess.c
> @@ -197,6 +197,16 @@ static inline void prefetcht2(const void *p)
> : : "r" (p)
> );
> }
> +#elif AV_GCC_VERSION_AT_LEAST(3,2)
> +#define prefetchnta(p) __builtin_prefetch(p,0,0)
> +#define prefetcht0(p) __builtin_prefetch(p,0,1)
> +#define prefetcht1(p) __builtin_prefetch(p,0,2)
> +#define prefetcht2(p) __builtin_prefetch(p,0,3)
> +#else
> +#define prefetchnta(p)
> +#define prefetcht0(p)
> +#define prefetcht1(p)
> +#define prefetcht2(p)
> #endif
>
> /* The horizontal functions exist only in C because the MMX
> diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
> index 16e441a..6377ea7 100644
> --- a/libpostproc/postprocess_template.c
> +++ b/libpostproc/postprocess_template.c
> @@ -3368,34 +3368,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
> // finish 1 block before the next otherwise we might have a problem
> // with the L1 Cache of the P4 ... or only a few blocks at a time or something
> for(x=0; x<width; x+=BLOCK_SIZE){
> -
> -#if TEMPLATE_PP_MMXEXT && HAVE_6REGS
> -/*
> - prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
> - prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
> - prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
> - prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
> -*/
> -
> - __asm__(
> - "mov %4, %%"REG_a" \n\t"
> - "shr $2, %%"REG_a" \n\t"
> - "and $6, %%"REG_a" \n\t"
> - "add %5, %%"REG_a" \n\t"
> - "mov %%"REG_a", %%"REG_d" \n\t"
> - "imul %1, %%"REG_a" \n\t"
> - "imul %3, %%"REG_d" \n\t"
> - "prefetchnta 32(%%"REG_a", %0) \n\t"
> - "prefetcht0 32(%%"REG_d", %2) \n\t"
> - "add %1, %%"REG_a" \n\t"
> - "add %3, %%"REG_d" \n\t"
> - "prefetchnta 32(%%"REG_a", %0) \n\t"
> - "prefetcht0 32(%%"REG_d", %2) \n\t"
> - :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
> - "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
> - : "%"REG_a, "%"REG_d
> - );
> -#endif
> + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
> + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
> + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
> + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
This will fail on older CPUs which do not support the prefetch* instructions.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150401/248cc393/attachment.asc>
More information about the ffmpeg-devel
mailing list