[FFmpeg-devel] [FFMpeg-Devel][GSoC][PATCH 3/6] postproc: Moved inline asm for packing QP to separate function
Tucker DiNapoli
t.dinapoli42 at gmail.com
Wed Apr 22 22:27:28 CEST 2015
This patch contains the code for the avx2/sse2 versions of the new
function, but they are deliberately ignored, since the support for
avx2/sse2 isn't yet present (the next commit fixes this).
This is a temporary measure until the full sse2/avx2 implementation is
complete, but it works with sse2/avx2 as inline asm.
Moving this to a separate file would add overhead due to having to call a function.
If this is a reasonable trade-off for removing inline asm, then I can easily do that.
---
libpostproc/postprocess_template.c | 61 +++++++++++++++++++++++++++++++-------
1 file changed, 51 insertions(+), 10 deletions(-)
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index b7296c4..083be9d 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3249,7 +3249,6 @@ static inline void RENAME(prefetchnta)(const void *p)
: : "r" (p)
);
}
-
static inline void RENAME(prefetcht0)(const void *p)
{
__asm__ volatile( "prefetcht0 (%0)\n\t"
@@ -3305,6 +3304,55 @@ static inline void RENAME(prefetcht2)(const void *p)
return;
}
#endif
+/*
+ This is temporary. Ultimately the inline asm should be removed completely
+ and moved to another file (though this has some performance overhead), but for
+ now this code is necessary.
+ Get around the issues with inline avx by using an explicit register
+ and simplify code by abstracting simd detail like in yasm code
+*/
+#if TEMPLATE_PP_MMX
+static inline void RENAME(packQP)(PPContext c)
+{
+#if 0 //TEMPLATE_PP_AVX2 goes here
+ __asm__ volatile(
+ "vmovdqa (%1), %%ymm0\n\t"
+ "vpermq $0, %%ymm0, %%ymm0 \n\t"
+ "vpunpcklbw %%ymm0, %%ymm0, %%ymm0 \n\t" // 0, 0, 0, 0, 0, 0, OP, QP
+ "vpunpcklwd %%ymm0, %%ymm0, %%ymm0 \n\t" // 0, 0, 0, 0, 0, 0, QP, QP
+ "vpunpckldq %%ymm0, %%ymm0, %%ymm0 \n\t" //QP,...,QP
+ "vpunpcklqdq %%ymm0, %%ymm0, %%ymm0 \n\t" //copy to upper quadword(s)
+ "vmovdqa %%ymm0, %0"
+ : "=m" (c.pQPb_block)
+ : "r" (c.QP_block)
+ : "%ymm0"
+ );
+#else
+#if 0 //TEMPLATE_PP_SSE2 goes here
+#define M0 "%xmm0"
+#define MOVA "movdqa"
+#else
+#define M0 "%mm0"
+#define MOVA "movq"
+#endif
+ __asm__ volatile(
+ MOVA" (%1), %"M0"\n\t"
+ "punpcklbw %"M0", %"M0" \n\t" // 0, 0, 0, 0, 0, 0, OP, QP
+ "punpcklwd %"M0", %"M0" \n\t" // 0, 0, 0, 0, 0, 0, QP, QP
+ "punpckldq %"M0", %"M0" \n\t" //QP,...,QP
+#if 0 //TEMPLATE_PP_SSE2
+ "punpcklqdq %"M0", %"M0" \n\t" //copy to upper quadword(s)
+#endif
+ MOVA" %"M0", %0"
+ : "=m" (c.pQPb_block)
+ : "r" (c.QP_block)
+ : M0
+ );
+#undef M0
+#undef MOVA
+#endif
+}
+#endif
/**
* Filter array of bytes (Y or U or V values)
*/
@@ -3516,6 +3564,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){
QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+
if(!isColor){
QP= (QP* QPCorrecture + 256*128)>>16;
nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
@@ -3524,15 +3573,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
c.QP_block[qp_index] = QP;
c.nonBQP_block[qp_index] = nonBQP;
#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb_block[qp_index])
- : "r" (QP)
- );
+ RENAME(packQP)(c);
#endif
}
for(; x < endx; x+=BLOCK_SIZE){
--
2.3.5
More information about the ffmpeg-devel
mailing list