[FFmpeg-devel] [PATCH 7/7] Moved conditional inline asm into a macro to clean up code

Tucker DiNapoli t.dinapoli42 at gmail.com
Fri Mar 27 21:51:48 CET 2015


From: Tucker DiNapoli <T.DiNapoli42 at gmail.com>

---
 libpostproc/postprocess.c          |  2 +-
 libpostproc/postprocess_template.c | 41 ++++++++++++++++++++++++--------------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 9d89782..b8740db 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -117,7 +117,7 @@ const char *postproc_license(void)
 #define OPTIONS_ARRAY_SIZE 10
 #define BLOCK_SIZE 8
 #define TEMP_STRIDE 8
-//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
+#define BLOCKS_PER_ITERATION 1 //1 for now to keep old code working
 
 #if ARCH_X86 && HAVE_INLINE_ASM
 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 794ea17..34c1cc1 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -1449,7 +1449,11 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
  */
 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
 {
+    int block_index;
+    uint8_t *src_base = src;
+    for(block_index=0;block_index<BLOCKS_PER_ITERATION; block_index++){
 #if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
+    src = src_base;
     src+= 4*stride;
     __asm__ volatile(
         "lea (%0, %1), %%"REG_a"                \n\t"
@@ -1476,6 +1480,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
     );
 #else
     int a, b, x;
+    src = src_base;
     src+= 4*stride;
 
     for(x=0; x<2; x++){
@@ -1491,6 +1496,8 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
         src += 4;
     }
 #endif
+      src_base += 8;
+  }
 }
 
 /**
@@ -1502,6 +1509,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
  */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
+
 #if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
     src+= stride*3;
     __asm__ volatile(
@@ -2539,7 +2547,6 @@ Switch between
 #endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
 }
 #endif //TEMPLATE_PP_ALTIVEC
-
 #if TEMPLATE_PP_MMX
 /**
  * accurate deblock filter
@@ -3082,7 +3089,6 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 
 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                                 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
-
 /**
  * Copy a block from src to dst and fixes the blacklevel.
  * levelFix == 0 -> do not touch the brightness & contrast
@@ -3175,9 +3181,10 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
         : "%"REG_d
     );
 #else //TEMPLATE_PP_MMX && HAVE_6REGS
-    for(i=0; i<8; i++)
+    for(i=0; i<8; i++){
         memcpy( &(dst[dstStride*i]),
                 &(src[srcStride*i]), BLOCK_SIZE);
+    }
 #endif //TEMPLATE_PP_MMX && HAVE_6REGS
     }else{
 #if TEMPLATE_PP_MMX && HAVE_6REGS
@@ -3241,7 +3248,21 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
     }
 #endif
 }
-
+#undef mmx_pack_qp
+#if TEMPLATE_PP_MMX /* mmx_pack_qp: pack QP via mm7 and store the 8-byte result in pQP */
+#define mmx_pack_qp(QP, pQP)                                            \
+    do { __asm__ volatile( /* do/while(0): one statement per call */    \
+        "movd %1, %%mm7         \n\t"                                   \
+        "packuswb %%mm7, %%mm7  \n\t" /*0, 0, 0, QP, 0, 0, 0, QP*/      \
+        "packuswb %%mm7, %%mm7  \n\t" /* 0,QP, 0, QP, 0,QP, 0, QP*/     \
+        "packuswb %%mm7, %%mm7  \n\t" /*QP,..., QP*/                    \
+        "movq %%mm7, %0         \n\t"                                   \
+        : "=m" (pQP)                                                    \
+        : "r" (QP)                                                      \
+    ); } while (0)
+#else /* no MMX: expand to an empty statement so call sites remain valid */
+#define mmx_pack_qp(QP,pQP) do { } while (0)
+#endif
 /**
  * Filter array of bytes (Y or U or V values)
  */
@@ -3457,17 +3478,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                 }
                 c.QP_block[qp_index]= QP;
                 c.nonBQP_block[qp_index]= nonBQP;
-#if TEMPLATE_PP_MMX
-                __asm__ volatile(
-                    "movd %1, %%mm7         \n\t"
-                    "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-                    "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-                    "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-                    "movq %%mm7, %0         \n\t"
-                    : "=m" (c.pQPb_block[qp_index])
-                    : "r" (QP)
-                );
-#endif
+                mmx_pack_qp(QP, c.pQPb_block[qp_index]);
             }
             qp_index = 0;
             for(; x < endx; x+=BLOCK_SIZE){
-- 
2.3.3



More information about the ffmpeg-devel mailing list