[FFmpeg-devel] [PATCH 3/7] Made QP, nonBQP, and pQPb arrays

Tucker DiNapoli t.dinapoli42 at gmail.com
Fri Mar 27 21:51:44 CET 2015


From: Tucker DiNapoli <T.DiNapoli42 at gmail.com>

Also pulled QP initialization out of inner loop.

Added some dummy fields to PPContext so that the current code keeps
working while the QP handling is being changed.
---
 libpostproc/postprocess_internal.h |   6 ++
 libpostproc/postprocess_template.c | 138 ++++++++++++++++++-------------------
 2 files changed, 74 insertions(+), 70 deletions(-)

diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h
index 1ebd974..ccf862a 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,6 +143,9 @@ typedef struct PPContext{
     DECLARE_ALIGNED(8, uint64_t, pQPb);
     DECLARE_ALIGNED(8, uint64_t, pQPb2);
 
+    DECLARE_ALIGNED(8, uint64_t, pQPb_block)[4];
+    DECLARE_ALIGNED(8, uint64_t, pQPb2_block)[4];
+
     DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
     DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
 
@@ -153,6 +156,9 @@ typedef struct PPContext{
     int QP;
     int nonBQP;
 
+    QP_STORE_T QP_block[4];
+    QP_STORE_T nonBQP_block[4];
+
     int frameNum;
 
     int cpuCaps;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 6377ea7..344152e 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3416,7 +3416,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 #endif
         const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
         int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
-        int QP=0;
+        int QP=0, nonBQP=0;
         /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
            if not than use a temporary buffer */
         if(y+15 >= height){
@@ -3449,58 +3449,69 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             int endx = FFMIN(width, x+32);
             uint8_t *dstBlockStart = dstBlock;
             const uint8_t *srcBlockStart = srcBlock;
-          for(; x < endx; x+=BLOCK_SIZE){
-            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
-            prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
-            prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
-
-            RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
-                              srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+            int qp_index = 0;
+            for(qp_index=0; qp_index < 4; qp_index+=1){
+                QP = QPptr[(x+qp_index*8)>>qpHShift];
+                nonBQP = nonBQPptr[(x+qp_index*8)>>qpHShift];
+                if(!isColor){
+                    QP= (QP* QPCorrecture + 256*128)>>16;
+                    nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
+                    yHistogram[srcBlock[srcStride*12 + 4]]++;
+                }
+                c.QP_block[qp_index]= QP;
+                c.nonBQP_block[qp_index]= nonBQP;
+#if TEMPLATE_PP_MMX
+                __asm__ volatile(
+                    "movd %1, %%mm7         \n\t"
+                    "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+                    "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+                    "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
+                    "movq %%mm7, %0         \n\t"
+                    : "=m" (c.pQPb_block[qp_index])
+                    : "r" (QP)
+                );
+#endif
+            }
+            for(; x < endx; x+=BLOCK_SIZE){
+                prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
+                prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
+                prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32);
+                prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);
+
+                RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
+                                  srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+
+                if(mode & LINEAR_IPOL_DEINT_FILTER)
+                    RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
+                else if(mode & LINEAR_BLEND_DEINT_FILTER)
+                    RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+                else if(mode & MEDIAN_DEINT_FILTER)
+                    RENAME(deInterlaceMedian)(dstBlock, dstStride);
+                else if(mode & CUBIC_IPOL_DEINT_FILTER)
+                    RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
+                else if(mode & FFMPEG_DEINT_FILTER)
+                    RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+                else if(mode & LOWPASS5_DEINT_FILTER)
+                    RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+                /*          else if(mode & CUBIC_BLEND_DEINT_FILTER)
+                            RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
+                */
+                dstBlock+=8;
+                srcBlock+=8;
+            }
 
-            if(mode & LINEAR_IPOL_DEINT_FILTER)
-                RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
-            else if(mode & LINEAR_BLEND_DEINT_FILTER)
-                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
-            else if(mode & MEDIAN_DEINT_FILTER)
-                RENAME(deInterlaceMedian)(dstBlock, dstStride);
-            else if(mode & CUBIC_IPOL_DEINT_FILTER)
-                RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
-            else if(mode & FFMPEG_DEINT_FILTER)
-                RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
-            else if(mode & LOWPASS5_DEINT_FILTER)
-                RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/*          else if(mode & CUBIC_BLEND_DEINT_FILTER)
-                RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
-*/
-            dstBlock+=8;
-            srcBlock+=8;
-          }
+            qp_index = 0;
+            dstBlock = dstBlockStart;
+            srcBlock = srcBlockStart;
 
-          dstBlock = dstBlockStart;
-          srcBlock = srcBlockStart;
+            for(x = startx; x < endx; x+=BLOCK_SIZE){
+                const int stride= dstStride;
+                //temporary while changing QP stuff to make things continue to work
+                c.QP = c.QP_block[qp_index];
+                c.nonBQP = c.nonBQP_block[qp_index];
+                c.pQPb = c.pQPb_block[qp_index];
+                c.pQPb2 = c.pQPb2_block[qp_index++];
 
-          for(x = startx; x < endx; x+=BLOCK_SIZE){
-            const int stride= dstStride;
-            QP = QPptr[x>>qpHShift];
-            c.nonBQP = nonBQPptr[x>>qpHShift];
-            if(!isColor){
-                QP= (QP* QPCorrecture + 256*128)>>16;
-                c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-                yHistogram[srcBlock[srcStride*12 + 4]]++;
-            }
-            c.QP= QP;
-#if TEMPLATE_PP_MMX
-            __asm__ volatile(
-                "movd %1, %%mm7         \n\t"
-                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-                "movq %%mm7, %0         \n\t"
-                : "=m" (c.pQPb)
-                : "r" (QP)
-            );
-#endif
             /* only deblock if we have 2 blocks */
             if(y + 8 < height){
                 if(mode & V_X1_FILTER)
@@ -3521,6 +3532,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             srcBlock+=8;
           }
 
+          qp_index = 0;
           dstBlock = dstBlockStart;
           srcBlock = srcBlockStart;
 
@@ -3528,26 +3540,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
             const int stride= dstStride;
             av_unused uint8_t *tmpXchg;
 
-            if(isColor){
-                QP= QPptr[x>>qpHShift];
-                c.nonBQP= nonBQPptr[x>>qpHShift];
-            }else{
-                QP= QPptr[x>>4];
-                QP= (QP* QPCorrecture + 256*128)>>16;
-                c.nonBQP= nonBQPptr[x>>4];
-                c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-            }
-            c.QP= QP;
+
+            c.QP = c.QP_block[qp_index];
+            c.nonBQP = c.nonBQP_block[qp_index];
+            c.pQPb = c.pQPb_block[qp_index];
+            c.pQPb2 = c.pQPb2_block[qp_index++];
 #if TEMPLATE_PP_MMX
-            __asm__ volatile(
-                "movd %1, %%mm7         \n\t"
-                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
-                "movq %%mm7, %0         \n\t"
-                : "=m" (c.pQPb)
-                : "r" (QP)
-            );
             RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
             /* check if we have a previous block to deblock it with dstBlock */
@@ -3569,7 +3567,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 
 #else
                 if(mode & H_X1_FILTER)
-                    horizX1Filter(dstBlock-4, stride, QP);
+                    horizX1Filter(dstBlock-4, stride, c.QP);
                 else if(mode & H_DEBLOCK){
 #if TEMPLATE_PP_ALTIVEC
                     DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
-- 
2.3.3



More information about the ffmpeg-devel mailing list