[FFmpeg-devel] [PATCH] videodsp: move draw_edges from mpegvideoencdsp

James Almer jamrial at gmail.com
Wed Jul 16 01:53:21 CEST 2014


draw_edges is also used by some decoders (dirac, snow), so move it out of
mpegvideoencdsp and into videodsp.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 configure                             |   4 +-
 libavcodec/diracdec.c                 |  12 ++--
 libavcodec/mpegvideo_enc.c            |   8 +--
 libavcodec/mpegvideoencdsp.c          |  30 --------
 libavcodec/mpegvideoencdsp.h          |   6 --
 libavcodec/snow.c                     |   7 +-
 libavcodec/snow.h                     |   1 -
 libavcodec/snowenc.c                  |   2 +-
 libavcodec/videodsp.c                 |  30 ++++++++
 libavcodec/videodsp.h                 |   6 ++
 libavcodec/x86/mpegvideoencdsp_init.c | 118 --------------------------------
 libavcodec/x86/videodsp_init.c        | 124 +++++++++++++++++++++++++++++++++-
 12 files changed, 175 insertions(+), 173 deletions(-)

diff --git a/configure b/configure
index 4ed43a0..47acef5 100755
--- a/configure
+++ b/configure
@@ -2048,7 +2048,7 @@ cook_decoder_select="audiodsp mdct sinewin"
 cscd_decoder_select="lzo"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
-dirac_decoder_select="dsputil dwt golomb videodsp mpegvideoenc"
+dirac_decoder_select="dwt golomb videodsp qpeldsp"
 dnxhd_decoder_select="blockdsp idctdsp"
 dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
 dvvideo_decoder_select="dvprofile idctdsp"
@@ -2157,7 +2157,7 @@ rv30_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_e
 rv40_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_er mpegvideo videodsp"
 shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
-snow_decoder_select="dsputil dwt h264qpel hpeldsp rangecoder mpegvideoenc"
+snow_decoder_select="dsputil dwt h264qpel hpeldsp rangecoder videodsp"
 snow_encoder_select="aandcttables dsputil dwt h264qpel hpeldsp mpegvideoenc rangecoder"
 sonic_decoder_select="golomb rangecoder"
 sonic_encoder_select="golomb rangecoder"
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index a18c867..2d79aeb 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -137,7 +137,7 @@ typedef struct Plane {
 
 typedef struct DiracContext {
     AVCodecContext *avctx;
-    MpegvideoEncDSPContext mpvencdsp;
+    VideoDSPContext vdsp;
     DiracDSPContext diracdsp;
     GetBitContext gb;
     dirac_source_params source;
@@ -424,7 +424,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
     s->frame_number = -1;
 
     ff_diracdsp_init(&s->diracdsp);
-    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+    ff_videodsp_init(&s->vdsp, 8);
 
     for (i = 0; i < MAX_FRAMES; i++) {
         s->all_frames[i].avframe = av_frame_alloc();
@@ -1557,7 +1557,7 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in
     int i, edge = EDGE_WIDTH/2;
 
     ref->hpel[plane][0] = ref->avframe->data[plane];
-    s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
+    s->vdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
 
     /* no need for hpel if we only have fpel vectors */
     if (!s->mv_precision)
@@ -1574,9 +1574,9 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in
         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
                                       ref->hpel[plane][3], ref->hpel[plane][0],
                                       ref->avframe->linesize[plane], width, height);
-        s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+        s->vdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+        s->vdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+        s->vdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
     }
     ref->interpolated[plane] = 1;
 }
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index a4786b4..289fcd4 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1165,7 +1165,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
                         }
                     }
                     if ((s->width & 15) || (s->height & (vpad-1))) {
-                        s->mpvencdsp.draw_edges(dst, dst_stride,
+                        s->vdsp.draw_edges(dst, dst_stride,
                                                 w, h,
                                                 16>>h_shift,
                                                 vpad>>v_shift,
@@ -1549,19 +1549,19 @@ static void frame_end(MpegEncContext *s)
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
         int hshift = desc->log2_chroma_w;
         int vshift = desc->log2_chroma_h;
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
+        s->vdsp.draw_edges(s->current_picture.f->data[0],
                                 s->current_picture.f->linesize[0],
                                 s->h_edge_pos, s->v_edge_pos,
                                 EDGE_WIDTH, EDGE_WIDTH,
                                 EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
+        s->vdsp.draw_edges(s->current_picture.f->data[1],
                                 s->current_picture.f->linesize[1],
                                 s->h_edge_pos >> hshift,
                                 s->v_edge_pos >> vshift,
                                 EDGE_WIDTH >> hshift,
                                 EDGE_WIDTH >> vshift,
                                 EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
+        s->vdsp.draw_edges(s->current_picture.f->data[2],
                                 s->current_picture.f->linesize[2],
                                 s->h_edge_pos >> hshift,
                                 s->v_edge_pos >> vshift,
diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index 10ad369..373379c 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
@@ -126,34 +126,6 @@ static int pix_norm1_c(uint8_t *pix, int line_size)
     return s;
 }
 
-/* draw the edges of width 'w' of an image of size width, height */
-// FIXME: Check that this is OK for MPEG-4 interlaced.
-static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
-                           int w, int h, int sides)
-{
-    uint8_t *ptr = buf, *last_line;
-    int i;
-
-    /* left and right */
-    for (i = 0; i < height; i++) {
-        memset(ptr - w, ptr[0], w);
-        memset(ptr + width, ptr[width - 1], w);
-        ptr += wrap;
-    }
-
-    /* top and bottom + corners */
-    buf -= w;
-    last_line = buf + (height - 1) * wrap;
-    if (sides & EDGE_TOP)
-        for (i = 0; i < h; i++)
-            // top
-            memcpy(buf - (i + 1) * wrap, buf, width + w + w);
-    if (sides & EDGE_BOTTOM)
-        for (i = 0; i < h; i++)
-            // bottom
-            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
-}
-
 av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
                                      AVCodecContext *avctx)
 {
@@ -168,8 +140,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
     c->pix_sum   = pix_sum_c;
     c->pix_norm1 = pix_norm1_c;
 
-    c->draw_edges = draw_edges_8_c;
-
     if (ARCH_ARM)
         ff_mpegvideoencdsp_init_arm(c, avctx);
     if (ARCH_PPC)
diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h
index e12f4c6..81e3fe6 100644
--- a/libavcodec/mpegvideoencdsp.h
+++ b/libavcodec/mpegvideoencdsp.h
@@ -26,9 +26,6 @@
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
 
-#define EDGE_TOP    1
-#define EDGE_BOTTOM 2
-
 typedef struct MpegvideoEncDSPContext {
     int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
                         int16_t basis[64], int scale);
@@ -39,9 +36,6 @@ typedef struct MpegvideoEncDSPContext {
 
     void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
                       int src_wrap, int width, int height);
-
-    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
-                       int w, int h, int sides);
 } MpegvideoEncDSPContext;
 
 void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 711d1a4..57446dd 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -433,7 +433,6 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
     ff_videodsp_init(&s->vdsp, 8);
     ff_dwt_init(&s->dwt);
     ff_h264qpel_init(&s->h264qpel, 8);
-    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 
 #define mcf(dx,dy)\
     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
@@ -643,14 +642,14 @@ int ff_snow_frame_start(SnowContext *s){
    int h= s->avctx->height;
 
     if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
-        s->mpvencdsp.draw_edges(s->current_picture->data[0],
+        s->vdsp.draw_edges(s->current_picture->data[0],
                                 s->current_picture->linesize[0], w   , h   ,
                                 EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         if (s->current_picture->data[2]) {
-            s->mpvencdsp.draw_edges(s->current_picture->data[1],
+            s->vdsp.draw_edges(s->current_picture->data[1],
                                     s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
                                     EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
-            s->mpvencdsp.draw_edges(s->current_picture->data[2],
+            s->vdsp.draw_edges(s->current_picture->data[2],
                                     s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
                                     EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
         }
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 2cda5b3..a9c8518 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -115,7 +115,6 @@ typedef struct SnowContext{
     QpelDSPContext qdsp;
     VideoDSPContext vdsp;
     H264QpelContext h264qpel;
-    MpegvideoEncDSPContext mpvencdsp;
     SnowDWTContext dwt;
     AVFrame *new_picture;
     AVFrame *input_picture;              ///< new_picture with the internal linesizes
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index cb83821..eefbf08 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1568,7 +1568,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
                    &pict->data[i][y * pict->linesize[i]],
                    width>>hshift);
-        s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
+        s->vdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
                                 width >> hshift, height >> vshift,
                                 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
                                 EDGE_TOP | EDGE_BOTTOM);
diff --git a/libavcodec/videodsp.c b/libavcodec/videodsp.c
index ba618a7..2b91a5b 100644
--- a/libavcodec/videodsp.c
+++ b/libavcodec/videodsp.c
@@ -31,6 +31,35 @@
 #include "videodsp_template.c"
 #undef BIT_DEPTH
 
+
+/* draw the edges of width 'w' of an image of size width, height */
+// FIXME: Check that this is OK for MPEG-4 interlaced.
+static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
+                           int w, int h, int sides)
+{
+    uint8_t *ptr = buf, *last_line;
+    int i;
+
+    /* left and right */
+    for (i = 0; i < height; i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width - 1], w);
+        ptr += wrap;
+    }
+
+    /* top and bottom + corners */
+    buf -= w;
+    last_line = buf + (height - 1) * wrap;
+    if (sides & EDGE_TOP)
+        for (i = 0; i < h; i++)
+            // top
+            memcpy(buf - (i + 1) * wrap, buf, width + w + w);
+    if (sides & EDGE_BOTTOM)
+        for (i = 0; i < h; i++)
+            // bottom
+            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
+}
+
 static void just_return(uint8_t *buf, ptrdiff_t stride, int h)
 {
 }
@@ -43,6 +72,7 @@ av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
     } else {
         ctx->emulated_edge_mc = ff_emulated_edge_mc_16;
     }
+    ctx->draw_edges = draw_edges_8_c;
 
     if (ARCH_AARCH64)
         ff_videodsp_init_aarch64(ctx, bpc);
diff --git a/libavcodec/videodsp.h b/libavcodec/videodsp.h
index fc01a31..a6567fc 100644
--- a/libavcodec/videodsp.h
+++ b/libavcodec/videodsp.h
@@ -29,6 +29,9 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#define EDGE_TOP    1
+#define EDGE_BOTTOM 2
+
 #define EMULATED_EDGE(depth) \
 void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \
                                    ptrdiff_t dst_stride, ptrdiff_t src_stride, \
@@ -74,6 +77,9 @@ typedef struct VideoDSPContext {
      * @param h      number of lines to prefetch
      */
     void (*prefetch)(uint8_t *buf, ptrdiff_t stride, int h);
+
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
+                       int w, int h, int sides);
 } VideoDSPContext;
 
 void ff_videodsp_init(VideoDSPContext *ctx, int bpc);
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index d91b902..f9090f6 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -97,120 +97,6 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 #undef PHADDD
 #endif /* HAVE_SSSE3_INLINE */
 
-/* Draw the edges of width 'w' of an image of size width, height
- * this MMX version can only handle w == 8 || w == 16. */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
-                           int w, int h, int sides)
-{
-    uint8_t *ptr, *last_line;
-    int i;
-
-    last_line = buf + (height - 1) * wrap;
-    /* left and right */
-    ptr = buf;
-    if (w == 8) {
-        __asm__ volatile (
-            "1:                             \n\t"
-            "movd            (%0), %%mm0    \n\t"
-            "punpcklbw      %%mm0, %%mm0    \n\t"
-            "punpcklwd      %%mm0, %%mm0    \n\t"
-            "punpckldq      %%mm0, %%mm0    \n\t"
-            "movq           %%mm0, -8(%0)   \n\t"
-            "movq      -8(%0, %2), %%mm1    \n\t"
-            "punpckhbw      %%mm1, %%mm1    \n\t"
-            "punpckhwd      %%mm1, %%mm1    \n\t"
-            "punpckhdq      %%mm1, %%mm1    \n\t"
-            "movq           %%mm1, (%0, %2) \n\t"
-            "add               %1, %0       \n\t"
-            "cmp               %3, %0       \n\t"
-            "jb                1b           \n\t"
-            : "+r" (ptr)
-            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
-    } else if (w == 16) {
-        __asm__ volatile (
-            "1:                                 \n\t"
-            "movd            (%0), %%mm0        \n\t"
-            "punpcklbw      %%mm0, %%mm0        \n\t"
-            "punpcklwd      %%mm0, %%mm0        \n\t"
-            "punpckldq      %%mm0, %%mm0        \n\t"
-            "movq           %%mm0, -8(%0)       \n\t"
-            "movq           %%mm0, -16(%0)      \n\t"
-            "movq      -8(%0, %2), %%mm1        \n\t"
-            "punpckhbw      %%mm1, %%mm1        \n\t"
-            "punpckhwd      %%mm1, %%mm1        \n\t"
-            "punpckhdq      %%mm1, %%mm1        \n\t"
-            "movq           %%mm1,  (%0, %2)    \n\t"
-            "movq           %%mm1, 8(%0, %2)    \n\t"
-            "add               %1, %0           \n\t"
-            "cmp               %3, %0           \n\t"
-            "jb                1b               \n\t"
-            : "+r"(ptr)
-            : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
-            );
-    } else {
-        av_assert1(w == 4);
-        __asm__ volatile (
-            "1:                             \n\t"
-            "movd            (%0), %%mm0    \n\t"
-            "punpcklbw      %%mm0, %%mm0    \n\t"
-            "punpcklwd      %%mm0, %%mm0    \n\t"
-            "movd           %%mm0, -4(%0)   \n\t"
-            "movd      -4(%0, %2), %%mm1    \n\t"
-            "punpcklbw      %%mm1, %%mm1    \n\t"
-            "punpckhwd      %%mm1, %%mm1    \n\t"
-            "punpckhdq      %%mm1, %%mm1    \n\t"
-            "movd           %%mm1, (%0, %2) \n\t"
-            "add               %1, %0       \n\t"
-            "cmp               %3, %0       \n\t"
-            "jb                1b           \n\t"
-            : "+r" (ptr)
-            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
-              "r" (ptr + wrap * height));
-    }
-
-    /* top and bottom (and hopefully also the corners) */
-    if (sides & EDGE_TOP) {
-        for (i = 0; i < h; i += 4) {
-            ptr = buf - (i + 1) * wrap - w;
-            __asm__ volatile (
-                "1:                             \n\t"
-                "movq (%1, %0), %%mm0           \n\t"
-                "movq    %%mm0, (%0)            \n\t"
-                "movq    %%mm0, (%0, %2)        \n\t"
-                "movq    %%mm0, (%0, %2, 2)     \n\t"
-                "movq    %%mm0, (%0, %3)        \n\t"
-                "add        $8, %0              \n\t"
-                "cmp        %4, %0              \n\t"
-                "jb         1b                  \n\t"
-                : "+r" (ptr)
-                : "r" ((x86_reg) buf - (x86_reg) ptr - w),
-                  "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
-                  "r" (ptr + width + 2 * w));
-        }
-    }
-
-    if (sides & EDGE_BOTTOM) {
-        for (i = 0; i < h; i += 4) {
-            ptr = last_line + (i + 1) * wrap - w;
-            __asm__ volatile (
-                "1:                             \n\t"
-                "movq (%1, %0), %%mm0           \n\t"
-                "movq    %%mm0, (%0)            \n\t"
-                "movq    %%mm0, (%0, %2)        \n\t"
-                "movq    %%mm0, (%0, %2, 2)     \n\t"
-                "movq    %%mm0, (%0, %3)        \n\t"
-                "add        $8, %0              \n\t"
-                "cmp        %4, %0              \n\t"
-                "jb         1b                  \n\t"
-                : "+r" (ptr)
-                : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
-                  "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
-                  "r" (ptr + width + 2 * w));
-        }
-    }
-}
-
 #endif /* HAVE_INLINE_ASM */
 
 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
@@ -239,10 +125,6 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
             c->try_8x8basis = try_8x8basis_mmx;
         }
         c->add_8x8basis = add_8x8basis_mmx;
-
-        if (avctx->bits_per_raw_sample <= 8) {
-            c->draw_edges = draw_edges_mmx;
-        }
     }
 
     if (INLINE_AMD3DNOW(cpu_flags)) {
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 3218abd..dab6ec2 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -240,14 +240,136 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
 }
 #endif /* HAVE_YASM */
 
+#if HAVE_INLINE_ASM
+/* Draw the edges of width 'w' of an image of size width, height.
+ * This MMX version can only handle w == 4, w == 8 or w == 16. */
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
+                           int w, int h, int sides)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    last_line = buf + (height - 1) * wrap;
+    /* left and right */
+    ptr = buf;
+    if (w == 8) {
+        __asm__ volatile (
+            "1:                             \n\t"
+            "movd            (%0), %%mm0    \n\t"
+            "punpcklbw      %%mm0, %%mm0    \n\t"
+            "punpcklwd      %%mm0, %%mm0    \n\t"
+            "punpckldq      %%mm0, %%mm0    \n\t"
+            "movq           %%mm0, -8(%0)   \n\t"
+            "movq      -8(%0, %2), %%mm1    \n\t"
+            "punpckhbw      %%mm1, %%mm1    \n\t"
+            "punpckhwd      %%mm1, %%mm1    \n\t"
+            "punpckhdq      %%mm1, %%mm1    \n\t"
+            "movq           %%mm1, (%0, %2) \n\t"
+            "add               %1, %0       \n\t"
+            "cmp               %3, %0       \n\t"
+            "jb                1b           \n\t"
+            : "+r" (ptr)
+            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
+              "r" (ptr + wrap * height));
+    } else if (w == 16) {
+        __asm__ volatile (
+            "1:                                 \n\t"
+            "movd            (%0), %%mm0        \n\t"
+            "punpcklbw      %%mm0, %%mm0        \n\t"
+            "punpcklwd      %%mm0, %%mm0        \n\t"
+            "punpckldq      %%mm0, %%mm0        \n\t"
+            "movq           %%mm0, -8(%0)       \n\t"
+            "movq           %%mm0, -16(%0)      \n\t"
+            "movq      -8(%0, %2), %%mm1        \n\t"
+            "punpckhbw      %%mm1, %%mm1        \n\t"
+            "punpckhwd      %%mm1, %%mm1        \n\t"
+            "punpckhdq      %%mm1, %%mm1        \n\t"
+            "movq           %%mm1,  (%0, %2)    \n\t"
+            "movq           %%mm1, 8(%0, %2)    \n\t"
+            "add               %1, %0           \n\t"
+            "cmp               %3, %0           \n\t"
+            "jb                1b               \n\t"
+            : "+r"(ptr)
+            : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
+            );
+    } else {
+        av_assert1(w == 4);
+        __asm__ volatile (
+            "1:                             \n\t"
+            "movd            (%0), %%mm0    \n\t"
+            "punpcklbw      %%mm0, %%mm0    \n\t"
+            "punpcklwd      %%mm0, %%mm0    \n\t"
+            "movd           %%mm0, -4(%0)   \n\t"
+            "movd      -4(%0, %2), %%mm1    \n\t"
+            "punpcklbw      %%mm1, %%mm1    \n\t"
+            "punpckhwd      %%mm1, %%mm1    \n\t"
+            "punpckhdq      %%mm1, %%mm1    \n\t"
+            "movd           %%mm1, (%0, %2) \n\t"
+            "add               %1, %0       \n\t"
+            "cmp               %3, %0       \n\t"
+            "jb                1b           \n\t"
+            : "+r" (ptr)
+            : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
+              "r" (ptr + wrap * height));
+    }
+
+    /* top and bottom (and hopefully also the corners) */
+    if (sides & EDGE_TOP) {
+        for (i = 0; i < h; i += 4) {
+            ptr = buf - (i + 1) * wrap - w;
+            __asm__ volatile (
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq    %%mm0, (%0)            \n\t"
+                "movq    %%mm0, (%0, %2)        \n\t"
+                "movq    %%mm0, (%0, %2, 2)     \n\t"
+                "movq    %%mm0, (%0, %3)        \n\t"
+                "add        $8, %0              \n\t"
+                "cmp        %4, %0              \n\t"
+                "jb         1b                  \n\t"
+                : "+r" (ptr)
+                : "r" ((x86_reg) buf - (x86_reg) ptr - w),
+                  "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3),
+                  "r" (ptr + width + 2 * w));
+        }
+    }
+
+    if (sides & EDGE_BOTTOM) {
+        for (i = 0; i < h; i += 4) {
+            ptr = last_line + (i + 1) * wrap - w;
+            __asm__ volatile (
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq    %%mm0, (%0)            \n\t"
+                "movq    %%mm0, (%0, %2)        \n\t"
+                "movq    %%mm0, (%0, %2, 2)     \n\t"
+                "movq    %%mm0, (%0, %3)        \n\t"
+                "add        $8, %0              \n\t"
+                "cmp        %4, %0              \n\t"
+                "jb         1b                  \n\t"
+                : "+r" (ptr)
+                : "r" ((x86_reg) last_line - (x86_reg) ptr - w),
+                  "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3),
+                  "r" (ptr + width + 2 * w));
+        }
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
 void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
 void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h);
 
 av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
 {
-#if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
+    if (INLINE_MMX(cpu_flags) && bpc <= 8) {
+        ctx->draw_edges       = draw_edges_mmx;
+    }
+#endif /* HAVE_INLINE_ASM */
+
+#if HAVE_YASM
 #if ARCH_X86_32
     if (EXTERNAL_MMX(cpu_flags) && bpc <= 8) {
         ctx->emulated_edge_mc = emulated_edge_mc_mmx;
-- 
1.8.5.5



More information about the ffmpeg-devel mailing list