[FFmpeg-devel] [PATCH 02/21] vp3: use hpeldsp instead of dsputil for half-pel functions.

Ronald S. Bultje rsbultje at gmail.com
Mon Mar 11 00:54:28 CET 2013


From: "Ronald S. Bultje" <rsbultje at gmail.com>

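This makes vp3 independent of dsputil: the decoder now selects hpeldsp
in configure and takes its half-pel put functions from HpelDSPContext
(initialized with CODEC_FLAG_BITEXACT forced on) instead of DSPContext.

To support this on x86, ff_pb_7 moves from dsputil_mmx.c to constants.c
(dsputil.asm now imports it via cextern instead of defining a local copy),
and pb_1F/pb_81 become local constants in vp3dsp.asm.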
---
 configure                    | 2 +-
 libavcodec/vp3.c             | 9 +++++----
 libavcodec/x86/constants.c   | 1 +
 libavcodec/x86/dsputil.asm   | 3 ++-
 libavcodec/x86/dsputil_mmx.c | 3 ---
 libavcodec/x86/dsputil_mmx.h | 3 ---
 libavcodec/x86/vp3dsp.asm    | 5 +++--
 7 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/configure b/configure
index 1881fb4..f8dd3b7 100755
--- a/configure
+++ b/configure
@@ -1824,7 +1824,7 @@ vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel"
 vc1image_decoder_select="vc1_decoder"
 vorbis_decoder_select="mdct"
 vorbis_encoder_select="mdct"
-vp3_decoder_select="dsputil vp3dsp videodsp"
+vp3_decoder_select="hpeldsp vp3dsp videodsp"
 vp5_decoder_select="dsputil h264chroma videodsp vp3dsp"
 vp6_decoder_select="dsputil h264chroma huffman videodsp vp3dsp"
 vp6a_decoder_select="vp6_decoder"
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index ac17a30..a66364b 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -38,6 +38,7 @@
 #include "internal.h"
 #include "dsputil.h"
 #include "get_bits.h"
+#include "hpeldsp.h"
 #include "videodsp.h"
 #include "vp3data.h"
 #include "vp3dsp.h"
@@ -141,7 +142,7 @@ typedef struct Vp3DecodeContext {
     int keyframe;
     uint8_t idct_permutation[64];
     uint8_t idct_scantable[64];
-    DSPContext dsp;
+    HpelDSPContext hdsp;
     VideoDSPContext vdsp;
     VP3DSPContext vp3dsp;
     DECLARE_ALIGNED(16, int16_t, block)[64];
@@ -1565,7 +1566,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                            VP3 source but this would be slower as
                            put_no_rnd_pixels_tab is better optimzed */
                         if(motion_halfpel_index != 3){
-                            s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
+                            s->hdsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
                                 output_plane + first_pixel,
                                 motion_source, stride, 8);
                         }else{
@@ -1599,7 +1600,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                 } else {
 
                     /* copy directly from the previous frame */
-                    s->dsp.put_pixels_tab[1][0](
+                    s->hdsp.put_pixels_tab[1][0](
                         output_plane + first_pixel,
                         last_plane + first_pixel,
                         stride, 8);
@@ -1676,7 +1677,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     if (avctx->codec_id != AV_CODEC_ID_THEORA)
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_hpeldsp_init(&s->hdsp, avctx->flags | CODEC_FLAG_BITEXACT);
     ff_videodsp_init(&s->vdsp, 8);
     ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
 
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 821d73f..a0891e3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -36,4 +36,5 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64)   = { 0x0040004000400040ULL, 0x004
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_1)    = { 0x0101010101010101ULL, 0x0101010101010101ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(8,  const uint64_t, ff_pb_7)    =   0x0707070707070707ULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 9970c02..329780c 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -26,13 +26,14 @@
 SECTION_RODATA
 pb_f: times 16 db 15
 pb_zzzzzzzz77777777: times 8 db -1
-pb_7: times 8 db 7
 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
 pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
 pd_16384: times 4 dd 16384
 pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
+cextern pb_7
+
 SECTION_TEXT
 
 %macro SCALARPRODUCT 0
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 46c283a..0ee721e 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -52,10 +52,7 @@ DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255)  =   0x00ff00ff00ff00ffULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 0x0200020002000200ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
 
-DECLARE_ALIGNED(8,  const uint64_t, ff_pb_7)    =   0x0707070707070707ULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pb_1F)   =   0x1F1F1F1F1F1F1F1FULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3F)   =   0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pb_81)   =   0x8181818181818181ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
 
 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index 4b7a1fd..1e62c53 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -49,10 +49,7 @@ extern const uint64_t ff_pw_255;
 
 extern const xmm_reg  ff_pb_1;
 extern const xmm_reg  ff_pb_3;
-extern const uint64_t ff_pb_7;
-extern const uint64_t ff_pb_1F;
 extern const uint64_t ff_pb_3F;
-extern const uint64_t ff_pb_81;
 extern const xmm_reg  ff_pb_F8;
 extern const uint64_t ff_pb_FC;
 
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 423866c..a9ef727 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -33,12 +33,13 @@ vp3_idct_data: times 8 dw 64277
                times 8 dw 25080
                times 8 dw 12785
 
+pb_1F: times 8 db 0x1f
+pb_81: times 8 db 0x81
+
 cextern pb_1
 cextern pb_3
 cextern pb_7
-cextern pb_1F
 cextern pb_80
-cextern pb_81
 
 cextern pw_8
 
-- 
1.7.11.3



More information about the ffmpeg-devel mailing list