[FFmpeg-cvslog] dsputil: Split off *_8x8basis to a separate context
Diego Biurrun
git at videolan.org
Mon Jul 7 15:14:10 CEST 2014
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Mon Dec 30 19:19:39 2013 +0100| [8d686ca59db14900ad5c12b547fb8a7afc8b0b94] | committer: Diego Biurrun
dsputil: Split off *_8x8basis to a separate context
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8d686ca59db14900ad5c12b547fb8a7afc8b0b94
---
libavcodec/Makefile | 3 +-
libavcodec/dsputil.c | 32 -----
libavcodec/dsputil.h | 6 -
libavcodec/mpegvideo.h | 2 +
libavcodec/mpegvideo_enc.c | 13 +-
libavcodec/mpegvideoencdsp.c | 64 ++++++++++
libavcodec/mpegvideoencdsp.h | 41 +++++++
libavcodec/x86/Makefile | 3 +-
libavcodec/x86/dsputilenc_mmx.c | 79 -------------
..._qns_template.c => mpegvideoenc_qns_template.c} | 2 +-
libavcodec/x86/mpegvideoencdsp_init.c | 125 ++++++++++++++++++++
11 files changed, 245 insertions(+), 125 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f865d29..d3d531e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -67,7 +67,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \
OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
mpegvideo_motion.o mpegutils.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
- motion_est.o ratecontrol.o
+ motion_est.o ratecontrol.o \
+ mpegvideoencdsp.o
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 5e5ad93..68f5120 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -464,35 +464,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
return score1 + FFABS(score2) * 8;
}
-static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
- int16_t basis[64], int scale)
-{
- int i;
- unsigned int sum = 0;
-
- for (i = 0; i < 8 * 8; i++) {
- int b = rem[i] + ((basis[i] * scale +
- (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
- (BASIS_SHIFT - RECON_SHIFT));
- int w = weight[i];
- b >>= RECON_SHIFT;
- assert(-512 < b && b < 512);
-
- sum += (w * b) * (w * b) >> 4;
- }
- return sum >> 2;
-}
-
-static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
-{
- int i;
-
- for (i = 0; i < 8 * 8; i++)
- rem[i] += (basis[i] * scale +
- (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
- (BASIS_SHIFT - RECON_SHIFT);
-}
-
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
int stride, int h)
{
@@ -1126,9 +1097,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->nsse[0] = nsse16_c;
c->nsse[1] = nsse8_c;
- c->try_8x8basis = try_8x8basis_c;
- c->add_8x8basis = add_8x8basis_c;
-
c->shrink[0] = av_image_copy_plane;
c->shrink[1] = ff_shrink22;
c->shrink[2] = ff_shrink44;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index dfbca5a..f8b7b0d 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -87,12 +87,6 @@ typedef struct DSPContext {
void (*fdct)(int16_t *block /* align 16 */);
void (*fdct248)(int16_t *block /* align 16 */);
- int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
- int16_t basis[64], int scale);
- void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
-#define BASIS_SHIFT 16
-#define RECON_SHIFT 6
-
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides);
#define EDGE_WIDTH 16
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 27c72da..ba39f90 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -37,6 +37,7 @@
#include "hpeldsp.h"
#include "idctdsp.h"
#include "mpegvideodsp.h"
+#include "mpegvideoencdsp.h"
#include "put_bits.h"
#include "ratecontrol.h"
#include "parser.h"
@@ -355,6 +356,7 @@ typedef struct MpegEncContext {
HpelDSPContext hdsp;
IDCTDSPContext idsp;
MpegVideoDSPContext mdsp;
+ MpegvideoEncDSPContext mpvencdsp;
QpelDSPContext qdsp;
VideoDSPContext vdsp;
H263DSPContext h263dsp;
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 65e2a8c..f95a76b 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -701,6 +701,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
if (ARCH_X86)
ff_MPV_encode_init_x86(s);
+ ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
ff_qpeldsp_init(&s->qdsp);
s->avctx->coded_frame = s->current_picture.f;
@@ -3871,7 +3872,7 @@ STOP_TIMER("memset rem[]")}
run_tab[rle_index++]=run;
run=0;
- s->dsp.add_8x8basis(rem, basis[j], coeff);
+ s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
}else{
run++;
}
@@ -3885,7 +3886,7 @@ STOP_TIMER("init rem[]")
{START_TIMER
#endif
for(;;){
- int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
+ int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
int best_coeff=0;
int best_change=0;
int run2, best_unquant_change=0, analyze_gradient;
@@ -3929,7 +3930,8 @@ STOP_TIMER("dct")}
if(new_coeff >= 2048 || new_coeff < 0)
continue;
- score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
+ score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
+ new_coeff - old_coeff);
if(score<best_score){
best_score= score;
best_coeff= 0;
@@ -4052,7 +4054,8 @@ STOP_TIMER("dct")}
unquant_change= new_coeff - old_coeff;
assert((score < 100*lambda && score > -100*lambda) || lambda==0);
- score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
+ score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
+ unquant_change);
if(score<best_score){
best_score= score;
best_coeff= i;
@@ -4126,7 +4129,7 @@ if(256*256*256*64 % count == 0){
}
}
- s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
+ s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
}else{
break;
}
diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
new file mode 100644
index 0000000..16ff1f2
--- /dev/null
+++ b/libavcodec/mpegvideoencdsp.c
@@ -0,0 +1,64 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "mpegvideoencdsp.h"
+
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
+ int16_t basis[64], int scale)
+{
+ int i;
+ unsigned int sum = 0;
+
+ for (i = 0; i < 8 * 8; i++) {
+ int b = rem[i] + ((basis[i] * scale +
+ (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
+ (BASIS_SHIFT - RECON_SHIFT));
+ int w = weight[i];
+ b >>= RECON_SHIFT;
+ assert(-512 < b && b < 512);
+
+ sum += (w * b) * (w * b) >> 4;
+ }
+ return sum >> 2;
+}
+
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
+{
+ int i;
+
+ for (i = 0; i < 8 * 8; i++)
+ rem[i] += (basis[i] * scale +
+ (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
+ (BASIS_SHIFT - RECON_SHIFT);
+}
+
+av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+{
+ c->try_8x8basis = try_8x8basis_c;
+ c->add_8x8basis = add_8x8basis_c;
+
+ if (ARCH_X86)
+ ff_mpegvideoencdsp_init_x86(c, avctx);
+}
diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h
new file mode 100644
index 0000000..2e8427d
--- /dev/null
+++ b/libavcodec/mpegvideoencdsp.h
@@ -0,0 +1,41 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEOENCDSP_H
+#define AVCODEC_MPEGVIDEOENCDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+typedef struct MpegvideoEncDSPContext {
+ int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
+ int16_t basis[64], int scale);
+ void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+
+} MpegvideoEncDSPContext;
+
+void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+
+#endif /* AVCODEC_MPEGVIDEOENCDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 1e76b4c..bfc1373 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -23,7 +23,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
x86/mpegvideodsp.o
-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
+OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
+ x86/mpegvideoencdsp_init.o
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 81c9d13..7af0913 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -805,72 +805,6 @@ DCT_SAD_FUNC(ssse3)
#undef HSUM
#undef DCT_SAD
-#define PHADDD(a, t) \
- "movq " #a ", " #t " \n\t" \
- "psrlq $32, " #a " \n\t" \
- "paddd " #t ", " #a " \n\t"
-
-/*
- * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
- * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
- * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
- */
-#define PMULHRW(x, y, s, o) \
- "pmulhw " #s ", " #x " \n\t" \
- "pmulhw " #s ", " #y " \n\t" \
- "paddw " #o ", " #x " \n\t" \
- "paddw " #o ", " #y " \n\t" \
- "psraw $1, " #x " \n\t" \
- "psraw $1, " #y " \n\t"
-#define DEF(x) x ## _mmx
-#define SET_RND MOVQ_WONE
-#define SCALE_OFFSET 1
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-
-#define DEF(x) x ## _3dnow
-#define SET_RND(x)
-#define SCALE_OFFSET 0
-#define PMULHRW(x, y, s, o) \
- "pmulhrw " #s ", " #x " \n\t" \
- "pmulhrw " #s ", " #y " \n\t"
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-
-#if HAVE_SSSE3_INLINE
-#undef PHADDD
-#define DEF(x) x ## _ssse3
-#define SET_RND(x)
-#define SCALE_OFFSET -1
-
-#define PHADDD(a, t) \
- "pshufw $0x0E, " #a ", " #t " \n\t" \
- /* faster than phaddd on core2 */ \
- "paddd " #t ", " #a " \n\t"
-
-#define PMULHRW(x, y, s, o) \
- "pmulhrsw " #s ", " #x " \n\t" \
- "pmulhrsw " #s ", " #y " \n\t"
-
-#include "dsputil_qns_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-#undef PHADDD
-#endif /* HAVE_SSSE3_INLINE */
-
#endif /* HAVE_INLINE_ASM */
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
@@ -921,16 +855,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->nsse[1] = nsse8_mmx;
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
c->vsad[0] = vsad16_mmx;
- c->try_8x8basis = try_8x8basis_mmx;
- }
- c->add_8x8basis = add_8x8basis_mmx;
- }
-
- if (INLINE_AMD3DNOW(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_3dnow;
}
- c->add_8x8basis = add_8x8basis_3dnow;
}
if (INLINE_MMXEXT(cpu_flags)) {
@@ -956,10 +881,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
#if HAVE_SSSE3_INLINE
if (INLINE_SSSE3(cpu_flags)) {
- if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
- c->try_8x8basis = try_8x8basis_ssse3;
- }
- c->add_8x8basis = add_8x8basis_ssse3;
c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
}
#endif
diff --git a/libavcodec/x86/dsputil_qns_template.c b/libavcodec/x86/mpegvideoenc_qns_template.c
similarity index 98%
rename from libavcodec/x86/dsputil_qns_template.c
rename to libavcodec/x86/mpegvideoenc_qns_template.c
index 321d14a..8d8d687 100644
--- a/libavcodec/x86/dsputil_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
@@ -1,5 +1,5 @@
/*
- * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
+ * QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
* Copyright (c) 2004 Michael Niedermayer
*
* MMX optimization by Michael Niedermayer <michaelni at gmx.at>
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
new file mode 100644
index 0000000..db2c37f
--- /dev/null
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -0,0 +1,125 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/mpegvideoencdsp.h"
+
+#if HAVE_INLINE_ASM
+
+#define PHADDD(a, t) \
+ "movq " #a ", " #t " \n\t" \
+ "psrlq $32, " #a " \n\t" \
+ "paddd " #t ", " #a " \n\t"
+
+/*
+ * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
+ * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
+ * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
+ */
+#define PMULHRW(x, y, s, o) \
+ "pmulhw " #s ", " #x " \n\t" \
+ "pmulhw " #s ", " #y " \n\t" \
+ "paddw " #o ", " #x " \n\t" \
+ "paddw " #o ", " #y " \n\t" \
+ "psraw $1, " #x " \n\t" \
+ "psraw $1, " #y " \n\t"
+#define DEF(x) x ## _mmx
+#define SET_RND MOVQ_WONE
+#define SCALE_OFFSET 1
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+
+#define DEF(x) x ## _3dnow
+#define SET_RND(x)
+#define SCALE_OFFSET 0
+#define PMULHRW(x, y, s, o) \
+ "pmulhrw " #s ", " #x " \n\t" \
+ "pmulhrw " #s ", " #y " \n\t"
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+
+#if HAVE_SSSE3_INLINE
+#undef PHADDD
+#define DEF(x) x ## _ssse3
+#define SET_RND(x)
+#define SCALE_OFFSET -1
+
+#define PHADDD(a, t) \
+ "pshufw $0x0E, " #a ", " #t " \n\t" \
+ /* faster than phaddd on core2 */ \
+ "paddd " #t ", " #a " \n\t"
+
+#define PMULHRW(x, y, s, o) \
+ "pmulhrsw " #s ", " #x " \n\t" \
+ "pmulhrsw " #s ", " #y " \n\t"
+
+#include "mpegvideoenc_qns_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+#undef PHADDD
+#endif /* HAVE_SSSE3_INLINE */
+
+#endif /* HAVE_INLINE_ASM */
+
+av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx)
+{
+#if HAVE_INLINE_ASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (INLINE_MMX(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_mmx;
+ }
+ c->add_8x8basis = add_8x8basis_mmx;
+ }
+
+ if (INLINE_AMD3DNOW(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_3dnow;
+ }
+ c->add_8x8basis = add_8x8basis_3dnow;
+ }
+
+#if HAVE_SSSE3_INLINE
+ if (INLINE_SSSE3(cpu_flags)) {
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->try_8x8basis = try_8x8basis_ssse3;
+ }
+ c->add_8x8basis = add_8x8basis_ssse3;
+ }
+#endif /* HAVE_SSSE3_INLINE */
+
+#endif /* HAVE_INLINE_ASM */
+}
More information about the ffmpeg-cvslog
mailing list