[FFmpeg-cvslog] dsputil: Split off IDCT bits into their own context
Diego Biurrun
git at videolan.org
Tue Jul 1 15:36:42 CEST 2014
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Fri Jan 24 11:55:16 2014 +0100| [e3fcb14347466095839c2a3c47ebecff02da891e] | committer: Diego Biurrun
dsputil: Split off IDCT bits into their own context
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e3fcb14347466095839c2a3c47ebecff02da891e
---
configure | 40 ++--
doc/optimization.txt | 3 -
libavcodec/Makefile | 5 +-
libavcodec/aic.c | 23 ++-
libavcodec/arm/Makefile | 23 ++-
libavcodec/arm/dsputil_arm.h | 4 -
libavcodec/arm/dsputil_armv6.S | 27 ---
libavcodec/arm/dsputil_init_arm.c | 60 ------
libavcodec/arm/dsputil_init_armv6.c | 18 --
libavcodec/arm/{dsputil_arm.S => idctdsp_arm.S} | 2 +-
libavcodec/arm/idctdsp_arm.h | 34 ++++
libavcodec/arm/idctdsp_armv6.S | 48 +++++
libavcodec/arm/idctdsp_init_arm.c | 98 ++++++++++
...putil_init_armv5te.c => idctdsp_init_armv5te.c} | 6 +-
libavcodec/arm/idctdsp_init_armv6.c | 48 +++++
.../{dsputil_init_neon.c => idctdsp_init_neon.c} | 8 +-
libavcodec/arm/{dsputil_neon.S => idctdsp_neon.S} | 2 +-
libavcodec/asv.c | 1 -
libavcodec/asv.h | 2 +
libavcodec/asvdec.c | 16 +-
libavcodec/asvenc.c | 1 +
libavcodec/cavs.c | 7 +-
libavcodec/cavs.h | 4 +-
libavcodec/cavsdsp.c | 2 +-
libavcodec/dnxhddec.c | 52 +++---
libavcodec/dnxhdenc.c | 9 +-
libavcodec/dsputil.c | 162 +---------------
libavcodec/dsputil.h | 66 -------
libavcodec/dvdec.c | 9 +-
libavcodec/dxva2_mpeg2.c | 2 +-
libavcodec/eamad.c | 9 +-
libavcodec/eatgq.c | 2 +-
libavcodec/eatqi.c | 7 +-
libavcodec/g2meet.c | 12 +-
libavcodec/h263.c | 8 +-
libavcodec/idctdsp.c | 197 ++++++++++++++++++++
libavcodec/idctdsp.h | 104 +++++++++++
libavcodec/intrax8.c | 15 +-
libavcodec/ljpegenc.c | 9 +-
libavcodec/mdec.c | 20 +-
libavcodec/mimic.c | 10 +-
libavcodec/mjpegdec.c | 10 +-
libavcodec/mjpegdec.h | 4 +-
libavcodec/mjpegenc_common.c | 2 +-
libavcodec/mjpegenc_common.h | 2 +-
libavcodec/mpeg12dec.c | 30 +--
libavcodec/mpeg4videodec.c | 39 ++--
libavcodec/mpeg4videoenc.c | 28 +--
libavcodec/mpegvideo.c | 52 +++---
libavcodec/mpegvideo.h | 2 +
libavcodec/mpegvideo_enc.c | 20 +-
libavcodec/mpegvideo_xvmc.c | 2 +-
libavcodec/msmpeg4.c | 10 +-
libavcodec/nuv.c | 1 +
libavcodec/ppc/Makefile | 2 +-
libavcodec/ppc/dsputil_altivec.h | 3 -
libavcodec/ppc/dsputil_ppc.c | 6 -
libavcodec/ppc/{idct_altivec.c => idctdsp.c} | 30 ++-
libavcodec/proresdec.c | 2 +-
libavcodec/proresdsp.c | 2 +-
libavcodec/rtjpeg.c | 8 +-
libavcodec/rtjpeg.h | 5 +-
libavcodec/vc1dec.c | 116 +++++++-----
libavcodec/wmv2.c | 23 +--
libavcodec/wmv2dsp.c | 2 +-
libavcodec/x86/Makefile | 10 +-
libavcodec/x86/cavsdsp.c | 3 +-
libavcodec/x86/dsputil_init.c | 85 ---------
libavcodec/x86/dsputil_mmx.c | 135 --------------
libavcodec/x86/dsputil_x86.h | 7 -
libavcodec/x86/idct_mmx_xvid.c | 2 +-
libavcodec/x86/idct_sse2_xvid.c | 2 +-
libavcodec/x86/idctdsp.h | 31 +++
libavcodec/x86/idctdsp_init.c | 106 +++++++++++
libavcodec/x86/idctdsp_mmx.c | 168 +++++++++++++++++
libavcodec/x86/mpegvideoenc_template.c | 2 +-
libavcodec/x86/proresdsp_init.c | 2 +-
libavcodec/x86/simple_idct.c | 2 +-
78 files changed, 1235 insertions(+), 896 deletions(-)
diff --git a/configure b/configure
index 7ea15aa..be97868 100755
--- a/configure
+++ b/configure
@@ -1546,6 +1546,7 @@ CONFIG_EXTRA="
huffman
huffyuvdsp
huffyuvencdsp
+ idctdsp
intrax8
lgplv3
lpc
@@ -1703,6 +1704,7 @@ threads_if_any="$THREADS_LIST"
# subsystems
dct_select="rdft"
+dsputil_select="idctdsp"
error_resilience_select="dsputil"
intrax8_select="error_resilience"
mdct_select="fft"
@@ -1710,7 +1712,7 @@ rdft_select="fft"
mpeg_er_select="error_resilience"
mpegaudio_select="mpegaudiodsp"
mpegaudiodsp_select="dct"
-mpegvideo_select="blockdsp dsputil hpeldsp videodsp"
+mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
mpegvideoenc_select="dsputil mpegvideo qpeldsp"
# decoders / encoders
@@ -1720,16 +1722,16 @@ aac_latm_decoder_select="aac_decoder aac_latm_parser"
ac3_decoder_select="ac3_parser ac3dsp bswapdsp mdct"
ac3_encoder_select="ac3dsp audiodsp dsputil mdct"
ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct"
-aic_decoder_select="dsputil golomb"
+aic_decoder_select="golomb idctdsp"
alac_encoder_select="lpc"
als_decoder_select="bswapdsp"
amrnb_decoder_select="lsp"
amrwb_decoder_select="lsp"
amv_decoder_select="sp5x_decoder"
ape_decoder_select="bswapdsp"
-asv1_decoder_select="blockdsp bswapdsp dsputil"
+asv1_decoder_select="blockdsp bswapdsp idctdsp"
asv1_encoder_select="bswapdsp dsputil"
-asv2_decoder_select="blockdsp bswapdsp dsputil"
+asv2_decoder_select="blockdsp bswapdsp idctdsp"
asv2_encoder_select="bswapdsp dsputil"
atrac1_decoder_select="mdct sinewin"
atrac3_decoder_select="mdct"
@@ -1737,23 +1739,23 @@ atrac3p_decoder_select="mdct sinewin"
bink_decoder_select="blockdsp hpeldsp"
binkaudio_dct_decoder_select="mdct rdft dct sinewin"
binkaudio_rdft_decoder_select="mdct rdft sinewin"
-cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
+cavs_decoder_select="blockdsp golomb h264chroma idctdsp qpeldsp videodsp"
cllc_decoder_select="bswapdsp"
comfortnoise_encoder_select="lpc"
cook_decoder_select="audiodsp mdct sinewin"
cscd_decoder_select="lzo"
cscd_decoder_suggest="zlib"
dca_decoder_select="mdct"
-dnxhd_decoder_select="blockdsp dsputil"
-dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc"
-dvvideo_decoder_select="dsputil"
+dnxhd_decoder_select="blockdsp idctdsp"
+dnxhd_encoder_select="aandcttables blockdsp dsputil idctdsp mpegvideoenc"
+dvvideo_decoder_select="idctdsp"
dvvideo_encoder_select="dsputil"
dxa_decoder_deps="zlib"
eac3_decoder_select="ac3_decoder"
eac3_encoder_select="ac3_encoder"
-eamad_decoder_select="aandcttables blockdsp bswapdsp dsputil mpegvideo"
-eatgq_decoder_select="aandcttables dsputil"
-eatqi_decoder_select="aandcttables blockdsp bswapdsp dsputil mpeg1video_decoder"
+eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo"
+eatgq_decoder_select="aandcttables idctdsp"
+eatqi_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpeg1video_decoder"
exr_decoder_deps="zlib"
ffv1_decoder_select="golomb rangecoder"
ffv1_encoder_select="rangecoder"
@@ -1770,7 +1772,7 @@ flv_encoder_select="h263_encoder"
fourxm_decoder_select="blockdsp bswapdsp"
fraps_decoder_select="bswapdsp huffman"
g2m_decoder_deps="zlib"
-g2m_decoder_select="blockdsp dsputil"
+g2m_decoder_select="blockdsp idctdsp"
h261_decoder_select="mpeg_er mpegvideo"
h261_encoder_select="aandcttables mpegvideoenc"
h263_decoder_select="error_resilience h263_parser h263dsp mpeg_er mpegvideo qpeldsp"
@@ -1790,12 +1792,12 @@ jpegls_decoder_select="golomb mjpeg_decoder"
jpegls_encoder_select="golomb"
jv_decoder_select="blockdsp"
lagarith_decoder_select="huffyuvdsp"
-ljpeg_encoder_select="aandcttables dsputil"
+ljpeg_encoder_select="aandcttables idctdsp"
loco_decoder_select="golomb"
-mdec_decoder_select="blockdsp dsputil mpegvideo"
+mdec_decoder_select="blockdsp idctdsp mpegvideo"
metasound_decoder_select="lsp mdct sinewin"
-mimic_decoder_select="blockdsp bswapdsp dsputil hpeldsp"
-mjpeg_decoder_select="blockdsp dsputil hpeldsp"
+mimic_decoder_select="blockdsp bswapdsp hpeldsp idctdsp"
+mjpeg_decoder_select="blockdsp hpeldsp idctdsp"
mjpeg_encoder_select="aandcttables mpegvideoenc"
mjpegb_decoder_select="mjpeg_decoder"
mlp_decoder_select="mlp_parser"
@@ -1829,13 +1831,13 @@ mss2_decoder_select="error_resilience mpeg_er qpeldsp vc1_decoder"
mxpeg_decoder_select="mjpeg_decoder"
nellymoser_decoder_select="mdct sinewin"
nellymoser_encoder_select="audio_frame_queue mdct sinewin"
-nuv_decoder_select="dsputil lzo"
+nuv_decoder_select="idctdsp lzo"
on2avc_decoder_select="mdct"
opus_decoder_deps="avresample"
png_decoder_deps="zlib"
png_encoder_deps="zlib"
png_encoder_select="huffyuvencdsp"
-prores_decoder_select="dsputil"
+prores_decoder_select="idctdsp"
prores_encoder_select="dsputil"
qcelp_decoder_select="lsp"
qdm2_decoder_select="mdct rdft mpegaudiodsp"
@@ -1888,7 +1890,7 @@ wmav2_encoder_select="mdct sinewin"
wmavoice_decoder_select="lsp rdft dct mdct sinewin"
wmv1_decoder_select="h263_decoder"
wmv1_encoder_select="h263_encoder"
-wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp"
+wmv2_decoder_select="blockdsp h263_decoder idctdsp intrax8 videodsp"
wmv2_encoder_select="h263_encoder"
wmv3_decoder_select="vc1_decoder"
wmv3image_decoder_select="wmv3_decoder"
diff --git a/doc/optimization.txt b/doc/optimization.txt
index b51183f..b3dca64 100644
--- a/doc/optimization.txt
+++ b/doc/optimization.txt
@@ -136,9 +136,6 @@ dct_unquantize_mpeg2
dct_unquantize_h263
Used in MPEG-4/H.263 en/decoding.
-FIXME remaining functions?
-BTW, most of these functions are in dsputil.c/.h, some are in mpegvideo.c/.h.
-
Alignment:
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index bfe50f3..dc374cb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -33,9 +33,8 @@ OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
OBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
OBJS-$(CONFIG_CABAC) += cabac.o
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
+OBJS-$(CONFIG_DSPUTIL) += dsputil.o
OBJS-$(CONFIG_DXVA2) += dxva2.o
-OBJS-$(CONFIG_DSPUTIL) += dsputil.o faanidct.o \
- simple_idct.o jrevdct.o
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o
FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o
@@ -51,6 +50,8 @@ OBJS-$(CONFIG_HPELDSP) += hpeldsp.o
OBJS-$(CONFIG_HUFFMAN) += huffman.o
OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o
OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o
+OBJS-$(CONFIG_IDCTDSP) += idctdsp.o faanidct.o \
+ simple_idct.o jrevdct.o
OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o
OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o
OBJS-$(CONFIG_LPC) += lpc.o
diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index 68ae728..dac9d8b 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c
@@ -24,10 +24,10 @@
#include "avcodec.h"
#include "bytestream.h"
-#include "dsputil.h"
#include "internal.h"
#include "get_bits.h"
#include "golomb.h"
+#include "idctdsp.h"
#include "unary.h"
#define AIC_HDR_SIZE 24
@@ -139,7 +139,7 @@ static const uint8_t *aic_scan[NUM_BANDS] = {
typedef struct AICContext {
AVCodecContext *avctx;
AVFrame *frame;
- DSPContext dsp;
+ IDCTDSPContext idsp;
ScanTable scantable;
int num_x_slices;
@@ -336,16 +336,15 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
recombine_block_il(ctx->block, ctx->scantable.permutated,
&base_y, &ext_y, blk);
unquant_block(ctx->block, ctx->quant);
- ctx->dsp.idct(ctx->block);
+ ctx->idsp.idct(ctx->block);
if (!ctx->interlaced) {
dst = Y + (blk >> 1) * 8 * ystride + (blk & 1) * 8;
- ctx->dsp.put_signed_pixels_clamped(ctx->block, dst,
- ystride);
+ ctx->idsp.put_signed_pixels_clamped(ctx->block, dst, ystride);
} else {
dst = Y + (blk & 1) * 8 + (blk >> 1) * ystride;
- ctx->dsp.put_signed_pixels_clamped(ctx->block, dst,
- ystride * 2);
+ ctx->idsp.put_signed_pixels_clamped(ctx->block, dst,
+ ystride * 2);
}
}
Y += 16;
@@ -354,9 +353,9 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
recombine_block(ctx->block, ctx->scantable.permutated,
&base_c, &ext_c);
unquant_block(ctx->block, ctx->quant);
- ctx->dsp.idct(ctx->block);
- ctx->dsp.put_signed_pixels_clamped(ctx->block, C[blk],
- ctx->frame->linesize[blk + 1]);
+ ctx->idsp.idct(ctx->block);
+ ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk],
+ ctx->frame->linesize[blk + 1]);
C[blk] += 8;
}
}
@@ -426,11 +425,11 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
- ff_dsputil_init(&ctx->dsp, avctx);
+ ff_idctdsp_init(&ctx->idsp, avctx);
for (i = 0; i < 64; i++)
scan[i] = i;
- ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, scan);
+ ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan);
ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index eb92a8c..3a3e244 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -6,10 +6,7 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o
OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
- arm/dsputil_arm.o \
- arm/jrevdct_arm.o \
- arm/simple_idct_arm.o
+OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o
OBJS-$(CONFIG_FFT) += arm/fft_init_arm.o \
arm/fft_fixed_init_arm.o
OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o
@@ -18,6 +15,10 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \
arm/hpeldsp_arm.o
+OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_arm.o \
+ arm/idctdsp_arm.o \
+ arm/jrevdct_arm.o \
+ arm/simple_idct_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
@@ -40,7 +41,7 @@ OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_init_arm.o
OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_arm.o \
arm/rv40dsp_init_arm.o
-ARMV5TE-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv5te.o \
+ARMV5TE-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv5te.o \
arm/simple_idct_armv5te.o
ARMV5TE-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_armv5te.o \
arm/mpegvideo_armv5te_s.o
@@ -51,11 +52,13 @@ ARMV5TE-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv5te.o
ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \
- arm/dsputil_armv6.o \
- arm/simple_idct_armv6.o
+ arm/dsputil_armv6.o
ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
arm/hpeldsp_armv6.o
+ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
+ arm/idctdsp_armv6.o \
+ arm/simple_idct_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
@@ -83,9 +86,6 @@ NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \
arm/int_neon.o
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
arm/blockdsp_neon.o
-NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
- arm/dsputil_neon.o \
- arm/simple_idct_neon.o
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
arm/fft_fixed_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o
@@ -96,6 +96,9 @@ NEON-OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_neon.o \
arm/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_neon.o \
arm/hpeldsp_neon.o
+NEON-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_neon.o \
+ arm/idctdsp_neon.o \
+ arm/simple_idct_neon.o
NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \
arm/mdct_fixed_neon.o
NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h
index 6080203..5b976aa 100644
--- a/libavcodec/arm/dsputil_arm.h
+++ b/libavcodec/arm/dsputil_arm.h
@@ -24,11 +24,7 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
-void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx,
- unsigned high_bit_depth);
void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
-void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
- unsigned high_bit_depth);
#endif /* AVCODEC_ARM_DSPUTIL_ARM_H */
diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S
index e667a47..b89171f 100644
--- a/libavcodec/arm/dsputil_armv6.S
+++ b/libavcodec/arm/dsputil_armv6.S
@@ -20,33 +20,6 @@
#include "libavutil/arm/asm.S"
-function ff_add_pixels_clamped_armv6, export=1
- push {r4-r8,lr}
- mov r3, #8
-1:
- ldm r0!, {r4,r5,r12,lr}
- ldrd r6, r7, [r1]
- pkhbt r8, r4, r5, lsl #16
- pkhtb r5, r5, r4, asr #16
- pkhbt r4, r12, lr, lsl #16
- pkhtb lr, lr, r12, asr #16
- pld [r1, r2]
- uxtab16 r8, r8, r6
- uxtab16 r5, r5, r6, ror #8
- uxtab16 r4, r4, r7
- uxtab16 lr, lr, r7, ror #8
- usat16 r8, #8, r8
- usat16 r5, #8, r5
- usat16 r4, #8, r4
- usat16 lr, #8, lr
- orr r6, r8, r5, lsl #8
- orr r7, r4, lr, lsl #8
- subs r3, r3, #1
- strd_post r6, r7, r1, r2
- bgt 1b
- pop {r4-r8,pc}
-endfunc
-
function ff_get_pixels_armv6, export=1
pld [r1, r2]
push {r4-r8, lr}
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 3310908..a8c806a 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -28,71 +28,11 @@
#include "libavcodec/dsputil.h"
#include "dsputil_arm.h"
-void ff_j_rev_dct_arm(int16_t *data);
-void ff_simple_idct_arm(int16_t *data);
-
-/* XXX: local hack */
-static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
-static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
-
-void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
- int line_size);
-
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
- * converted */
-static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_j_rev_dct_arm(block);
- ff_put_pixels_clamped(block, dest, line_size);
-}
-
-static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_j_rev_dct_arm(block);
- ff_add_pixels_clamped(block, dest, line_size);
-}
-
-static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_simple_idct_arm(block);
- ff_put_pixels_clamped(block, dest, line_size);
-}
-
-static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_simple_idct_arm(block);
- ff_add_pixels_clamped(block, dest, line_size);
-}
-
av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
int cpu_flags = av_get_cpu_flags();
- ff_put_pixels_clamped = c->put_pixels_clamped;
- ff_add_pixels_clamped = c->add_pixels_clamped;
-
- if (!high_bit_depth) {
- if (avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_ARM) {
- c->idct_put = j_rev_dct_arm_put;
- c->idct_add = j_rev_dct_arm_add;
- c->idct = ff_j_rev_dct_arm;
- c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
- } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) {
- c->idct_put = simple_idct_arm_put;
- c->idct_add = simple_idct_arm_add;
- c->idct = ff_simple_idct_arm;
- c->idct_permutation_type = FF_NO_IDCT_PERM;
- }
- }
-
- c->add_pixels_clamped = ff_add_pixels_clamped_arm;
-
- if (have_armv5te(cpu_flags))
- ff_dsputil_init_armv5te(c, avctx, high_bit_depth);
if (have_armv6(cpu_flags))
ff_dsputil_init_armv6(c, avctx, high_bit_depth);
- if (have_neon(cpu_flags))
- ff_dsputil_init_neon(c, avctx, high_bit_depth);
}
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 2b1002b..fab5e0d 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -26,13 +26,6 @@
#include "libavcodec/mpegvideo.h"
#include "dsputil_arm.h"
-void ff_simple_idct_armv6(int16_t *data);
-void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
-void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
-
-void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
- int line_size);
-
void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
const uint8_t *s2, int stride);
@@ -56,17 +49,6 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
- if (!high_bit_depth) {
- if (avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
- c->idct_put = ff_simple_idct_put_armv6;
- c->idct_add = ff_simple_idct_add_armv6;
- c->idct = ff_simple_idct_armv6;
- c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
- }
- }
- c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
-
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_armv6;
c->diff_pixels = ff_diff_pixels_armv6;
diff --git a/libavcodec/arm/dsputil_arm.S b/libavcodec/arm/idctdsp_arm.S
similarity index 99%
rename from libavcodec/arm/dsputil_arm.S
rename to libavcodec/arm/idctdsp_arm.S
index 82fcf2a..34f467e 100644
--- a/libavcodec/arm/dsputil_arm.S
+++ b/libavcodec/arm/idctdsp_arm.S
@@ -1,5 +1,5 @@
@
-@ ARMv4 optimized DSP utils
+@ ARMv4-optimized IDCT functions
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of Libav.
diff --git a/libavcodec/arm/idctdsp_arm.h b/libavcodec/arm/idctdsp_arm.h
new file mode 100644
index 0000000..9012b82
--- /dev/null
+++ b/libavcodec/arm/idctdsp_arm.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_IDCTDSP_ARM_H
+#define AVCODEC_ARM_IDCTDSP_ARM_H
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+
+void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+
+#endif /* AVCODEC_ARM_IDCTDSP_ARM_H */
diff --git a/libavcodec/arm/idctdsp_armv6.S b/libavcodec/arm/idctdsp_armv6.S
new file mode 100644
index 0000000..c180d73
--- /dev/null
+++ b/libavcodec/arm/idctdsp_armv6.S
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_add_pixels_clamped_armv6, export=1
+ push {r4-r8,lr}
+ mov r3, #8
+1:
+ ldm r0!, {r4,r5,r12,lr}
+ ldrd r6, r7, [r1]
+ pkhbt r8, r4, r5, lsl #16
+ pkhtb r5, r5, r4, asr #16
+ pkhbt r4, r12, lr, lsl #16
+ pkhtb lr, lr, r12, asr #16
+ pld [r1, r2]
+ uxtab16 r8, r8, r6
+ uxtab16 r5, r5, r6, ror #8
+ uxtab16 r4, r4, r7
+ uxtab16 lr, lr, r7, ror #8
+ usat16 r8, #8, r8
+ usat16 r5, #8, r5
+ usat16 r4, #8, r4
+ usat16 lr, #8, lr
+ orr r6, r8, r5, lsl #8
+ orr r7, r4, lr, lsl #8
+ subs r3, r3, #1
+ strd_post r6, r7, r1, r2
+ bgt 1b
+ pop {r4-r8,pc}
+endfunc
diff --git a/libavcodec/arm/idctdsp_init_arm.c b/libavcodec/arm/idctdsp_init_arm.c
new file mode 100644
index 0000000..b4d1899
--- /dev/null
+++ b/libavcodec/arm/idctdsp_init_arm.c
@@ -0,0 +1,98 @@
+/*
+ * ARM-optimized IDCT functions
+ * Copyright (c) 2001 Lionel Ulmer
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp_arm.h"
+
+void ff_j_rev_dct_arm(int16_t *data);
+void ff_simple_idct_arm(int16_t *data);
+
+/* XXX: local hack */
+static void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
+static void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, int line_size);
+
+void ff_add_pixels_clamped_arm(const int16_t *block, uint8_t *dest,
+ int line_size);
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ * converted */
+static void j_rev_dct_arm_put(uint8_t *dest, int line_size, int16_t *block)
+{
+ ff_j_rev_dct_arm(block);
+ ff_put_pixels_clamped(block, dest, line_size);
+}
+
+static void j_rev_dct_arm_add(uint8_t *dest, int line_size, int16_t *block)
+{
+ ff_j_rev_dct_arm(block);
+ ff_add_pixels_clamped(block, dest, line_size);
+}
+
+static void simple_idct_arm_put(uint8_t *dest, int line_size, int16_t *block)
+{
+ ff_simple_idct_arm(block);
+ ff_put_pixels_clamped(block, dest, line_size);
+}
+
+static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block)
+{
+ ff_simple_idct_arm(block);
+ ff_add_pixels_clamped(block, dest, line_size);
+}
+
+av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ ff_put_pixels_clamped = c->put_pixels_clamped;
+ ff_add_pixels_clamped = c->add_pixels_clamped;
+
+ if (!high_bit_depth) {
+ if (avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_ARM) {
+ c->idct_put = j_rev_dct_arm_put;
+ c->idct_add = j_rev_dct_arm_add;
+ c->idct = ff_j_rev_dct_arm;
+ c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+ } else if (avctx->idct_algo == FF_IDCT_SIMPLEARM) {
+ c->idct_put = simple_idct_arm_put;
+ c->idct_add = simple_idct_arm_add;
+ c->idct = ff_simple_idct_arm;
+ c->idct_permutation_type = FF_NO_IDCT_PERM;
+ }
+ }
+
+ c->add_pixels_clamped = ff_add_pixels_clamped_arm;
+
+ if (have_armv5te(cpu_flags))
+ ff_idctdsp_init_armv5te(c, avctx, high_bit_depth);
+ if (have_armv6(cpu_flags))
+ ff_idctdsp_init_armv6(c, avctx, high_bit_depth);
+ if (have_neon(cpu_flags))
+ ff_idctdsp_init_neon(c, avctx, high_bit_depth);
+}
diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/idctdsp_init_armv5te.c
similarity index 91%
rename from libavcodec/arm/dsputil_init_armv5te.c
rename to libavcodec/arm/idctdsp_init_armv5te.c
index eb45b72..e2492a5 100644
--- a/libavcodec/arm/dsputil_init_armv5te.c
+++ b/libavcodec/arm/idctdsp_init_armv5te.c
@@ -22,14 +22,14 @@
#include "libavutil/attributes.h"
#include "libavcodec/avcodec.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_arm.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp_arm.h"
void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data);
-av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx,
+av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
if (!high_bit_depth &&
diff --git a/libavcodec/arm/idctdsp_init_armv6.c b/libavcodec/arm/idctdsp_init_armv6.c
new file mode 100644
index 0000000..e92f471
--- /dev/null
+++ b/libavcodec/arm/idctdsp_init_armv6.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp_arm.h"
+
+void ff_simple_idct_armv6(int16_t *data);
+void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, int16_t *data);
+void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, int16_t *data);
+
+void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
+ int line_size);
+
+av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+ if (!high_bit_depth) {
+ if (avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
+ c->idct_put = ff_simple_idct_put_armv6;
+ c->idct_add = ff_simple_idct_add_armv6;
+ c->idct = ff_simple_idct_armv6;
+ c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
+ }
+ }
+ c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
+}
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/idctdsp_init_neon.c
similarity index 92%
rename from libavcodec/arm/dsputil_init_neon.c
rename to libavcodec/arm/idctdsp_init_neon.c
index 9d4c76c..1790597 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/idctdsp_init_neon.c
@@ -1,5 +1,5 @@
/*
- * ARM NEON optimised DSP functions
+ * ARM-NEON-optimized IDCT functions
* Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
*
* This file is part of Libav.
@@ -23,8 +23,8 @@
#include "libavutil/attributes.h"
#include "libavcodec/avcodec.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_arm.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp_arm.h"
void ff_simple_idct_neon(int16_t *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
@@ -34,7 +34,7 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
-av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
+av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
if (!high_bit_depth) {
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/idctdsp_neon.S
similarity index 99%
rename from libavcodec/arm/dsputil_neon.S
rename to libavcodec/arm/idctdsp_neon.S
index ed6f218..7095879 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/idctdsp_neon.S
@@ -1,5 +1,5 @@
/*
- * ARM NEON optimised DSP functions
+ * ARM-NEON-optimized IDCT functions
* Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
*
* This file is part of Libav.
diff --git a/libavcodec/asv.c b/libavcodec/asv.c
index dba9e84..71c5e5f 100644
--- a/libavcodec/asv.c
+++ b/libavcodec/asv.c
@@ -84,7 +84,6 @@ av_cold void ff_asv_common_init(AVCodecContext *avctx) {
ASV1Context * const a = avctx->priv_data;
ff_bswapdsp_init(&a->bbdsp);
- ff_dsputil_init(&a->dsp, avctx);
a->mb_width = (avctx->width + 15) / 16;
a->mb_height = (avctx->height + 15) / 16;
diff --git a/libavcodec/asv.h b/libavcodec/asv.h
index 037e646..3f8d56c 100644
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -34,6 +34,7 @@
#include "blockdsp.h"
#include "bswapdsp.h"
#include "dsputil.h"
+#include "idctdsp.h"
#include "get_bits.h"
#include "put_bits.h"
@@ -42,6 +43,7 @@ typedef struct ASV1Context{
BlockDSPContext bdsp;
BswapDSPContext bbdsp;
DSPContext dsp;
+ IDCTDSPContext idsp;
PutBitContext pb;
GetBitContext gb;
ScanTable scantable;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index c785d15..252f88a 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -30,6 +30,7 @@
#include "avcodec.h"
#include "blockdsp.h"
#include "put_bits.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mathops.h"
#include "mpeg12data.h"
@@ -190,14 +191,14 @@ static inline void idct_put(ASV1Context *a, AVFrame *frame, int mb_x, int mb_y)
uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
- a->dsp.idct_put(dest_y , linesize, block[0]);
- a->dsp.idct_put(dest_y + 8, linesize, block[1]);
- a->dsp.idct_put(dest_y + 8*linesize , linesize, block[2]);
- a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]);
+ a->idsp.idct_put(dest_y, linesize, block[0]);
+ a->idsp.idct_put(dest_y + 8, linesize, block[1]);
+ a->idsp.idct_put(dest_y + 8 * linesize, linesize, block[2]);
+ a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]);
if (!(a->avctx->flags&CODEC_FLAG_GRAY)) {
- a->dsp.idct_put(dest_cb, frame->linesize[1], block[4]);
- a->dsp.idct_put(dest_cr, frame->linesize[2], block[5]);
+ a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]);
+ a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]);
}
}
@@ -283,8 +284,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
ff_asv_common_init(avctx);
ff_blockdsp_init(&a->bdsp, avctx);
+ ff_idctdsp_init(&a->idsp, avctx);
init_vlcs(a);
- ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab);
+ ff_init_scantable(a->idsp.idct_permutation, &a->scantable, ff_asv_scantab);
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
a->inv_qscale = avctx->extradata[0];
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 47b766a..e8c6d00 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -247,6 +247,7 @@ static av_cold int encode_init(AVCodecContext *avctx){
avctx->coded_frame->key_frame = 1;
ff_asv_common_init(avctx);
+ ff_dsputil_init(&a->dsp, avctx);
if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
index 21bc1ed..2be50a7 100644
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -29,6 +29,7 @@
#include "get_bits.h"
#include "golomb.h"
#include "h264chroma.h"
+#include "idctdsp.h"
#include "mathops.h"
#include "qpeldsp.h"
#include "cavs.h"
@@ -760,13 +761,13 @@ av_cold int ff_cavs_init(AVCodecContext *avctx)
AVSContext *h = avctx->priv_data;
ff_blockdsp_init(&h->bdsp, avctx);
- ff_dsputil_init(&h->dsp, avctx);
ff_h264chroma_init(&h->h264chroma, 8);
+ ff_idctdsp_init(&h->idsp, avctx);
ff_videodsp_init(&h->vdsp, 8);
ff_cavsdsp_init(&h->cdsp, avctx);
- ff_init_scantable_permutation(h->dsp.idct_permutation,
+ ff_init_scantable_permutation(h->idsp.idct_permutation,
h->cdsp.idct_perm);
- ff_init_scantable(h->dsp.idct_permutation, &h->scantable, ff_zigzag_direct);
+ ff_init_scantable(h->idsp.idct_permutation, &h->scantable, ff_zigzag_direct);
h->avctx = avctx;
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index c5a10b5..cfae055 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -24,8 +24,8 @@
#include "cavsdsp.h"
#include "blockdsp.h"
-#include "dsputil.h"
#include "h264chroma.h"
+#include "idctdsp.h"
#include "get_bits.h"
#include "videodsp.h"
@@ -162,9 +162,9 @@ typedef struct AVSFrame {
typedef struct AVSContext {
AVCodecContext *avctx;
- DSPContext dsp;
BlockDSPContext bdsp;
H264ChromaContext h264chroma;
+ IDCTDSPContext idsp;
VideoDSPContext vdsp;
CAVSDSPContext cdsp;
GetBitContext gb;
diff --git a/libavcodec/cavsdsp.c b/libavcodec/cavsdsp.c
index 666dc7f..958e3c5 100644
--- a/libavcodec/cavsdsp.c
+++ b/libavcodec/cavsdsp.c
@@ -24,7 +24,7 @@
#include <stdio.h>
-#include "dsputil.h"
+#include "idctdsp.h"
#include "mathops.h"
#include "cavsdsp.h"
#include "libavutil/common.h"
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 3bd8ffe..ca67990 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -28,7 +28,7 @@
#include "blockdsp.h"
#include "get_bits.h"
#include "dnxhddata.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "internal.h"
typedef struct DNXHDContext {
@@ -42,7 +42,7 @@ typedef struct DNXHDContext {
int cur_field; ///< current interlaced field
VLC ac_vlc, dc_vlc, run_vlc;
int last_dc[3];
- DSPContext dsp;
+ IDCTDSPContext idsp;
DECLARE_ALIGNED(16, int16_t, blocks)[12][64];
ScanTable scantable;
const CIDEntry *cid_table;
@@ -95,7 +95,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
ctx->cid_table->run_bits, 1, 1,
ctx->cid_table->run_codes, 2, 2, 0);
- ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable,
+ ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable,
ff_zigzag_direct);
ctx->cid = cid;
}
@@ -136,7 +136,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
- ff_dsputil_init(&ctx->dsp, ctx->avctx);
+ ff_idctdsp_init(&ctx->idsp, ctx->avctx);
ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10_444;
}
@@ -146,7 +146,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
- ff_dsputil_init(&ctx->dsp, ctx->avctx);
+ ff_idctdsp_init(&ctx->idsp, ctx->avctx);
ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10;
}
@@ -155,7 +155,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->bits_per_raw_sample = 8;
if (ctx->bit_depth != 8) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
- ff_dsputil_init(&ctx->dsp, ctx->avctx);
+ ff_idctdsp_init(&ctx->idsp, ctx->avctx);
ctx->bit_depth = 8;
ctx->decode_dct_block = dnxhd_decode_dct_block_8;
}
@@ -340,34 +340,34 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
dct_y_offset = dct_linesize_luma << 3;
dct_x_offset = 8 << shift1;
if (!ctx->is_444) {
- ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
- ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
- ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]);
- ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
+ ctx->idsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
+ ctx->idsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
+ ctx->idsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]);
+ ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
dct_y_offset = dct_linesize_chroma << 3;
- ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
- ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
- ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
- ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
+ ctx->idsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
+ ctx->idsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
+ ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
+ ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
}
} else {
- ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
- ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
- ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[6]);
- ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[7]);
+ ctx->idsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
+ ctx->idsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
+ ctx->idsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[6]);
+ ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[7]);
if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
dct_y_offset = dct_linesize_chroma << 3;
- ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
- ctx->dsp.idct_put(dest_u + dct_x_offset, dct_linesize_chroma, ctx->blocks[3]);
- ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[8]);
- ctx->dsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[9]);
- ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[4]);
- ctx->dsp.idct_put(dest_v + dct_x_offset, dct_linesize_chroma, ctx->blocks[5]);
- ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[10]);
- ctx->dsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[11]);
+ ctx->idsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
+ ctx->idsp.idct_put(dest_u + dct_x_offset, dct_linesize_chroma, ctx->blocks[3]);
+ ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[8]);
+ ctx->idsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[9]);
+ ctx->idsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[4]);
+ ctx->idsp.idct_put(dest_v + dct_x_offset, dct_linesize_chroma, ctx->blocks[5]);
+ ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[10]);
+ ctx->idsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[11]);
}
}
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index b85027b..223791a 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -200,14 +200,14 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
if (ctx->cid_table->bit_depth == 8) {
for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->luma_weight[i];
}
ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16,
weight_matrix, ctx->m.intra_quant_bias, 1,
ctx->m.avctx->qmax, 1);
for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->chroma_weight[i];
}
ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16,
@@ -228,7 +228,7 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
// 10-bit
for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
/* The quantization formula from the VC-3 standard is:
* quantized = sign(block[i]) * floor(abs(block[i]/s) * p /
@@ -308,6 +308,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->m.dsp, avctx);
+ ff_idctdsp_init(&ctx->m.idsp, avctx);
ff_dct_common_init(&ctx->m);
if (!ctx->m.dct_quantize)
ctx->m.dct_quantize = ff_dct_quantize_c;
@@ -634,7 +635,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg,
if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
- ctx->m.dsp.idct(block);
+ ctx->m.idsp.idct(block);
ssd += dnxhd_ssd_block(block, src_block);
}
}
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index ca0c8ef..5e5ad93 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -33,7 +33,6 @@
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
-#include "faanidct.h"
#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
@@ -48,60 +47,6 @@ uint32_t ff_square_tab[512] = { 0, };
#define BIT_DEPTH 8
#include "dsputilenc_template.c"
-av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
- const uint8_t *src_scantable)
-{
- int i, end;
-
- st->scantable = src_scantable;
-
- for (i = 0; i < 64; i++) {
- int j = src_scantable[i];
- st->permutated[i] = permutation[j];
- }
-
- end = -1;
- for (i = 0; i < 64; i++) {
- int j = st->permutated[i];
- if (j > end)
- end = j;
- st->raster_end[i] = end;
- }
-}
-
-av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
- int idct_permutation_type)
-{
- int i;
-
- if (ARCH_X86)
- if (ff_init_scantable_permutation_x86(idct_permutation,
- idct_permutation_type))
- return;
-
- switch (idct_permutation_type) {
- case FF_NO_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = i;
- break;
- case FF_LIBMPEG2_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
- break;
- case FF_TRANSPOSE_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
- break;
- case FF_PARTTRANS_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
- break;
- default:
- av_log(NULL, AV_LOG_ERROR,
- "Internal error, IDCT permutation not set\n");
- }
-}
-
static int pix_sum_c(uint8_t *pix, int line_size)
{
int s = 0, i, j;
@@ -259,68 +204,6 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
}
}
-static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
- int line_size)
-{
- int i;
-
- /* read the pixels */
- for (i = 0; i < 8; i++) {
- pixels[0] = av_clip_uint8(block[0]);
- pixels[1] = av_clip_uint8(block[1]);
- pixels[2] = av_clip_uint8(block[2]);
- pixels[3] = av_clip_uint8(block[3]);
- pixels[4] = av_clip_uint8(block[4]);
- pixels[5] = av_clip_uint8(block[5]);
- pixels[6] = av_clip_uint8(block[6]);
- pixels[7] = av_clip_uint8(block[7]);
-
- pixels += line_size;
- block += 8;
- }
-}
-
-static void put_signed_pixels_clamped_c(const int16_t *block,
- uint8_t *restrict pixels,
- int line_size)
-{
- int i, j;
-
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++) {
- if (*block < -128)
- *pixels = 0;
- else if (*block > 127)
- *pixels = 255;
- else
- *pixels = (uint8_t) (*block + 128);
- block++;
- pixels++;
- }
- pixels += (line_size - 8);
- }
-}
-
-static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
- int line_size)
-{
- int i;
-
- /* read the pixels */
- for (i = 0; i < 8; i++) {
- pixels[0] = av_clip_uint8(pixels[0] + block[0]);
- pixels[1] = av_clip_uint8(pixels[1] + block[1]);
- pixels[2] = av_clip_uint8(pixels[2] + block[2]);
- pixels[3] = av_clip_uint8(pixels[3] + block[3]);
- pixels[4] = av_clip_uint8(pixels[4] + block[4]);
- pixels[5] = av_clip_uint8(pixels[5] + block[5]);
- pixels[6] = av_clip_uint8(pixels[6] + block[6]);
- pixels[7] = av_clip_uint8(pixels[7] + block[7]);
- pixels += line_size;
- block += 8;
- }
-}
-
static int sum_abs_dctelem_c(int16_t *block)
{
int sum = 0, i;
@@ -967,7 +850,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
s->dct_unquantize_inter(s, temp, 0, s->qscale);
}
- s->dsp.idct_add(lsrc2, 8, temp);
+ s->idsp.idct_add(lsrc2, 8, temp);
distortion = s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
@@ -1138,18 +1021,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
-static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_j_rev_dct(block);
- put_pixels_clamped_c(block, dest, line_size);
-}
-
-static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
-{
- ff_j_rev_dct(block);
- add_pixels_clamped_c(block, dest, line_size);
-}
-
/* draw the edges of width 'w' of an image of size width, height */
// FIXME: Check that this is OK for MPEG-4 interlaced.
static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
@@ -1209,36 +1080,8 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
}
#endif /* CONFIG_ENCODERS */
- if (avctx->bits_per_raw_sample == 10) {
- c->idct_put = ff_simple_idct_put_10;
- c->idct_add = ff_simple_idct_add_10;
- c->idct = ff_simple_idct_10;
- c->idct_permutation_type = FF_NO_IDCT_PERM;
- } else {
- if (avctx->idct_algo == FF_IDCT_INT) {
- c->idct_put = jref_idct_put;
- c->idct_add = jref_idct_add;
- c->idct = ff_j_rev_dct;
- c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
- } else if (avctx->idct_algo == FF_IDCT_FAAN) {
- c->idct_put = ff_faanidct_put;
- c->idct_add = ff_faanidct_add;
- c->idct = ff_faanidct;
- c->idct_permutation_type = FF_NO_IDCT_PERM;
- } else { // accurate/default
- c->idct_put = ff_simple_idct_put_8;
- c->idct_add = ff_simple_idct_add_8;
- c->idct = ff_simple_idct_8;
- c->idct_permutation_type = FF_NO_IDCT_PERM;
- }
- }
-
c->diff_pixels = diff_pixels_c;
- c->put_pixels_clamped = put_pixels_clamped_c;
- c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
- c->add_pixels_clamped = add_pixels_clamped_c;
-
c->sum_abs_dctelem = sum_abs_dctelem_c;
c->pix_sum = pix_sum_c;
@@ -1309,7 +1152,4 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
ff_dsputil_init_ppc(c, avctx, high_bit_depth);
if (ARCH_X86)
ff_dsputil_init_x86(c, avctx, high_bit_depth);
-
- ff_init_scantable_permutation(c->idct_permutation,
- c->idct_permutation_type);
}
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index b271dcc..dfbca5a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -45,22 +45,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c,
uint8_t *blk2 /* align 1 */, int line_size, int h);
/**
- * Scantable.
- */
-typedef struct ScanTable {
- const uint8_t *scantable;
- uint8_t permutated[64];
- uint8_t raster_end[64];
-} ScanTable;
-
-void ff_init_scantable(uint8_t *permutation, ScanTable *st,
- const uint8_t *src_scantable);
-void ff_init_scantable_permutation(uint8_t *idct_permutation,
- int idct_permutation_type);
-int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
- int idct_permutation_type);
-
-/**
* DSPContext.
*/
typedef struct DSPContext {
@@ -72,15 +56,6 @@ typedef struct DSPContext {
const uint8_t *s1 /* align 8 */,
const uint8_t *s2 /* align 8 */,
int stride);
- void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
- uint8_t *pixels /* align 8 */,
- int line_size);
- void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
- uint8_t *pixels /* align 8 */,
- int line_size);
- void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
- uint8_t *pixels /* align 8 */,
- int line_size);
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
int (*pix_sum)(uint8_t *pix, int line_size);
@@ -112,47 +87,6 @@ typedef struct DSPContext {
void (*fdct)(int16_t *block /* align 16 */);
void (*fdct248)(int16_t *block /* align 16 */);
- /* IDCT really */
- void (*idct)(int16_t *block /* align 16 */);
-
- /**
- * block -> idct -> clip to unsigned 8 bit -> dest.
- * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
- * @param line_size size in bytes of a horizontal line of dest
- */
- void (*idct_put)(uint8_t *dest /* align 8 */,
- int line_size, int16_t *block /* align 16 */);
-
- /**
- * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
- * @param line_size size in bytes of a horizontal line of dest
- */
- void (*idct_add)(uint8_t *dest /* align 8 */,
- int line_size, int16_t *block /* align 16 */);
-
- /**
- * IDCT input permutation.
- * Several optimized IDCTs need a permutated input (relative to the
- * normal order of the reference IDCT).
- * This permutation must be performed before the idct_put/add.
- * Note, normally this can be merged with the zigzag/alternate scan<br>
- * An example to avoid confusion:
- * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
- * - (x -> reference DCT -> reference IDCT -> x)
- * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
- * -> simple_idct_mmx -> x)
- * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
- * -> simple_idct_mmx -> ...)
- */
- uint8_t idct_permutation[64];
- int idct_permutation_type;
-#define FF_NO_IDCT_PERM 1
-#define FF_LIBMPEG2_IDCT_PERM 2
-#define FF_SIMPLE_IDCT_PERM 3
-#define FF_TRANSPOSE_IDCT_PERM 4
-#define FF_PARTTRANS_IDCT_PERM 5
-#define FF_SSE2_IDCT_PERM 6
-
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c
index 9a559db..a03914d 100644
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -39,6 +39,7 @@
#include "libavutil/imgutils.h"
#include "libavutil/pixdesc.h"
#include "avcodec.h"
+#include "idctdsp.h"
#include "internal.h"
#include "get_bits.h"
#include "put_bits.h"
@@ -61,17 +62,17 @@ static const int dv_iweight_bits = 14;
static av_cold int dvvideo_decode_init(AVCodecContext *avctx)
{
DVVideoContext *s = avctx->priv_data;
- DSPContext dsp;
+ IDCTDSPContext idsp;
int i;
- ff_dsputil_init(&dsp, avctx);
+ ff_idctdsp_init(&idsp, avctx);
for (i = 0; i < 64; i++)
- s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
+ s->dv_zigzag[0][i] = idsp.idct_permutation[ff_zigzag_direct[i]];
memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1]));
- s->idct_put[0] = dsp.idct_put;
+ s->idct_put[0] = idsp.idct_put;
s->idct_put[1] = ff_simple_idct248_put;
return ff_dvvideo_init(avctx);
diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index 044e669..f6ef5e5 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c
@@ -110,7 +110,7 @@ static void fill_quantization_matrices(AVCodecContext *avctx,
for (i = 0; i < 4; i++)
qm->bNewQmatrix[i] = 1;
for (i = 0; i < 64; i++) {
- int n = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ int n = s->idsp.idct_permutation[ff_zigzag_direct[i]];
qm->Qmatrix[0][i] = s->intra_matrix[n];;
qm->Qmatrix[1][i] = s->inter_matrix[n];;
qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];;
diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c
index 8fe1575..9edf344 100644
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -33,6 +33,7 @@
#include "get_bits.h"
#include "aandcttab.h"
#include "eaidct.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mpeg12.h"
#include "mpeg12data.h"
@@ -47,7 +48,7 @@ typedef struct MadContext {
AVCodecContext *avctx;
BlockDSPContext bdsp;
BswapDSPContext bbdsp;
- DSPContext dsp;
+ IDCTDSPContext idsp;
AVFrame *last_frame;
GetBitContext gb;
void *bitstream_buf;
@@ -66,9 +67,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_blockdsp_init(&s->bdsp, avctx);
ff_bswapdsp_init(&s->bbdsp);
- ff_dsputil_init(&s->dsp, avctx);
- ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
- ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
+ ff_idctdsp_init(&s->idsp, avctx);
+ ff_init_scantable_permutation(s->idsp.idct_permutation, FF_NO_IDCT_PERM);
+ ff_init_scantable(s->idsp.idct_permutation, &s->scantable, ff_zigzag_direct);
ff_mpeg12_init_vlcs();
s->last_frame = av_frame_alloc();
diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index 1ead5f7..d8320c9 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -32,7 +32,7 @@
#define BITSTREAM_READER_LE
#include "get_bits.h"
#include "bytestream.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "aandcttab.h"
#include "eaidct.h"
#include "internal.h"
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 36ec2e4..60d80e9 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -32,6 +32,7 @@
#include "get_bits.h"
#include "aandcttab.h"
#include "eaidct.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mpeg12.h"
#include "mpegvideo.h"
@@ -51,9 +52,9 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
ff_blockdsp_init(&s->bdsp, avctx);
ff_bswapdsp_init(&t->bsdsp);
- ff_dsputil_init(&s->dsp, avctx);
- ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
+ ff_idctdsp_init(&s->idsp, avctx);
+ ff_init_scantable_permutation(s->idsp.idct_permutation, FF_NO_IDCT_PERM);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
s->qscale = 1;
avctx->time_base = (AVRational){1, 15};
avctx->pix_fmt = AV_PIX_FMT_YUV420P;
diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index 9660155..456045d 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -31,7 +31,7 @@
#include "avcodec.h"
#include "blockdsp.h"
#include "bytestream.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "get_bits.h"
#include "internal.h"
#include "mjpeg.h"
@@ -74,7 +74,7 @@ static const uint8_t chroma_quant[64] = {
typedef struct JPGContext {
BlockDSPContext bdsp;
- DSPContext dsp;
+ IDCTDSPContext idsp;
ScanTable scantable;
VLC dc_vlc[2], ac_vlc[2];
@@ -153,8 +153,8 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c)
return ret;
ff_blockdsp_init(&c->bdsp, avctx);
- ff_dsputil_init(&c->dsp, avctx);
- ff_init_scantable(c->dsp.idct_permutation, &c->scantable,
+ ff_idctdsp_init(&c->idsp, avctx);
+ ff_init_scantable(c->idsp.idct_permutation, &c->scantable,
ff_zigzag_direct);
return 0;
@@ -279,13 +279,13 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
if ((ret = jpg_decode_block(c, &gb, 0,
c->block[i + j * 2])) != 0)
return ret;
- c->dsp.idct(c->block[i + j * 2]);
+ c->idsp.idct(c->block[i + j * 2]);
}
}
for (i = 1; i < 3; i++) {
if ((ret = jpg_decode_block(c, &gb, i, c->block[i + 3])) != 0)
return ret;
- c->dsp.idct(c->block[i + 3]);
+ c->idsp.idct(c->block[i + 3]);
}
for (j = 0; j < 16; j++) {
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 6d5ffc0..9019548 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -267,7 +267,7 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n)
if (a != 1024) {
ac_val -= 16;
for(i=1;i<8;i++) {
- block[s->dsp.idct_permutation[i<<3]] += ac_val[i];
+ block[s->idsp.idct_permutation[i << 3]] += ac_val[i];
}
pred_dc = a;
}
@@ -276,7 +276,7 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n)
if (c != 1024) {
ac_val -= 16 * wrap;
for(i=1;i<8;i++) {
- block[s->dsp.idct_permutation[i ]] += ac_val[i + 8];
+ block[s->idsp.idct_permutation[i]] += ac_val[i + 8];
}
pred_dc = c;
}
@@ -304,10 +304,10 @@ void ff_h263_pred_acdc(MpegEncContext * s, int16_t *block, int n)
/* left copy */
for(i=1;i<8;i++)
- ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]];
+ ac_val1[i] = block[s->idsp.idct_permutation[i << 3]];
/* top copy */
for(i=1;i<8;i++)
- ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]];
+ ac_val1[8 + i] = block[s->idsp.idct_permutation[i]];
}
int16_t *ff_h263_pred_motion(MpegEncContext * s, int block, int dir,
diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
new file mode 100644
index 0000000..8542ab3
--- /dev/null
+++ b/libavcodec/idctdsp.c
@@ -0,0 +1,197 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "avcodec.h"
+#include "dct.h"
+#include "faanidct.h"
+#include "idctdsp.h"
+#include "simple_idct.h"
+
+av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
+ const uint8_t *src_scantable)
+{ /* Fill *st from a 64-entry scan order and a 64-entry permutation table. */
+ int i, end;
+
+ st->scantable = src_scantable; /* the pointer is stored, the table is not copied */
+
+ for (i = 0; i < 64; i++) { /* permutated[i] = permutation[src_scantable[i]] */
+ int j = src_scantable[i];
+ st->permutated[i] = permutation[j];
+ }
+
+ end = -1; /* lower than any uint8_t entry, so the first entry always replaces it */
+ for (i = 0; i < 64; i++) { /* raster_end[i] = largest of permutated[0..i] */
+ int j = st->permutated[i];
+ if (j > end)
+ end = j;
+ st->raster_end[i] = end;
+ }
+}
+
+av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
+ int idct_permutation_type)
+{ /* Fill the 64-entry idct_permutation table according to idct_permutation_type. */
+ int i;
+
+ if (ARCH_X86) /* x86 helper is tried first; a non-zero return ends this function */
+ if (ff_init_scantable_permutation_x86(idct_permutation,
+ idct_permutation_type))
+ return; /* NOTE(review): presumably the helper filled the table itself -- confirm */
+
+ switch (idct_permutation_type) {
+ case FF_NO_IDCT_PERM: /* identity mapping */
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = i;
+ break;
+ case FF_LIBMPEG2_IDCT_PERM: /* bits 3-5 kept, low three bits rotated right by one */
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+ break;
+ case FF_TRANSPOSE_IDCT_PERM: /* low and high 3-bit fields of the index swapped */
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
+ break;
+ case FF_PARTTRANS_IDCT_PERM: /* bits 2 and 5 kept, bits 0-1 swapped with bits 3-4 */
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
+ break;
+ default: /* any other type: only an error is logged, idct_permutation is left unmodified */
+ av_log(NULL, AV_LOG_ERROR,
+ "Internal error, IDCT permutation not set\n");
+ }
+}
+
+static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
+ int line_size)
+{
+ int i;
+
+ /* write 8 rows of 8 pixels: each int16_t value goes through av_clip_uint8() and overwrites the destination */
+ for (i = 0; i < 8; i++) {
+ pixels[0] = av_clip_uint8(block[0]);
+ pixels[1] = av_clip_uint8(block[1]);
+ pixels[2] = av_clip_uint8(block[2]);
+ pixels[3] = av_clip_uint8(block[3]);
+ pixels[4] = av_clip_uint8(block[4]);
+ pixels[5] = av_clip_uint8(block[5]);
+ pixels[6] = av_clip_uint8(block[6]);
+ pixels[7] = av_clip_uint8(block[7]);
+
+ pixels += line_size; /* next destination row */
+ block += 8; /* next 8 source values; the source rows are contiguous */
+ }
+}
+
+static void put_signed_pixels_clamped_c(const int16_t *block,
+ uint8_t *restrict pixels,
+ int line_size)
+{ /* Store 8x8 signed values as unsigned pixels: clamp to [-128, 127], then add 128. */
+ int i, j;
+
+ for (i = 0; i < 8; i++) { /* rows */
+ for (j = 0; j < 8; j++) { /* pixels within the row */
+ if (*block < -128)
+ *pixels = 0; /* same as -128 + 128 */
+ else if (*block > 127)
+ *pixels = 255; /* same as 127 + 128 */
+ else
+ *pixels = (uint8_t) (*block + 128);
+ block++;
+ pixels++;
+ }
+ pixels += (line_size - 8); /* pixels already advanced by 8 inside the row loop */
+ }
+}
+
+static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
+ int line_size)
+{
+ int i;
+
+ /* for 8 rows of 8 pixels: add the block value to the pixel already stored, pass the sum through av_clip_uint8(), store it back */
+ for (i = 0; i < 8; i++) {
+ pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+ pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+ pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+ pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+ pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+ pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+ pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+ pixels[7] = av_clip_uint8(pixels[7] + block[7]);
+ pixels += line_size; /* next destination row */
+ block += 8; /* next 8 block values */
+ }
+}
+
+static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
+{ /* idct_put built from ff_j_rev_dct() followed by put_pixels_clamped_c(). */
+ ff_j_rev_dct(block); /* block is both input and output of the transform */
+ put_pixels_clamped_c(block, dest, line_size); /* clip and overwrite dest */
+}
+
+static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
+{ /* idct_add built from ff_j_rev_dct() followed by add_pixels_clamped_c(). */
+ ff_j_rev_dct(block); /* block is both input and output of the transform */
+ add_pixels_clamped_c(block, dest, line_size); /* add to what is in dest, clip, store back */
+}
+
+av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
+{ /* Set the C defaults in *c from avctx, call the per-arch init, then build c->idct_permutation. */
+ const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; /* 0 or 1; only forwarded to the per-arch init calls below */
+
+ if (avctx->bits_per_raw_sample == 10) { /* exactly 10; every other depth takes the else branch */
+ c->idct_put = ff_simple_idct_put_10;
+ c->idct_add = ff_simple_idct_add_10;
+ c->idct = ff_simple_idct_10;
+ c->idct_permutation_type = FF_NO_IDCT_PERM;
+ } else {
+ if (avctx->idct_algo == FF_IDCT_INT) {
+ c->idct_put = jref_idct_put;
+ c->idct_add = jref_idct_add;
+ c->idct = ff_j_rev_dct;
+ c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; /* the only choice here that is not FF_NO_IDCT_PERM */
+ } else if (avctx->idct_algo == FF_IDCT_FAAN) {
+ c->idct_put = ff_faanidct_put;
+ c->idct_add = ff_faanidct_add;
+ c->idct = ff_faanidct;
+ c->idct_permutation_type = FF_NO_IDCT_PERM;
+ } else { // accurate/default
+ c->idct_put = ff_simple_idct_put_8;
+ c->idct_add = ff_simple_idct_add_8;
+ c->idct = ff_simple_idct_8;
+ c->idct_permutation_type = FF_NO_IDCT_PERM;
+ }
+ }
+
+ c->put_pixels_clamped = put_pixels_clamped_c; /* set regardless of bit depth or idct_algo */
+ c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+ c->add_pixels_clamped = add_pixels_clamped_c;
+
+ if (ARCH_ARM) /* per-arch init runs after the C defaults; NOTE(review): presumably it overrides entries of *c -- confirm */
+ ff_idctdsp_init_arm(c, avctx, high_bit_depth);
+ if (ARCH_PPC)
+ ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
+ if (ARCH_X86)
+ ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+
+ ff_init_scantable_permutation(c->idct_permutation,
+ c->idct_permutation_type); /* done last, so it sees whatever type is set after the per-arch calls */
+}
diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
new file mode 100644
index 0000000..e3a2317
--- /dev/null
+++ b/libavcodec/idctdsp.h
@@ -0,0 +1,104 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_IDCTDSP_H
+#define AVCODEC_IDCTDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+/**
+ * Scantable.
+ */
+typedef struct ScanTable {
+ const uint8_t *scantable;
+ uint8_t permutated[64];
+ uint8_t raster_end[64];
+} ScanTable;
+
+void ff_init_scantable(uint8_t *permutation, ScanTable *st,
+ const uint8_t *src_scantable);
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+ int idct_permutation_type);
+int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+ int idct_permutation_type);
+
+typedef struct IDCTDSPContext {
+ /* pixel ops : interface with DCT */
+ void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
+ uint8_t *pixels /* align 8 */,
+ int line_size);
+ void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
+ uint8_t *pixels /* align 8 */,
+ int line_size);
+ void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
+ uint8_t *pixels /* align 8 */,
+ int line_size);
+
+ void (*idct)(int16_t *block /* align 16 */);
+
+ /**
+ * block -> idct -> clip to unsigned 8 bit -> dest.
+ * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+ * @param line_size size in bytes of a horizontal line of dest
+ */
+ void (*idct_put)(uint8_t *dest /* align 8 */,
+ int line_size, int16_t *block /* align 16 */);
+
+ /**
+ * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+ * @param line_size size in bytes of a horizontal line of dest
+ */
+ void (*idct_add)(uint8_t *dest /* align 8 */,
+ int line_size, int16_t *block /* align 16 */);
+
+ /**
+ * IDCT input permutation.
+ * Several optimized IDCTs need a permutated input (relative to the
+ * normal order of the reference IDCT).
+ * This permutation must be performed before the idct_put/add.
+ * Note, normally this can be merged with the zigzag/alternate scan<br>
+ * An example to avoid confusion:
+ * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+ * - (x -> reference DCT -> reference IDCT -> x)
+ * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+ * -> simple_idct_mmx -> x)
+ * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+ * -> simple_idct_mmx -> ...)
+ */
+ uint8_t idct_permutation[64];
+ int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+#define FF_PARTTRANS_IDCT_PERM 5
+#define FF_SSE2_IDCT_PERM 6
+} IDCTDSPContext;
+
+void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
+
+void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+
+#endif /* AVCODEC_IDCTDSP_H */
diff --git a/libavcodec/intrax8.c b/libavcodec/intrax8.c
index 2bda723..d37eb79 100644
--- a/libavcodec/intrax8.c
+++ b/libavcodec/intrax8.c
@@ -24,6 +24,7 @@
#include "avcodec.h"
#include "error_resilience.h"
#include "get_bits.h"
+#include "idctdsp.h"
#include "mpegvideo.h"
#include "msmpeg4data.h"
#include "intrax8huf.h"
@@ -440,7 +441,7 @@ lut2[q>12][c]={
static void x8_ac_compensation(IntraX8Context * const w, int const direction, int const dc_level){
MpegEncContext * const s= w->s;
int t;
-#define B(x,y) s->block[0][s->dsp.idct_permutation[(x)+(y)*8]]
+#define B(x, y) s->block[0][s->idsp.idct_permutation[(x) + (y) * 8]]
#define T(x) ((x) * dc_level + 0x8000) >> 16;
switch(direction){
case 0:
@@ -646,9 +647,9 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
s->current_picture.f->linesize[!!chroma] );
}
if(!zeros_only)
- s->dsp.idct_add ( s->dest[chroma],
- s->current_picture.f->linesize[!!chroma],
- s->block[0] );
+ s->idsp.idct_add(s->dest[chroma],
+ s->current_picture.f->linesize[!!chroma],
+ s->block[0]);
block_placed:
@@ -698,9 +699,9 @@ av_cold void ff_intrax8_common_init(IntraX8Context * w, MpegEncContext * const s
assert(s->mb_width>0);
w->prediction_table=av_mallocz(s->mb_width*2*2);//two rows, 2 blocks per cannon mb
- ff_init_scantable(s->dsp.idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]);
- ff_init_scantable(s->dsp.idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]);
- ff_init_scantable(s->dsp.idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]);
+ ff_init_scantable(s->idsp.idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]);
+ ff_init_scantable(s->idsp.idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]);
+ ff_init_scantable(s->idsp.idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]);
ff_intrax8dsp_init(&w->dsp);
}
diff --git a/libavcodec/ljpegenc.c b/libavcodec/ljpegenc.c
index 9f06818..fbb024b 100644
--- a/libavcodec/ljpegenc.c
+++ b/libavcodec/ljpegenc.c
@@ -35,7 +35,7 @@
#include "libavutil/pixdesc.h"
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mjpegenc_common.h"
#include "mpegvideo.h"
@@ -43,7 +43,7 @@
#include "mjpegenc.h"
typedef struct LJpegEncContext {
- DSPContext dsp;
+ IDCTDSPContext idsp;
ScanTable scantable;
uint16_t matrix[64];
@@ -285,8 +285,9 @@ static av_cold int ljpeg_encode_init(AVCodecContext *avctx)
s->scratch = av_malloc_array(avctx->width + 1, sizeof(*s->scratch));
- ff_dsputil_init(&s->dsp, avctx);
- ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
+ ff_idctdsp_init(&s->idsp, avctx);
+ ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
+ ff_zigzag_direct);
av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
&chroma_v_shift);
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index b421397..6b70e37 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -29,6 +29,7 @@
#include "avcodec.h"
#include "blockdsp.h"
+#include "idctdsp.h"
#include "mpegvideo.h"
#include "mpeg12.h"
#include "thread.h"
@@ -36,7 +37,7 @@
typedef struct MDECContext {
AVCodecContext *avctx;
BlockDSPContext bdsp;
- DSPContext dsp;
+ IDCTDSPContext idsp;
ThreadFrame frame;
GetBitContext gb;
ScanTable scantable;
@@ -146,14 +147,14 @@ static inline void idct_put(MDECContext *a, AVFrame *frame, int mb_x, int mb_y)
uint8_t *dest_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
uint8_t *dest_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
- a->dsp.idct_put(dest_y, linesize, block[0]);
- a->dsp.idct_put(dest_y + 8, linesize, block[1]);
- a->dsp.idct_put(dest_y + 8 * linesize, linesize, block[2]);
- a->dsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]);
+ a->idsp.idct_put(dest_y, linesize, block[0]);
+ a->idsp.idct_put(dest_y + 8, linesize, block[1]);
+ a->idsp.idct_put(dest_y + 8 * linesize, linesize, block[2]);
+ a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]);
if (!(a->avctx->flags & CODEC_FLAG_GRAY)) {
- a->dsp.idct_put(dest_cb, frame->linesize[1], block[4]);
- a->dsp.idct_put(dest_cr, frame->linesize[2], block[5]);
+ a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]);
+ a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]);
}
}
@@ -215,9 +216,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
a->avctx = avctx;
ff_blockdsp_init(&a->bdsp, avctx);
- ff_dsputil_init(&a->dsp, avctx);
+ ff_idctdsp_init(&a->idsp, avctx);
ff_mpeg12_init_vlcs();
- ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
+ ff_init_scantable(a->idsp.idct_permutation, &a->scantable,
+ ff_zigzag_direct);
if (avctx->idct_algo == FF_IDCT_AUTO)
avctx->idct_algo = FF_IDCT_SIMPLE;
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 4d21b51..88ee5d3 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -29,8 +29,8 @@
#include "get_bits.h"
#include "bytestream.h"
#include "bswapdsp.h"
-#include "dsputil.h"
#include "hpeldsp.h"
+#include "idctdsp.h"
#include "thread.h"
#define MIMIC_HEADER_SIZE 20
@@ -56,8 +56,8 @@ typedef struct {
ScanTable scantable;
BlockDSPContext bdsp;
BswapDSPContext bbdsp;
- DSPContext dsp;
HpelDSPContext hdsp;
+ IDCTDSPContext idsp;
VLC vlc;
/* Kept in the context so multithreading can have a constant to read from */
@@ -151,9 +151,9 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
}
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_bswapdsp_init(&ctx->bbdsp);
- ff_dsputil_init(&ctx->dsp, avctx);
ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
- ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag);
+ ff_idctdsp_init(&ctx->idsp, avctx);
+ ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, col_zag);
for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) {
ctx->frames[i].f = av_frame_alloc();
@@ -302,7 +302,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
"block.\n");
return ret;
}
- ctx->dsp.idct_put(dst, stride, ctx->dct_block);
+ ctx->idsp.idct_put(dst, stride, ctx->dct_block);
} else {
unsigned int backref = get_bits(&ctx->gb, 4);
int index = (ctx->cur_index + backref) & 15;
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index f674539..d9a73d8 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -36,6 +36,7 @@
#include "libavutil/opt.h"
#include "avcodec.h"
#include "blockdsp.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mjpeg.h"
#include "mjpegdec.h"
@@ -95,8 +96,9 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
ff_blockdsp_init(&s->bdsp, avctx);
ff_hpeldsp_init(&s->hdsp, avctx->flags);
- ff_dsputil_init(&s->dsp, avctx);
- ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
+ ff_idctdsp_init(&s->idsp, avctx);
+ ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
+ ff_zigzag_direct);
s->buffer_size = 0;
s->buffer = NULL;
s->start_code = -1;
@@ -889,7 +891,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
"error y=%d x=%d\n", mb_y, mb_x);
return AVERROR_INVALIDDATA;
}
- s->dsp.idct_put(ptr, linesize[c], s->block);
+ s->idsp.idct_put(ptr, linesize[c], s->block);
}
} else {
int block_idx = s->block_stride[c] * (v * mb_y + y) +
@@ -1002,7 +1004,7 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss,
reference_data + block_offset,
linesize, 8);
} else {
- s->dsp.idct_put(ptr, linesize, *block);
+ s->idsp.idct_put(ptr, linesize, *block);
ptr += 8;
}
}
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index 0d1dd9e..aa4703a 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -35,8 +35,8 @@
#include "avcodec.h"
#include "blockdsp.h"
#include "get_bits.h"
-#include "dsputil.h"
#include "hpeldsp.h"
+#include "idctdsp.h"
#define MAX_COMPONENTS 4
@@ -97,8 +97,8 @@ typedef struct MJpegDecodeContext {
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
ScanTable scantable;
BlockDSPContext bdsp;
- DSPContext dsp;
HpelDSPContext hdsp;
+ IDCTDSPContext idsp;
int restart_interval;
int restart_count;
diff --git a/libavcodec/mjpegenc_common.c b/libavcodec/mjpegenc_common.c
index 3dba414..adb335e 100644
--- a/libavcodec/mjpegenc_common.c
+++ b/libavcodec/mjpegenc_common.c
@@ -26,7 +26,7 @@
#include "libavutil/pixfmt.h"
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "put_bits.h"
#include "mjpegenc_common.h"
#include "mjpeg.h"
diff --git a/libavcodec/mjpegenc_common.h b/libavcodec/mjpegenc_common.h
index 57dc9dd..b48911e 100644
--- a/libavcodec/mjpegenc_common.h
+++ b/libavcodec/mjpegenc_common.h
@@ -24,7 +24,7 @@
#include <stdint.h>
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "put_bits.h"
void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 0bf3c20..aa98454 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -33,8 +33,8 @@
#include "avcodec.h"
#include "bytestream.h"
-#include "dsputil.h"
#include "error_resilience.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mpeg_er.h"
#include "mpeg12.h"
@@ -1100,7 +1100,7 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx)
/* we need some permutation to store matrices,
* until MPV_common_init() sets the real permutation. */
for (i = 0; i < 64; i++)
- s2->dsp.idct_permutation[i] = i;
+ s2->idsp.idct_permutation[i] = i;
ff_MPV_decode_defaults(s2);
@@ -1309,15 +1309,15 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
/* Quantization matrices may need reordering
* if DCT permutation is changed. */
- memcpy(old_permutation, s->dsp.idct_permutation, 64 * sizeof(uint8_t));
+ memcpy(old_permutation, s->idsp.idct_permutation, 64 * sizeof(uint8_t));
if (ff_MPV_common_init(s) < 0)
return -2;
- quant_matrix_rebuild(s->intra_matrix, old_permutation, s->dsp.idct_permutation);
- quant_matrix_rebuild(s->inter_matrix, old_permutation, s->dsp.idct_permutation);
- quant_matrix_rebuild(s->chroma_intra_matrix, old_permutation, s->dsp.idct_permutation);
- quant_matrix_rebuild(s->chroma_inter_matrix, old_permutation, s->dsp.idct_permutation);
+ quant_matrix_rebuild(s->intra_matrix, old_permutation, s->idsp.idct_permutation);
+ quant_matrix_rebuild(s->inter_matrix, old_permutation, s->idsp.idct_permutation);
+ quant_matrix_rebuild(s->chroma_intra_matrix, old_permutation, s->idsp.idct_permutation);
+ quant_matrix_rebuild(s->chroma_inter_matrix, old_permutation, s->idsp.idct_permutation);
s1->mpeg_enc_ctx_allocated = 1;
}
@@ -1469,7 +1469,7 @@ static int load_matrix(MpegEncContext *s, uint16_t matrix0[64],
int i;
for (i = 0; i < 64; i++) {
- int j = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
int v = get_bits(&s->gb, 8);
if (v == 0) {
av_log(s->avctx, AV_LOG_ERROR, "matrix damaged\n");
@@ -1561,11 +1561,11 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
}
if (s->alternate_scan) {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
} else {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
}
/* composite display not parsed */
@@ -2070,7 +2070,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
load_matrix(s, s->chroma_intra_matrix, s->intra_matrix, 1);
} else {
for (i = 0; i < 64; i++) {
- j = s->dsp.idct_permutation[i];
+ j = s->idsp.idct_permutation[i];
v = ff_mpeg1_default_intra_matrix[i];
s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v;
@@ -2080,7 +2080,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
load_matrix(s, s->chroma_inter_matrix, s->inter_matrix, 0);
} else {
for (i = 0; i < 64; i++) {
- int j = s->dsp.idct_permutation[i];
+ int j = s->idsp.idct_permutation[i];
v = ff_mpeg1_default_non_intra_matrix[i];
s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v;
@@ -2142,7 +2142,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
s1->mpeg_enc_ctx_allocated = 1;
for (i = 0; i < 64; i++) {
- int j = s->dsp.idct_permutation[i];
+ int j = s->idsp.idct_permutation[i];
v = ff_mpeg1_default_intra_matrix[i];
s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v;
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 0e3e580..a4e7725 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -21,6 +21,7 @@
*/
#include "error_resilience.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mpegutils.h"
#include "mpegvideo.h"
@@ -71,11 +72,11 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir)
n == 1 || n == 3) {
/* same qscale */
for (i = 1; i < 8; i++)
- block[s->dsp.idct_permutation[i << 3]] += ac_val[i];
+ block[s->idsp.idct_permutation[i << 3]] += ac_val[i];
} else {
/* different qscale, we must rescale */
for (i = 1; i < 8; i++)
- block[s->dsp.idct_permutation[i << 3]] += ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale);
+ block[s->idsp.idct_permutation[i << 3]] += ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale);
}
} else {
const int xy = s->mb_x + s->mb_y * s->mb_stride - s->mb_stride;
@@ -86,21 +87,21 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir)
n == 2 || n == 3) {
/* same qscale */
for (i = 1; i < 8; i++)
- block[s->dsp.idct_permutation[i]] += ac_val[i + 8];
+ block[s->idsp.idct_permutation[i]] += ac_val[i + 8];
} else {
/* different qscale, we must rescale */
for (i = 1; i < 8; i++)
- block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale);
+ block[s->idsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale);
}
}
}
/* left copy */
for (i = 1; i < 8; i++)
- ac_val1[i] = block[s->dsp.idct_permutation[i << 3]];
+ ac_val1[i] = block[s->idsp.idct_permutation[i << 3]];
/* top copy */
for (i = 1; i < 8; i++)
- ac_val1[8 + i] = block[s->dsp.idct_permutation[i]];
+ ac_val1[8 + i] = block[s->idsp.idct_permutation[i]];
}
/**
@@ -1815,7 +1816,7 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
/* load default matrixes */
for (i = 0; i < 64; i++) {
- int j = s->dsp.idct_permutation[i];
+ int j = s->idsp.idct_permutation[i];
v = ff_mpeg4_default_intra_matrix[i];
s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v;
@@ -1835,14 +1836,14 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
break;
last = v;
- j = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
s->intra_matrix[j] = last;
s->chroma_intra_matrix[j] = last;
}
/* replicate last value */
for (; i < 64; i++) {
- int j = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
s->intra_matrix[j] = last;
s->chroma_intra_matrix[j] = last;
}
@@ -1858,14 +1859,14 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
break;
last = v;
- j = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v;
}
/* replicate last value */
for (; i < 64; i++) {
- int j = s->dsp.idct_permutation[ff_zigzag_direct[i]];
+ int j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
s->inter_matrix[j] = last;
s->chroma_inter_matrix[j] = last;
}
@@ -2219,15 +2220,15 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
}
if (s->alternate_scan) {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
} else {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
}
if (s->pict_type == AV_PICTURE_TYPE_S &&
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index b95752f..f120932 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -110,11 +110,11 @@ static inline void restore_ac_coeffs(MpegEncContext *s, int16_t block[6][64],
if (dir[n]) {
/* top prediction */
for (i = 1; i < 8; i++)
- block[n][s->dsp.idct_permutation[i]] = ac_val[i + 8];
+ block[n][s->idsp.idct_permutation[i]] = ac_val[i + 8];
} else {
/* left prediction */
for (i = 1; i < 8; i++)
- block[n][s->dsp.idct_permutation[i << 3]] = ac_val[i];
+ block[n][s->idsp.idct_permutation[i << 3]] = ac_val[i];
}
}
}
@@ -152,17 +152,17 @@ static inline int decide_ac_pred(MpegEncContext *s, int16_t block[6][64],
if (s->mb_y == 0 || s->qscale == qscale_table[xy] || n == 2 || n == 3) {
/* same qscale */
for (i = 1; i < 8; i++) {
- const int level = block[n][s->dsp.idct_permutation[i]];
- block[n][s->dsp.idct_permutation[i]] = level - ac_val[i + 8];
- ac_val1[i] = block[n][s->dsp.idct_permutation[i << 3]];
+ const int level = block[n][s->idsp.idct_permutation[i]];
+ block[n][s->idsp.idct_permutation[i]] = level - ac_val[i + 8];
+ ac_val1[i] = block[n][s->idsp.idct_permutation[i << 3]];
ac_val1[i + 8] = level;
}
} else {
/* different qscale, we must rescale */
for (i = 1; i < 8; i++) {
- const int level = block[n][s->dsp.idct_permutation[i]];
- block[n][s->dsp.idct_permutation[i]] = level - ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale);
- ac_val1[i] = block[n][s->dsp.idct_permutation[i << 3]];
+ const int level = block[n][s->idsp.idct_permutation[i]];
+ block[n][s->idsp.idct_permutation[i]] = level - ROUNDED_DIV(ac_val[i + 8] * qscale_table[xy], s->qscale);
+ ac_val1[i] = block[n][s->idsp.idct_permutation[i << 3]];
ac_val1[i + 8] = level;
}
}
@@ -174,18 +174,18 @@ static inline int decide_ac_pred(MpegEncContext *s, int16_t block[6][64],
if (s->mb_x == 0 || s->qscale == qscale_table[xy] || n == 1 || n == 3) {
/* same qscale */
for (i = 1; i < 8; i++) {
- const int level = block[n][s->dsp.idct_permutation[i << 3]];
- block[n][s->dsp.idct_permutation[i << 3]] = level - ac_val[i];
+ const int level = block[n][s->idsp.idct_permutation[i << 3]];
+ block[n][s->idsp.idct_permutation[i << 3]] = level - ac_val[i];
ac_val1[i] = level;
- ac_val1[i + 8] = block[n][s->dsp.idct_permutation[i]];
+ ac_val1[i + 8] = block[n][s->idsp.idct_permutation[i]];
}
} else {
/* different qscale, we must rescale */
for (i = 1; i < 8; i++) {
- const int level = block[n][s->dsp.idct_permutation[i << 3]];
- block[n][s->dsp.idct_permutation[i << 3]] = level - ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale);
+ const int level = block[n][s->idsp.idct_permutation[i << 3]];
+ block[n][s->idsp.idct_permutation[i << 3]] = level - ROUNDED_DIV(ac_val[i] * qscale_table[xy], s->qscale);
ac_val1[i] = level;
- ac_val1[i + 8] = block[n][s->dsp.idct_permutation[i]];
+ ac_val1[i + 8] = block[n][s->idsp.idct_permutation[i]];
}
}
st[n] = s->intra_v_scantable.permutated;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index fb63d6a..a4a37d4 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -34,7 +34,7 @@
#include "libavutil/timer.h"
#include "avcodec.h"
#include "blockdsp.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "internal.h"
#include "mathops.h"
#include "mpegutils.h"
@@ -380,6 +380,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
ff_blockdsp_init(&s->bdsp, s->avctx);
ff_dsputil_init(&s->dsp, s->avctx);
ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
+ ff_idctdsp_init(&s->idsp, s->avctx);
ff_mpegvideodsp_init(&s->mdsp);
ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
@@ -403,14 +404,14 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
* note: only wmv uses different ones
*/
if (s->alternate_scan) {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
} else {
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_zigzag_direct);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
}
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
return 0;
}
@@ -2041,7 +2042,7 @@ static inline void put_dct(MpegEncContext *s,
int16_t *block, int i, uint8_t *dest, int line_size, int qscale)
{
s->dct_unquantize_intra(s, block, i, qscale);
- s->dsp.idct_put (dest, line_size, block);
+ s->idsp.idct_put(dest, line_size, block);
}
/* add block[] to dest[] */
@@ -2049,7 +2050,7 @@ static inline void add_dct(MpegEncContext *s,
int16_t *block, int i, uint8_t *dest, int line_size)
{
if (s->block_last_index[i] >= 0) {
- s->dsp.idct_add (dest, line_size, block);
+ s->idsp.idct_add(dest, line_size, block);
}
}
@@ -2059,7 +2060,7 @@ static inline void add_dequant_dct(MpegEncContext *s,
if (s->block_last_index[i] >= 0) {
s->dct_unquantize_inter(s, block, i, qscale);
- s->dsp.idct_add (dest, line_size, block);
+ s->idsp.idct_add(dest, line_size, block);
}
}
@@ -2127,7 +2128,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y);
for(i=0; i<6; i++){
for(j=0; j<64; j++){
- av_log(s->avctx, AV_LOG_DEBUG, "%5d", block[i][s->dsp.idct_permutation[j]]);
+ av_log(s->avctx, AV_LOG_DEBUG, "%5d",
+ block[i][s->idsp.idct_permutation[j]]);
}
av_log(s->avctx, AV_LOG_DEBUG, "\n");
}
@@ -2304,29 +2306,29 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
}
}else{
- s->dsp.idct_put(dest_y , dct_linesize, block[0]);
- s->dsp.idct_put(dest_y + block_size, dct_linesize, block[1]);
- s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]);
- s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
+ s->idsp.idct_put(dest_y, dct_linesize, block[0]);
+ s->idsp.idct_put(dest_y + block_size, dct_linesize, block[1]);
+ s->idsp.idct_put(dest_y + dct_offset, dct_linesize, block[2]);
+ s->idsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(s->chroma_y_shift){
- s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
- s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
+ s->idsp.idct_put(dest_cb, uvlinesize, block[4]);
+ s->idsp.idct_put(dest_cr, uvlinesize, block[5]);
}else{
dct_linesize = uvlinesize << s->interlaced_dct;
dct_offset = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
- s->dsp.idct_put(dest_cb, dct_linesize, block[4]);
- s->dsp.idct_put(dest_cr, dct_linesize, block[5]);
- s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
- s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
+ s->idsp.idct_put(dest_cb, dct_linesize, block[4]);
+ s->idsp.idct_put(dest_cr, dct_linesize, block[5]);
+ s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
+ s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
if(!s->chroma_x_shift){//Chroma444
- s->dsp.idct_put(dest_cb + 8, dct_linesize, block[8]);
- s->dsp.idct_put(dest_cr + 8, dct_linesize, block[9]);
- s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
- s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
+ s->idsp.idct_put(dest_cb + 8, dct_linesize, block[8]);
+ s->idsp.idct_put(dest_cr + 8, dct_linesize, block[9]);
+ s->idsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
+ s->idsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
}
}
}//gray
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 191dac0..27c72da 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -35,6 +35,7 @@
#include "get_bits.h"
#include "h263dsp.h"
#include "hpeldsp.h"
+#include "idctdsp.h"
#include "mpegvideodsp.h"
#include "put_bits.h"
#include "ratecontrol.h"
@@ -352,6 +353,7 @@ typedef struct MpegEncContext {
BlockDSPContext bdsp;
DSPContext dsp; ///< pointers for accelerated dsp functions
HpelDSPContext hdsp;
+ IDCTDSPContext idsp;
MpegVideoDSPContext mdsp;
QpelDSPContext qdsp;
VideoDSPContext vdsp;
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 3baf37a..65e2a8c 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -38,6 +38,7 @@
#include "avcodec.h"
#include "dct.h"
#include "dsputil.h"
+#include "idctdsp.h"
#include "mpeg12.h"
#include "mpegvideo.h"
#include "h261.h"
@@ -86,7 +87,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
dsp->fdct == ff_jpeg_fdct_islow_10 ||
dsp->fdct == ff_faandct) {
for (i = 0; i < 64; i++) {
- const int j = dsp->idct_permutation[i];
+ const int j = s->idsp.idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905
* Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
* 19952 <= x <= 249205026
@@ -98,7 +99,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
}
} else if (dsp->fdct == ff_fdct_ifast) {
for (i = 0; i < 64; i++) {
- const int j = dsp->idct_permutation[i];
+ const int j = s->idsp.idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905
* Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
* 19952 <= x <= 249205026
@@ -111,7 +112,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
}
} else {
for (i = 0; i < 64; i++) {
- const int j = dsp->idct_permutation[i];
+ const int j = s->idsp.idct_permutation[i];
/* We can safely suppose that 16 <= quant_matrix[i] <= 255
* Assume x = qscale * quant_matrix[i]
* So 16 <= x <= 7905
@@ -755,7 +756,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
/* init q matrix */
for (i = 0; i < 64; i++) {
- int j = s->dsp.idct_permutation[i];
+ int j = s->idsp.idct_permutation[i];
if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
s->mpeg_quant) {
s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
@@ -3360,7 +3361,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
if (s->out_format == FMT_MJPEG) {
/* for mjpeg, we do include qscale in the matrix */
for(i=1;i<64;i++){
- int j= s->dsp.idct_permutation[i];
+ int j = s->idsp.idct_permutation[i];
s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
}
@@ -3589,7 +3590,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
if(s->out_format == FMT_H263){
unquant_coeff= alevel*qmul + qadd;
}else{ //MPEG1
- j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
+ j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
if(s->mb_intra){
unquant_coeff = (int)( alevel * qscale * s->intra_matrix[j]) >> 3;
unquant_coeff = (unquant_coeff - 1) | 1;
@@ -3795,7 +3796,7 @@ static int messed_sign=0;
#endif
if(basis[0][0] == 0)
- build_basis(s->dsp.idct_permutation);
+ build_basis(s->idsp.idct_permutation);
qmul= qscale*2;
qadd= (qscale-1)|1;
@@ -4214,8 +4215,9 @@ int ff_dct_quantize_c(MpegEncContext *s,
*overflow= s->max_qcoeff < max; //overflow might have happened
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
- if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
- ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
+ if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
+ ff_block_permute(block, s->idsp.idct_permutation,
+ scantable, last_non_zero);
return last_non_zero;
}
diff --git a/libavcodec/mpegvideo_xvmc.c b/libavcodec/mpegvideo_xvmc.c
index aa6f49a..a8e068b 100644
--- a/libavcodec/mpegvideo_xvmc.c
+++ b/libavcodec/mpegvideo_xvmc.c
@@ -307,7 +307,7 @@ void ff_xvmc_decode_mb(MpegEncContext *s)
if (s->mb_intra && (render->idct || !render->unsigned_intra))
*s->pblocks[i][0] -= 1 << 10;
if (!render->idct) {
- s->dsp.idct(*s->pblocks[i]);
+ s->idsp.idct(*s->pblocks[i]);
/* It is unclear if MC hardware requires pixel diff values to be
* in the range [-255;255]. TODO: Clipping if such hardware is
* ever found. As of now it would only be an unnecessary
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index f0eaa9b..95b5c93 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -28,7 +28,7 @@
*/
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "mpegvideo.h"
#include "msmpeg4.h"
#include "libavutil/x86/asm.h"
@@ -136,10 +136,10 @@ av_cold void ff_msmpeg4_common_init(MpegEncContext *s)
if(s->msmpeg4_version>=4){
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_wmv1_scantable[1]);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_wmv1_scantable[2]);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_wmv1_scantable[3]);
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_wmv1_scantable[0]);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_wmv1_scantable[1]);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable, ff_wmv1_scantable[2]);
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_wmv1_scantable[3]);
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_wmv1_scantable[0]);
}
//Note the default tables are set in common_init in mpegvideo.c
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index 1bd848d..c31ff11 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -28,6 +28,7 @@
#include "libavutil/lzo.h"
#include "libavutil/imgutils.h"
#include "avcodec.h"
+#include "idctdsp.h"
#include "internal.h"
#include "rtjpeg.h"
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index c6c0bcb..ee0c18c 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -9,6 +9,7 @@ OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o
OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o
OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o
+OBJS-$(CONFIG_IDCTDSP) += ppc/idctdsp.o
OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
ppc/mpegvideodsp.o
@@ -24,7 +25,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \
ppc/fdct_altivec.o \
- ppc/idct_altivec.o \
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o
ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes)
diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h
index 42da933..be5fd58 100644
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@@ -29,9 +29,6 @@
void ff_fdct_altivec(int16_t *block);
-void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
-void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
-
void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 778d3e1..b541113 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -42,12 +42,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
c->fdct = ff_fdct_altivec;
}
#endif //CONFIG_ENCODERS
- if ((avctx->idct_algo == FF_IDCT_AUTO) ||
- (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
- c->idct_put = ff_idct_put_altivec;
- c->idct_add = ff_idct_add_altivec;
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
- }
}
}
}
diff --git a/libavcodec/ppc/idct_altivec.c b/libavcodec/ppc/idctdsp.c
similarity index 92%
rename from libavcodec/ppc/idct_altivec.c
rename to libavcodec/ppc/idctdsp.c
index 82fd929..8a1d290 100644
--- a/libavcodec/ppc/idct_altivec.c
+++ b/libavcodec/ppc/idctdsp.c
@@ -37,8 +37,13 @@
#include <altivec.h>
#endif
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
-#include "dsputil_altivec.h"
+#include "libavcodec/idctdsp.h"
+
+#if HAVE_ALTIVEC
#define IDCT_HALF \
/* 1st stage */ \
@@ -148,7 +153,7 @@ static const vec_s16 constants[5] = {
{ 19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722 }
};
-void ff_idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
+static void idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
{
vec_s16 *block = (vec_s16 *) blk;
vec_u8 tmp;
@@ -177,7 +182,7 @@ void ff_idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
COPY(dest, vx7);
}
-void ff_idct_add_altivec(uint8_t *dest, int stride, int16_t *blk)
+static void idct_add_altivec(uint8_t *dest, int stride, int16_t *blk)
{
vec_s16 *block = (vec_s16 *) blk;
vec_u8 tmp;
@@ -219,3 +224,22 @@ void ff_idct_add_altivec(uint8_t *dest, int stride, int16_t *blk)
dest += stride;
ADD(dest, vx7, perm1);
}
+
+#endif /* HAVE_ALTIVEC */
+
+av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{ /* Point c's idct_add/idct_put at this file's AltiVec routines when allowed. */
+#if HAVE_ALTIVEC /* when HAVE_ALTIVEC is 0 the body is empty and c is left untouched */
+ if (PPC_ALTIVEC(av_get_cpu_flags())) { /* presumably tests the AltiVec bit of the CPU flags -- confirm */
+ if (!high_bit_depth) { /* nothing is overridden when high_bit_depth is non-zero */
+ if ((avctx->idct_algo == FF_IDCT_AUTO) || /* only for automatic selection ... */
+ (avctx->idct_algo == FF_IDCT_ALTIVEC)) { /* ... or an explicit AltiVec request */
+ c->idct_add = idct_add_altivec;
+ c->idct_put = idct_put_altivec;
+ c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; /* NOTE(review): presumably these routines expect transposed coefficients -- confirm */
+ }
+ }
+ }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 144fa26..03f63d9 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -34,7 +34,7 @@
#include "libavutil/intmath.h"
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "internal.h"
#include "proresdata.h"
#include "proresdsp.h"
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 1d60897..1d92d36 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -23,7 +23,7 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/common.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "proresdsp.h"
#include "simple_idct.h"
diff --git a/libavcodec/rtjpeg.c b/libavcodec/rtjpeg.c
index 3188e6f..67eeff8 100644
--- a/libavcodec/rtjpeg.c
+++ b/libavcodec/rtjpeg.c
@@ -121,7 +121,7 @@ int ff_rtjpeg_decode_frame_yuv420(RTJpegContext *c, AVFrame *f,
if (res < 0) \
return res; \
if (res > 0) \
- c->dsp.idct_put(dst, stride, block); \
+ c->idsp.idct_put(dst, stride, block); \
} while (0)
int16_t *block = c->block;
BLOCK(c->lquant, y1, f->linesize[0]);
@@ -159,7 +159,7 @@ void ff_rtjpeg_decode_init(RTJpegContext *c, int width, int height,
const uint32_t *lquant, const uint32_t *cquant) {
int i;
for (i = 0; i < 64; i++) {
- int p = c->dsp.idct_permutation[i];
+ int p = c->idsp.idct_permutation[i];
c->lquant[p] = lquant[i];
c->cquant[p] = cquant[i];
}
@@ -171,13 +171,13 @@ void ff_rtjpeg_init(RTJpegContext *c, AVCodecContext *avctx)
{
int i;
- ff_dsputil_init(&c->dsp, avctx);
+ ff_idctdsp_init(&c->idsp, avctx);
for (i = 0; i < 64; i++) {
int z = ff_zigzag_direct[i];
z = ((z << 3) | (z >> 3)) & 63; // rtjpeg uses a transposed variant
// permute the scan and quantization tables for the chosen idct
- c->scan[i] = c->dsp.idct_permutation[z];
+ c->scan[i] = c->idsp.idct_permutation[z];
}
}
diff --git a/libavcodec/rtjpeg.h b/libavcodec/rtjpeg.h
index 23609b3..cd30079 100644
--- a/libavcodec/rtjpeg.h
+++ b/libavcodec/rtjpeg.h
@@ -23,15 +23,16 @@
#define AVCODEC_RTJPEG_H
#include <stdint.h>
-#include "dsputil.h"
+
#include "libavutil/mem.h"
+#include "idctdsp.h"
#define RTJPEG_FILE_VERSION 0
#define RTJPEG_HEADER_SIZE 12
typedef struct RTJpegContext {
int w, h;
- DSPContext dsp;
+ IDCTDSPContext idsp;
uint8_t scan[64];
uint32_t lquant[64];
uint32_t cquant[64];
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 6d6c1ec..c83bb4f 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -109,24 +109,24 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
fieldtx = v->fieldtx_plane[topleft_mb_pos];
stride_y = s->linesize << fieldtx;
v_dist = (16 - fieldtx) >> (fieldtx == 0);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
- s->dest[0] - 16 * s->linesize - 16,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
- s->dest[0] - 16 * s->linesize - 8,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
- s->dest[0] - v_dist * s->linesize - 16,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
- s->dest[0] - v_dist * s->linesize - 8,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
- s->dest[1] - 8 * s->uvlinesize - 8,
- s->uvlinesize);
- s->dsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
- s->dest[2] - 8 * s->uvlinesize - 8,
- s->uvlinesize);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
+ s->dest[0] - 16 * s->linesize - 16,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
+ s->dest[0] - 16 * s->linesize - 8,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
+ s->dest[0] - v_dist * s->linesize - 16,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
+ s->dest[0] - v_dist * s->linesize - 8,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
+ s->dest[1] - 8 * s->uvlinesize - 8,
+ s->uvlinesize);
+ s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
+ s->dest[2] - 8 * s->uvlinesize - 8,
+ s->uvlinesize);
}
if (s->mb_x == s->mb_width - 1) {
top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x;
@@ -134,24 +134,24 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
fieldtx = v->fieldtx_plane[top_mb_pos];
stride_y = s->linesize << fieldtx;
v_dist = fieldtx ? 15 : 8;
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
- s->dest[0] - 16 * s->linesize,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
- s->dest[0] - 16 * s->linesize + 8,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
- s->dest[0] - v_dist * s->linesize,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
- s->dest[0] - v_dist * s->linesize + 8,
- stride_y);
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
- s->dest[1] - 8 * s->uvlinesize,
- s->uvlinesize);
- s->dsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
- s->dest[2] - 8 * s->uvlinesize,
- s->uvlinesize);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
+ s->dest[0] - 16 * s->linesize,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
+ s->dest[0] - 16 * s->linesize + 8,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
+ s->dest[0] - v_dist * s->linesize,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
+ s->dest[0] - v_dist * s->linesize + 8,
+ stride_y);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
+ s->dest[1] - 8 * s->uvlinesize,
+ s->uvlinesize);
+ s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
+ s->dest[2] - 8 * s->uvlinesize,
+ s->uvlinesize);
}
}
@@ -3280,7 +3280,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
else {
v->vc1dsp.vc1_inv_trans_8x8(block);
- s->dsp.add_pixels_clamped(block, dst, linesize);
+ s->idsp.add_pixels_clamped(block, dst, linesize);
}
}
break;
@@ -3611,7 +3611,10 @@ static int vc1_decode_p_mb(VC1Context *v)
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ i & 4 ? s->uvlinesize
+ : s->linesize);
if (v->pq >= 9 && v->overlap) {
if (v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -3719,8 +3722,10 @@ static int vc1_decode_p_mb(VC1Context *v)
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ (i & 4) ? s->uvlinesize
+ : s->linesize);
if (v->pq >= 9 && v->overlap) {
if (v->c_avail)
v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
@@ -3869,7 +3874,9 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
stride_y = s->uvlinesize;
off = 0;
}
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, stride_y);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ stride_y);
//TODO: loop filter
}
@@ -4031,7 +4038,10 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
continue;
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ (i & 4) ? s->uvlinesize
+ : s->linesize);
// TODO: loop filter
}
} else {
@@ -4233,7 +4243,10 @@ static void vc1_decode_b_mb(VC1Context *v)
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ i & 4 ? s->uvlinesize
+ : s->linesize);
} else if (val) {
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
first_block, s->dest[dst_idx] + off,
@@ -4305,7 +4318,10 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ (i & 4) ? s->uvlinesize
+ : s->linesize);
// TODO: yet to perform loop filter
}
} else {
@@ -4524,7 +4540,9 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
stride_y = s->uvlinesize;
off = 0;
}
- s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, stride_y);
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[dst_idx] + off,
+ stride_y);
}
} else {
s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -4828,12 +4846,16 @@ static void vc1_decode_i_blocks(VC1Context *v)
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[k][j] <<= 1;
- s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
+ s->idsp.put_signed_pixels_clamped(s->block[k], dst[k],
+ k & 4 ? s->uvlinesize
+ : s->linesize);
} else {
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[k][j] = (s->block[k][j] - 64) << 1;
- s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
+ s->idsp.put_pixels_clamped(s->block[k], dst[k],
+ k & 4 ? s->uvlinesize
+ : s->linesize);
}
}
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c
index bd799d0..b6c7bc0 100644
--- a/libavcodec/wmv2.c
+++ b/libavcodec/wmv2.c
@@ -19,6 +19,7 @@
*/
#include "avcodec.h"
+#include "idctdsp.h"
#include "mpegvideo.h"
#include "msmpeg4data.h"
#include "simple_idct.h"
@@ -30,24 +31,24 @@ av_cold void ff_wmv2_common_init(Wmv2Context * w){
ff_blockdsp_init(&s->bdsp, s->avctx);
ff_wmv2dsp_init(&w->wdsp);
- s->dsp.idct_permutation_type = w->wdsp.idct_perm;
- ff_init_scantable_permutation(s->dsp.idct_permutation,
+ s->idsp.idct_permutation_type = w->wdsp.idct_perm;
+ ff_init_scantable_permutation(s->idsp.idct_permutation,
w->wdsp.idct_perm);
- ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[0],
+ ff_init_scantable(s->idsp.idct_permutation, &w->abt_scantable[0],
ff_wmv2_scantableA);
- ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[1],
+ ff_init_scantable(s->idsp.idct_permutation, &w->abt_scantable[1],
ff_wmv2_scantableB);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable,
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,
ff_wmv1_scantable[1]);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable,
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_h_scantable,
ff_wmv1_scantable[2]);
- ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable,
+ ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable,
ff_wmv1_scantable[3]);
- ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable,
+ ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,
ff_wmv1_scantable[0]);
- s->dsp.idct_put = w->wdsp.idct_put;
- s->dsp.idct_add = w->wdsp.idct_add;
- s->dsp.idct = NULL;
+ s->idsp.idct_put = w->wdsp.idct_put;
+ s->idsp.idct_add = w->wdsp.idct_add;
+ s->idsp.idct = NULL;
}
static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int stride, int n){
diff --git a/libavcodec/wmv2dsp.c b/libavcodec/wmv2dsp.c
index dff49f4..49df436 100644
--- a/libavcodec/wmv2dsp.c
+++ b/libavcodec/wmv2dsp.c
@@ -19,7 +19,7 @@
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "avcodec.h"
-#include "dsputil.h"
+#include "idctdsp.h"
#include "mathops.h"
#include "wmv2dsp.h"
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 13f9aff..14e58f9 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -18,6 +18,7 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o
OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o
+OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o
OBJS-$(CONFIG_LPC) += x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
@@ -49,13 +50,14 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
-MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
- x86/idct_mmx_xvid.o \
- x86/idct_sse2_xvid.o \
- x86/simple_idct.o
+MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
+MMX-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_mmx.o \
+ x86/idct_mmx_xvid.o \
+ x86/idct_sse2_xvid.o \
+ x86/simple_idct.o
MMX-OBJS-$(CONFIG_QPELDSP) += x86/fpel_mmx.o
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index d5c441f..f0e8cfc 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -28,9 +28,10 @@
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/cavsdsp.h"
+#include "libavcodec/idctdsp.h"
#include "constants.h"
-#include "dsputil_x86.h"
#include "fpel.h"
+#include "idctdsp.h"
#include "config.h"
#if HAVE_MMX_INLINE
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index 74dab48..adc7aa9 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -22,97 +22,18 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
-#include "libavcodec/simple_idct.h"
#include "dsputil_x86.h"
-#include "idct_xvid.h"
-
-/* Input permutation for the simple_idct_mmx */
-static const uint8_t simple_mmx_permutation[64] = {
- 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
- 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
- 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
- 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
- 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
- 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
- 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
- 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
-};
-
-static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
-
-av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
- int idct_permutation_type)
-{
- int i;
-
- switch (idct_permutation_type) {
- case FF_SIMPLE_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = simple_mmx_permutation[i];
- return 1;
- case FF_SSE2_IDCT_PERM:
- for (i = 0; i < 64; i++)
- idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7];
- return 1;
- }
-
- return 0;
-}
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
int cpu_flags, unsigned high_bit_depth)
{
#if HAVE_MMX_INLINE
- c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
- c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
- c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
-
if (!high_bit_depth) {
c->draw_edges = ff_draw_edges_mmx;
-
- switch (avctx->idct_algo) {
- case FF_IDCT_AUTO:
- case FF_IDCT_SIMPLEMMX:
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
- c->idct = ff_simple_idct_mmx;
- c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
- break;
- case FF_IDCT_XVIDMMX:
- c->idct_put = ff_idct_xvid_mmx_put;
- c->idct_add = ff_idct_xvid_mmx_add;
- c->idct = ff_idct_xvid_mmx;
- break;
- }
}
#endif /* HAVE_MMX_INLINE */
}
-static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
- int cpu_flags, unsigned high_bit_depth)
-{
-#if HAVE_MMXEXT_INLINE
- if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
- c->idct_put = ff_idct_xvid_mmxext_put;
- c->idct_add = ff_idct_xvid_mmxext_add;
- c->idct = ff_idct_xvid_mmxext;
- }
-#endif /* HAVE_MMXEXT_INLINE */
-}
-
-static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
- int cpu_flags, unsigned high_bit_depth)
-{
-#if HAVE_SSE2_INLINE
- if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
- c->idct_put = ff_idct_xvid_sse2_put;
- c->idct_add = ff_idct_xvid_sse2_add;
- c->idct = ff_idct_xvid_sse2;
- c->idct_permutation_type = FF_SSE2_IDCT_PERM;
- }
-#endif /* HAVE_SSE2_INLINE */
-}
-
av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
@@ -121,12 +42,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx,
if (X86_MMX(cpu_flags))
dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth);
- if (X86_MMXEXT(cpu_flags))
- dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth);
-
- if (X86_SSE2(cpu_flags))
- dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth);
-
if (CONFIG_ENCODERS)
ff_dsputilenc_init_mmx(c, avctx, high_bit_depth);
}
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 5fa047d..d205a48 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -30,141 +30,6 @@
#if HAVE_INLINE_ASM
-void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size)
-{
- const int16_t *p;
- uint8_t *pix;
-
- /* read the pixels */
- p = block;
- pix = pixels;
- /* unrolled loop */
- __asm__ volatile (
- "movq (%3), %%mm0 \n\t"
- "movq 8(%3), %%mm1 \n\t"
- "movq 16(%3), %%mm2 \n\t"
- "movq 24(%3), %%mm3 \n\t"
- "movq 32(%3), %%mm4 \n\t"
- "movq 40(%3), %%mm5 \n\t"
- "movq 48(%3), %%mm6 \n\t"
- "movq 56(%3), %%mm7 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "packuswb %%mm7, %%mm6 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm2, (%0, %1) \n\t"
- "movq %%mm4, (%0, %1, 2) \n\t"
- "movq %%mm6, (%0, %2) \n\t"
- :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
- "r" (p)
- : "memory");
- pix += line_size * 4;
- p += 32;
-
- // if here would be an exact copy of the code above
- // compiler would generate some very strange code
- // thus using "r"
- __asm__ volatile (
- "movq (%3), %%mm0 \n\t"
- "movq 8(%3), %%mm1 \n\t"
- "movq 16(%3), %%mm2 \n\t"
- "movq 24(%3), %%mm3 \n\t"
- "movq 32(%3), %%mm4 \n\t"
- "movq 40(%3), %%mm5 \n\t"
- "movq 48(%3), %%mm6 \n\t"
- "movq 56(%3), %%mm7 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "packuswb %%mm7, %%mm6 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm2, (%0, %1) \n\t"
- "movq %%mm4, (%0, %1, 2) \n\t"
- "movq %%mm6, (%0, %2) \n\t"
- :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
- "r" (p)
- : "memory");
-}
-
-#define put_signed_pixels_clamped_mmx_half(off) \
- "movq "#off"(%2), %%mm1 \n\t" \
- "movq 16 + "#off"(%2), %%mm2 \n\t" \
- "movq 32 + "#off"(%2), %%mm3 \n\t" \
- "movq 48 + "#off"(%2), %%mm4 \n\t" \
- "packsswb 8 + "#off"(%2), %%mm1 \n\t" \
- "packsswb 24 + "#off"(%2), %%mm2 \n\t" \
- "packsswb 40 + "#off"(%2), %%mm3 \n\t" \
- "packsswb 56 + "#off"(%2), %%mm4 \n\t" \
- "paddb %%mm0, %%mm1 \n\t" \
- "paddb %%mm0, %%mm2 \n\t" \
- "paddb %%mm0, %%mm3 \n\t" \
- "paddb %%mm0, %%mm4 \n\t" \
- "movq %%mm1, (%0) \n\t" \
- "movq %%mm2, (%0, %3) \n\t" \
- "movq %%mm3, (%0, %3, 2) \n\t" \
- "movq %%mm4, (%0, %1) \n\t"
-
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size)
-{
- x86_reg line_skip = line_size;
- x86_reg line_skip3;
-
- __asm__ volatile (
- "movq "MANGLE(ff_pb_80)", %%mm0 \n\t"
- "lea (%3, %3, 2), %1 \n\t"
- put_signed_pixels_clamped_mmx_half(0)
- "lea (%0, %3, 4), %0 \n\t"
- put_signed_pixels_clamped_mmx_half(64)
- : "+&r" (pixels), "=&r" (line_skip3)
- : "r" (block), "r" (line_skip)
- : "memory");
-}
-
-void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size)
-{
- const int16_t *p;
- uint8_t *pix;
- int i;
-
- /* read the pixels */
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- i = 4;
- do {
- __asm__ volatile (
- "movq (%2), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 16(%2), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "movq %0, %%mm4 \n\t"
- "movq %1, %%mm6 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddsw %%mm4, %%mm0 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
- "movq %%mm6, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddsw %%mm6, %%mm2 \n\t"
- "paddsw %%mm5, %%mm3 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "movq %%mm0, %0 \n\t"
- "movq %%mm2, %1 \n\t"
- : "+m" (*pix), "+m" (*(pix + line_size))
- : "r" (p)
- : "memory");
- pix += line_size * 2;
- p += 16;
- } while (--i);
-}
-
/* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index 4beb6c1..7e1e8af 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -31,13 +31,6 @@ void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx);
-void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size);
-void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size);
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- int line_size);
-
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides);
diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
index 2772339..920ea4c 100644
--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -44,8 +44,8 @@
#include "config.h"
#include "libavcodec/avcodec.h"
#include "libavutil/mem.h"
-#include "dsputil_x86.h"
#include "idct_xvid.h"
+#include "idctdsp.h"
#if HAVE_MMX_INLINE
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index 50655d6..aadeb12 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -42,7 +42,7 @@
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "idct_xvid.h"
-#include "dsputil_x86.h"
+#include "idctdsp.h"
#if HAVE_SSE2_INLINE
diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h
new file mode 100644
index 0000000..22df3dd
--- /dev/null
+++ b/libavcodec/x86/idctdsp.h
@@ -0,0 +1,31 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_IDCTDSP_H
+#define AVCODEC_X86_IDCTDSP_H
+
+#include <stdint.h>
+
+void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+ int line_size); /* NOTE(review): the MMX body removed from dsputil_mmx.c adds block to pixels (paddsw) and packs with unsigned saturation (packuswb); presumably unchanged */
+void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+ int line_size); /* NOTE(review): the MMX body removed from dsputil_mmx.c stores block into pixels with unsigned saturation (packuswb); presumably unchanged */
+void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
+ int line_size); /* NOTE(review): the MMX body removed from dsputil_mmx.c packs with signed saturation (packsswb), then adds ff_pb_80 to every byte; presumably unchanged */
+
+#endif /* AVCODEC_X86_IDCTDSP_H */
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
new file mode 100644
index 0000000..9b68497
--- /dev/null
+++ b/libavcodec/x86/idctdsp_init.c
@@ -0,0 +1,106 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/idctdsp.h"
+#include "libavcodec/simple_idct.h"
+#include "idct_xvid.h"
+#include "idctdsp.h"
+
+/* Input permutation for the simple_idct_mmx; copied verbatim into idct_permutation[] for FF_SIMPLE_IDCT_PERM */
+static const uint8_t simple_mmx_permutation[64] = {
+ 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+ 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+ 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+ 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+ 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+ 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+ 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+ 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
+static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; /* remaps the low 3 bits of each index (position within a group of 8) for FF_SSE2_IDCT_PERM */
+
+av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, /* out: 64 entries written when the type is handled */
+ int idct_permutation_type) /* returns 1 if the type was handled here, 0 otherwise */
+{
+ int i;
+
+ switch (idct_permutation_type) {
+ case FF_SIMPLE_IDCT_PERM:
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = simple_mmx_permutation[i]; /* straight copy of the lookup table */
+ return 1;
+ case FF_SSE2_IDCT_PERM:
+ for (i = 0; i < 64; i++)
+ idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; /* keep bits 3-5, remap bits 0-2 */
+ return 1;
+ }
+
+ return 0; /* any other type: idct_permutation is left untouched */
+}
+
+av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, /* installs x86 function pointers into c according to the CPU flags */
+ unsigned high_bit_depth) /* nonzero: no IDCT routine is selected here */
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (INLINE_MMX(cpu_flags)) {
+ c->put_pixels_clamped = ff_put_pixels_clamped_mmx; /* the three clamp helpers are set regardless of high_bit_depth */
+ c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
+ c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+
+ if (!high_bit_depth) {
+ switch (avctx->idct_algo) {
+ case FF_IDCT_AUTO: /* falls through: AUTO gets the same setup as FF_IDCT_SIMPLEMMX */
+ case FF_IDCT_SIMPLEMMX:
+ c->idct_put = ff_simple_idct_put_mmx;
+ c->idct_add = ff_simple_idct_add_mmx;
+ c->idct = ff_simple_idct_mmx;
+ c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
+ break;
+ case FF_IDCT_XVIDMMX: /* idct_permutation_type is not assigned in this branch */
+ c->idct_put = ff_idct_xvid_mmx_put;
+ c->idct_add = ff_idct_xvid_mmx_add;
+ c->idct = ff_idct_xvid_mmx;
+ break;
+ } /* no default: other idct_algo values leave the idct pointers as they were */
+ }
+ }
+
+ if (INLINE_MMXEXT(cpu_flags)) { /* checked after MMX, so these assignments replace the MMX xvid pointers */
+ if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
+ c->idct_put = ff_idct_xvid_mmxext_put;
+ c->idct_add = ff_idct_xvid_mmxext_add;
+ c->idct = ff_idct_xvid_mmxext;
+ }
+ }
+
+ if (INLINE_SSE2(cpu_flags)) { /* checked last, so these assignments replace the MMXEXT xvid pointers */
+ if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
+ c->idct_put = ff_idct_xvid_sse2_put;
+ c->idct_add = ff_idct_xvid_sse2_add;
+ c->idct = ff_idct_xvid_sse2;
+ c->idct_permutation_type = FF_SSE2_IDCT_PERM; /* only the SSE2 xvid variant sets a permutation type */
+ }
+ }
+}
diff --git a/libavcodec/x86/idctdsp_mmx.c b/libavcodec/x86/idctdsp_mmx.c
new file mode 100644
index 0000000..7285b1d
--- /dev/null
+++ b/libavcodec/x86/idctdsp_mmx.c
@@ -0,0 +1,168 @@
+/*
+ * SIMD-optimized IDCT-related routines
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "idctdsp.h"
+#include "inline_asm.h"
+
+#if HAVE_INLINE_ASM
+
+void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, /* writes 64 int16 values from block as 8 rows of 8 bytes */
+ int line_size) /* byte distance between consecutive destination rows */
+{
+ const int16_t *p;
+ uint8_t *pix;
+
+ /* working pointers: p walks the coefficients, pix walks the destination */
+ p = block;
+ pix = pixels;
+ /* unrolled loop: first half, rows 0-3 */
+ __asm__ volatile (
+ "movq (%3), %%mm0 \n\t" /* load 32 int16 values (64 bytes) into mm0-mm7 */
+ "movq 8(%3), %%mm1 \n\t"
+ "movq 16(%3), %%mm2 \n\t"
+ "movq 24(%3), %%mm3 \n\t"
+ "movq 32(%3), %%mm4 \n\t"
+ "movq 40(%3), %%mm5 \n\t"
+ "movq 48(%3), %%mm6 \n\t"
+ "movq 56(%3), %%mm7 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t" /* pack word pairs to bytes with unsigned saturation */
+ "packuswb %%mm3, %%mm2 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "packuswb %%mm7, %%mm6 \n\t"
+ "movq %%mm0, (%0) \n\t" /* row 0 */
+ "movq %%mm2, (%0, %1) \n\t" /* row 1: pix + line_size */
+ "movq %%mm4, (%0, %1, 2) \n\t" /* row 2: pix + 2 * line_size */
+ "movq %%mm6, (%0, %2) \n\t" /* row 3: pix + 3 * line_size */
+ :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
+ "r" (p)
+ : "memory");
+ pix += line_size * 4; /* advance to rows 4-7 */
+ p += 32; /* skip the 32 coefficients just stored */
+
+ // NOTE(review): the original remark here said that an exact copy of the asm
+ // above made the compiler generate very strange code, hence the "r" operand;
+ // both statements are now identical, so the remark may be stale - confirm.
+ __asm__ volatile (
+ "movq (%3), %%mm0 \n\t" /* second half: same sequence for rows 4-7 */
+ "movq 8(%3), %%mm1 \n\t"
+ "movq 16(%3), %%mm2 \n\t"
+ "movq 24(%3), %%mm3 \n\t"
+ "movq 32(%3), %%mm4 \n\t"
+ "movq 40(%3), %%mm5 \n\t"
+ "movq 48(%3), %%mm6 \n\t"
+ "movq 56(%3), %%mm7 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "packuswb %%mm7, %%mm6 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm2, (%0, %1) \n\t"
+ "movq %%mm4, (%0, %1, 2) \n\t"
+ "movq %%mm6, (%0, %2) \n\t"
+ :: "r" (pix), "r" ((x86_reg) line_size), "r" ((x86_reg) line_size * 3),
+ "r" (p)
+ : "memory");
+}
+
+#define put_signed_pixels_clamped_mmx_half(off) /* asm text for 4 rows; off = byte offset into block; %0=pixels %1=3*line_skip %2=block %3=line_skip, mm0=bias */ \
+ "movq "#off"(%2), %%mm1 \n\t" \
+ "movq 16 + "#off"(%2), %%mm2 \n\t" \
+ "movq 32 + "#off"(%2), %%mm3 \n\t" \
+ "movq 48 + "#off"(%2), %%mm4 \n\t" \
+ "packsswb 8 + "#off"(%2), %%mm1 \n\t" /* pack with the next 4 words from memory, signed saturation */ \
+ "packsswb 24 + "#off"(%2), %%mm2 \n\t" \
+ "packsswb 40 + "#off"(%2), %%mm3 \n\t" \
+ "packsswb 56 + "#off"(%2), %%mm4 \n\t" \
+ "paddb %%mm0, %%mm1 \n\t" /* add the bias in mm0 to every byte */ \
+ "paddb %%mm0, %%mm2 \n\t" \
+ "paddb %%mm0, %%mm3 \n\t" \
+ "paddb %%mm0, %%mm4 \n\t" \
+ "movq %%mm1, (%0) \n\t" /* rows at +0, +line_skip, +2*line_skip, +3*line_skip */ \
+ "movq %%mm2, (%0, %3) \n\t" \
+ "movq %%mm3, (%0, %3, 2) \n\t" \
+ "movq %%mm4, (%0, %1) \n\t"
+
+void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, /* packs block to signed bytes, adds ff_pb_80 per byte, writes 8 rows of 8 bytes */
+ int line_size) /* byte distance between consecutive destination rows */
+{
+ x86_reg line_skip = line_size;
+ x86_reg line_skip3; /* written inside the asm: 3 * line_skip */
+
+ __asm__ volatile (
+ "movq "MANGLE(ff_pb_80)", %%mm0 \n\t" /* mm0 = ff_pb_80 (presumably 0x80 in every byte - confirm) */
+ "lea (%3, %3, 2), %1 \n\t" /* %1 = 3 * line_skip */
+ put_signed_pixels_clamped_mmx_half(0) /* rows 0-3, coefficients at byte offset 0 */
+ "lea (%0, %3, 4), %0 \n\t" /* pixels += 4 * line_skip */
+ put_signed_pixels_clamped_mmx_half(64) /* rows 4-7, coefficients at byte offset 64 */
+ : "+&r" (pixels), "=&r" (line_skip3) /* early-clobber: both are written before the inputs are last read */
+ : "r" (block), "r" (line_skip)
+ : "memory");
+}
+
+void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, /* adds 64 int16 values from block to an 8x8 byte area in place */
+ int line_size) /* byte distance between consecutive rows of pixels */
+{
+ const int16_t *p;
+ uint8_t *pix;
+ int i;
+
+ /* working pointers: p walks the coefficients, pix walks the pixels */
+ p = block;
+ pix = pixels;
+ MOVQ_ZERO(mm7); /* presumably clears mm7 (macro from inline_asm.h - confirm); the asm below relies on mm7 surviving between statements */
+ i = 4; /* 4 iterations, 2 rows per iteration */
+ do {
+ __asm__ volatile (
+ "movq (%2), %%mm0 \n\t" /* mm0-mm3 = 16 coefficients for two rows */
+ "movq 8(%2), %%mm1 \n\t"
+ "movq 16(%2), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "movq %0, %%mm4 \n\t" /* 8 pixels of the first row */
+ "movq %1, %%mm6 \n\t" /* 8 pixels of the second row */
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t" /* widen bytes to words by interleaving with mm7 */
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddsw %%mm4, %%mm0 \n\t" /* signed saturating 16-bit add */
+ "paddsw %%mm5, %%mm1 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddsw %%mm6, %%mm2 \n\t"
+ "paddsw %%mm5, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t" /* back to bytes with unsigned saturation */
+ "packuswb %%mm3, %%mm2 \n\t"
+ "movq %%mm0, %0 \n\t" /* write both rows back */
+ "movq %%mm2, %1 \n\t"
+ : "+m" (*pix), "+m" (*(pix + line_size))
+ : "r" (p)
+ : "memory");
+ pix += line_size * 2; /* next pair of rows */
+ p += 16; /* next 16 coefficients */
+ } while (--i);
+}
+
+#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c
index d01ff1c..fa59006 100644
--- a/libavcodec/x86/mpegvideoenc_template.c
+++ b/libavcodec/x86/mpegvideoenc_template.c
@@ -229,7 +229,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if(s->mb_intra) block[0]= level;
else block[0]= temp_block[0];
- if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
+ if (s->idsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM) {
if(last_non_zero_p1 <= 1) goto end;
block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
block[0x20] = temp_block[0x10];
diff --git a/libavcodec/x86/proresdsp_init.c b/libavcodec/x86/proresdsp_init.c
index 68ad929..a66fc70 100644
--- a/libavcodec/x86/proresdsp_init.c
+++ b/libavcodec/x86/proresdsp_init.c
@@ -22,7 +22,7 @@
#include "libavutil/attributes.h"
#include "libavutil/x86/cpu.h"
-#include "libavcodec/dsputil.h"
+#include "libavcodec/idctdsp.h"
#include "libavcodec/proresdsp.h"
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index a342110..bbe5a67 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -23,7 +23,7 @@
#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
-#include "dsputil_x86.h"
+#include "idctdsp.h"
#if HAVE_INLINE_ASM
More information about the ffmpeg-cvslog
mailing list