[FFmpeg-devel] [PATCH 1/3] Add add_pixels4/8() to h264dsp, and remove add_pixels4 from dsputil.
Ronald S. Bultje
rsbultje at gmail.com
Mon Feb 11 01:46:06 CET 2013
From: "Ronald S. Bultje" <rsbultje at gmail.com>
These functions are mostly H264-specific (the only other user I can
spot is bink), and giving h264dsp its own copies allows us to
special-case some functionality for H264. Also remove the 16-bit-coeff
versions for >8bpp (unused) and merge the duplicate 32-bit-coeff
versions for >8bpp (identical).
---
libavcodec/dsputil.c | 27 ++++++++++++++--
libavcodec/dsputil.h | 1 -
libavcodec/dsputil_template.c | 42 -------------------------
libavcodec/h264.c | 14 ++++-----
libavcodec/h264_mb_template.c | 2 +-
libavcodec/h264addpx_template.c | 68 +++++++++++++++++++++++++++++++++++++++++
libavcodec/h264dsp.c | 18 +++++++++++
libavcodec/h264dsp.h | 4 +++
8 files changed, 122 insertions(+), 54 deletions(-)
create mode 100644 libavcodec/h264addpx_template.c
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index ac0e95a..117c5b3 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -443,6 +443,27 @@ static void put_signed_pixels_clamped_c(const int16_t *block,
}
}
+static void add_pixels8_c(uint8_t *av_restrict pixels,
+ int16_t *block,
+ int line_size)
+{
+ int i;
+ /* Add an 8x8 block of int16_t coefficients to 8 rows of uint8_t pixels, in place. */
+ for(i=0;i<8;i++) {
+ pixels[0] += block[0]; /* plain +=: the result is stored back into uint8_t with no clamping */
+ pixels[1] += block[1];
+ pixels[2] += block[2];
+ pixels[3] += block[3];
+ pixels[4] += block[4];
+ pixels[5] += block[5];
+ pixels[6] += block[6];
+ pixels[7] += block[7];
+ pixels += line_size; /* line_size is the distance from one pixel row to the next */
+ block += 8; /* coefficients are read as 8 contiguous values per row */
+ }
+}
+
+
static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
int line_size)
{
@@ -2852,6 +2873,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88;
+ c->add_pixels8 = add_pixels8_c;
+
#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
@@ -2861,9 +2884,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->get_pixels = FUNCC(get_pixels ## dct , depth);\
c->draw_edges = FUNCC(draw_edges , depth);\
c->clear_block = FUNCC(clear_block ## dct , depth);\
- c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
- c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
- c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
+ c->clear_blocks = FUNCC(clear_blocks ## dct , depth)
switch (avctx->bits_per_raw_sample) {
case 9:
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index cffd59d..102d86d 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -148,7 +148,6 @@ typedef struct DSPContext {
void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size);
- void (*add_pixels4)(uint8_t *pixels, int16_t *block, int line_size);
int (*sum_abs_dctelem)(int16_t *block/*align 16*/);
/**
* translational global motion compensation.
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index df7c43d..cadbc43 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -89,48 +89,6 @@ static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block, \
} \
} \
\
-static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels, \
- int16_t *_block, \
- int line_size) \
-{ \
- int i; \
- pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
- dctcoef *block = (dctcoef*)_block; \
- line_size /= sizeof(pixel); \
- \
- for(i=0;i<8;i++) { \
- pixels[0] += block[0]; \
- pixels[1] += block[1]; \
- pixels[2] += block[2]; \
- pixels[3] += block[3]; \
- pixels[4] += block[4]; \
- pixels[5] += block[5]; \
- pixels[6] += block[6]; \
- pixels[7] += block[7]; \
- pixels += line_size; \
- block += 8; \
- } \
-} \
- \
-static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels, \
- int16_t *_block, \
- int line_size) \
-{ \
- int i; \
- pixel *av_restrict pixels = (pixel *av_restrict)_pixels; \
- dctcoef *block = (dctcoef*)_block; \
- line_size /= sizeof(pixel); \
- \
- for(i=0;i<4;i++) { \
- pixels[0] += block[0]; \
- pixels[1] += block[1]; \
- pixels[2] += block[2]; \
- pixels[3] += block[3]; \
- pixels += line_size; \
- block += 4; \
- } \
-} \
- \
static void FUNCC(clear_block ## suffix)(int16_t *block) \
{ \
memset(block, 0, sizeof(dctcoef)*64); \
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 28d63d9..2d6c5e6 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1818,7 +1818,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
if (IS_8x8DCT(mb_type)) {
if (transform_bypass) {
idct_dc_add =
- idct_add = s->dsp.add_pixels8;
+ idct_add = h->h264dsp.h264_add_pixels8;
} else {
idct_dc_add = h->h264dsp.h264_idct8_dc_add;
idct_add = h->h264dsp.h264_idct8_add;
@@ -1843,7 +1843,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
} else {
if (transform_bypass) {
idct_dc_add =
- idct_add = s->dsp.add_pixels4;
+ idct_add = h->h264dsp.h264_add_pixels4;
} else {
idct_dc_add = h->h264dsp.h264_idct_dc_add;
idct_add = h->h264dsp.h264_idct_add;
@@ -1942,9 +1942,9 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
for (i = 0; i < 16; i++)
if (h->non_zero_count_cache[scan8[i + p * 16]] ||
dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
- s->dsp.add_pixels4(dest_y + block_offset[i],
- h->mb + (i * 16 + p * 256 << pixel_shift),
- linesize);
+ h->h264dsp.h264_add_pixels4(dest_y + block_offset[i],
+ h->mb + (i * 16 + p * 256 << pixel_shift),
+ linesize);
}
} else {
h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
@@ -1955,8 +1955,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
} else if (h->cbp & 15) {
if (transform_bypass) {
const int di = IS_8x8DCT(mb_type) ? 4 : 1;
- idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8
- : s->dsp.add_pixels4;
+ idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8
+ : h->h264dsp.h264_add_pixels4;
for (i = 0; i < 16; i += di)
if (h->non_zero_count_cache[scan8[i + p * 16]])
idct_add(dest_y + block_offset[i],
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index 679021f..0f0e451 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -205,7 +205,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
uvlinesize);
} else {
- idct_add = s->dsp.add_pixels4;
+ idct_add = h->h264dsp.h264_add_pixels4;
for (j = 1; j < 3; j++) {
for (i = j * 16; i < j * 16 + 4; i++)
if (h->non_zero_count_cache[scan8[i]] ||
diff --git a/libavcodec/h264addpx_template.c b/libavcodec/h264addpx_template.c
new file mode 100644
index 0000000..1173766
--- /dev/null
+++ b/libavcodec/h264addpx_template.c
@@ -0,0 +1,68 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG4 part10 DSP functions.
+ * @author Michael Niedermayer <michaelni at gmx.at>
+ */
+
+#include "bit_depth_template.c"
+/* Add a 4x4 block of coefficients (_src) to the pixels at _dst, in place and without clipping. */
+static void FUNCC(ff_h264_add_pixels4)(uint8_t *_dst, int16_t *_src, int stride)
+{
+ int i;
+ pixel *dst = (pixel *) _dst; /* pixel/dctcoef presumably come from bit_depth_template.c per BIT_DEPTH - confirm */
+ dctcoef *src = (dctcoef *) _src;
+ stride /= sizeof(pixel); /* convert the incoming stride into units of pixel */
+
+ for (i = 0; i < 4; i++) {
+ dst[0] += src[0];
+ dst[1] += src[1];
+ dst[2] += src[2];
+ dst[3] += src[3];
+ /* next row: dst moves down one line, src moves on by 4 contiguous coefficients */
+ dst += stride;
+ src += 4;
+ }
+}
+/* Add an 8x8 block of coefficients (_src) to the pixels at _dst, in place and without clipping. */
+static void FUNCC(ff_h264_add_pixels8)(uint8_t *_dst, int16_t *_src, int stride)
+{
+ int i;
+ pixel *dst = (pixel *) _dst; /* pixel/dctcoef presumably come from bit_depth_template.c per BIT_DEPTH - confirm */
+ dctcoef *src = (dctcoef *) _src;
+ stride /= sizeof(pixel); /* convert the incoming stride into units of pixel */
+
+ for (i = 0; i < 8; i++) {
+ dst[0] += src[0];
+ dst[1] += src[1];
+ dst[2] += src[2];
+ dst[3] += src[3];
+ dst[4] += src[4];
+ dst[5] += src[5];
+ dst[6] += src[6];
+ dst[7] += src[7];
+ /* next row: dst moves down one line, src moves on by 8 contiguous coefficients */
+ dst += stride;
+ src += 8;
+ }
+}
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 83de168..bb70d4e 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -52,11 +52,29 @@
#include "h264dsp_template.c"
#undef BIT_DEPTH
+#define BIT_DEPTH 8
+#include "h264addpx_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 16
+#include "h264addpx_template.c"
+#undef BIT_DEPTH
+
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
{
#undef FUNC
#define FUNC(a, depth) a ## _ ## depth ## _c
+#define ADDPX_DSP(depth) \
+ c->h264_add_pixels4 = FUNC(ff_h264_add_pixels4, depth);\
+ c->h264_add_pixels8 = FUNC(ff_h264_add_pixels8, depth)
+
+ if (bit_depth > 8 && bit_depth <= 16) {
+ ADDPX_DSP(16);
+ } else {
+ ADDPX_DSP(8);
+ }
+
#define H264_DSP(depth) \
c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index f0eaacc..82e147f 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -101,6 +101,10 @@ typedef struct H264DSPContext {
void (*h264_luma_dc_dequant_idct)(int16_t *output,
int16_t *input /*align 16*/, int qmul);
void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
+
+ /* bypass-transform */
+ void (*h264_add_pixels8)(uint8_t *dst, int16_t *block, int stride);
+ void (*h264_add_pixels4)(uint8_t *dst, int16_t *block, int stride);
} H264DSPContext;
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
--
1.7.11.3
More information about the ffmpeg-devel
mailing list