[FFmpeg-cvslog] r24337 - in trunk/libavcodec: vp8.c vp8dsp.c vp8dsp.h x86/vp8dsp-init.c x86/vp8dsp.asm
rbultje
subversion
Mon Jul 19 23:18:05 CEST 2010
Author: rbultje
Date: Mon Jul 19 23:18:04 2010
New Revision: 24337
Log:
Change function prototypes for width=8 inner and mbedge loopfilter functions
so that each call filters both the U and V planes at the same time. This will
have speed advantages when using SSE2 (or higher) optimizations, since we can
process the U and V rows together in a single xmm register.
This also renames filter16 to filter16y and filter8 to filter8uv so that it's
more obvious what each function is used for.
Modified:
trunk/libavcodec/vp8.c
trunk/libavcodec/vp8dsp.c
trunk/libavcodec/vp8dsp.h
trunk/libavcodec/x86/vp8dsp-init.c
trunk/libavcodec/x86/vp8dsp.asm
Modified: trunk/libavcodec/vp8.c
==============================================================================
--- trunk/libavcodec/vp8.c Mon Jul 19 22:53:58 2010 (r24336)
+++ trunk/libavcodec/vp8.c Mon Jul 19 23:18:04 2010 (r24337)
@@ -1245,31 +1245,45 @@ static void filter_mb(VP8Context *s, uin
bedge_lim = 2* filter_level + inner_limit;
if (mb_x) {
- s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter16y(dst[0], s->linesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
+ mbedge_lim, inner_limit, hev_thresh);
}
if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
- s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 4, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 8, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+12, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter8_inner (dst[1]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_h_loop_filter8_inner (dst[2]+ 4, s->uvlinesize, bedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
+ s->uvlinesize, bedge_lim,
+ inner_limit, hev_thresh);
}
if (mb_y) {
- s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize, mbedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y(dst[0], s->linesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], s->uvlinesize,
+ mbedge_lim, inner_limit, hev_thresh);
}
if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) {
- s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 4*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 8*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+12*s->linesize, s->linesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter8_inner (dst[1]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh);
- s->vp8dsp.vp8_v_loop_filter8_inner (dst[2]+ 4*s->uvlinesize, s->uvlinesize, bedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*s->linesize,
+ s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*s->linesize,
+ s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*s->linesize,
+ s->linesize, bedge_lim,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * s->uvlinesize,
+ dst[2] + 4 * s->uvlinesize,
+ s->uvlinesize, bedge_lim,
+ inner_limit, hev_thresh);
}
}
Modified: trunk/libavcodec/vp8dsp.c
==============================================================================
--- trunk/libavcodec/vp8dsp.c Mon Jul 19 22:53:58 2010 (r24336)
+++ trunk/libavcodec/vp8dsp.c Mon Jul 19 23:18:04 2010 (r24337)
@@ -196,8 +196,8 @@ static av_always_inline void filter_mbed
p[ 2*stride] = cm[q2 - a2];
}
-#define LOOP_FILTER(dir, size, stridea, strideb) \
-static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
+#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
+static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
int flim_E, int flim_I, int hev_thresh)\
{\
int i;\
@@ -211,7 +211,7 @@ static void vp8_ ## dir ## _loop_filter
}\
}\
\
-static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
+static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
int flim_E, int flim_I, int hev_thresh)\
{\
int i;\
@@ -226,10 +226,26 @@ static void vp8_ ## dir ## _loop_filter
}\
}
-LOOP_FILTER(v, 16, 1, stride)
-LOOP_FILTER(h, 16, stride, 1)
-LOOP_FILTER(v, 8, 1, stride)
-LOOP_FILTER(h, 8, stride, 1)
+LOOP_FILTER(v, 16, 1, stride,)
+LOOP_FILTER(h, 16, stride, 1,)
+
+#define UV_LOOP_FILTER(dir, stridea, strideb) \
+LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
+static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\
+ int fE, int fI, int hev_thresh)\
+{\
+ vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
+ vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
+}\
+static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\
+ int fE, int fI, int hev_thresh)\
+{\
+ vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
+ vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
+}
+
+UV_LOOP_FILTER(v, 1, stride)
+UV_LOOP_FILTER(h, stride, 1)
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
@@ -443,15 +459,15 @@ av_cold void ff_vp8dsp_init(VP8DSPContex
dsp->vp8_idct_add = vp8_idct_add_c;
dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
- dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c;
- dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c;
- dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c;
- dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c;
+ dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
+ dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
+ dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
+ dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
- dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c;
- dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c;
- dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c;
- dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c;
+ dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
+ dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
+ dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
+ dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
Modified: trunk/libavcodec/vp8dsp.h
==============================================================================
--- trunk/libavcodec/vp8dsp.h Mon Jul 19 22:53:58 2010 (r24336)
+++ trunk/libavcodec/vp8dsp.h Mon Jul 19 23:18:04 2010 (r24337)
@@ -35,16 +35,24 @@ typedef struct VP8DSPContext {
void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], int stride);
// loop filter applied to edges between macroblocks
- void (*vp8_v_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_h_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_v_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_h_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_v_loop_filter16y)(uint8_t *dst, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_h_loop_filter16y)(uint8_t *dst, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, int stride,
+ int flim_E, int flim_I, int hev_thresh);
// loop filter applied to inner macroblock edges
- void (*vp8_v_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_h_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_v_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
- void (*vp8_h_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, int stride,
+ int flim_E, int flim_I, int hev_thresh);
+ void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV, int stride,
+ int flim_E, int flim_I, int hev_thresh);
void (*vp8_v_loop_filter_simple)(uint8_t *dst, int stride, int flim);
void (*vp8_h_loop_filter_simple)(uint8_t *dst, int stride, int flim);
Modified: trunk/libavcodec/x86/vp8dsp-init.c
==============================================================================
--- trunk/libavcodec/x86/vp8dsp-init.c Mon Jul 19 22:53:58 2010 (r24336)
+++ trunk/libavcodec/x86/vp8dsp-init.c Mon Jul 19 23:18:04 2010 (r24337)
@@ -230,18 +230,18 @@ extern void ff_vp8_h_loop_filter_simple_
extern void ff_vp8_h_loop_filter_simple_mmxext(uint8_t *dst, int stride, int flim);
extern void ff_vp8_h_loop_filter_simple_sse2 (uint8_t *dst, int stride, int flim);
-extern void ff_vp8_v_loop_filter16_inner_mmx (uint8_t *dst, int stride,
- int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16_inner_mmxext(uint8_t *dst, int stride,
- int e, int i, int hvt);
-extern void ff_vp8_v_loop_filter16_inner_sse2 (uint8_t *dst, int stride,
- int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16_inner_mmx (uint8_t *dst, int stride,
- int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16_inner_mmxext(uint8_t *dst, int stride,
- int e, int i, int hvt);
-extern void ff_vp8_h_loop_filter16_inner_sse2 (uint8_t *dst, int stride,
- int e, int i, int hvt);
+extern void ff_vp8_v_loop_filter16y_inner_mmx (uint8_t *dst, int stride,
+ int e, int i, int hvt);
+extern void ff_vp8_v_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
+ int e, int i, int hvt);
+extern void ff_vp8_v_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
+ int e, int i, int hvt);
+extern void ff_vp8_h_loop_filter16y_inner_mmx (uint8_t *dst, int stride,
+ int e, int i, int hvt);
+extern void ff_vp8_h_loop_filter16y_inner_mmxext(uint8_t *dst, int stride,
+ int e, int i, int hvt);
+extern void ff_vp8_h_loop_filter16y_inner_sse2 (uint8_t *dst, int stride,
+ int e, int i, int hvt);
#endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
@@ -284,8 +284,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPCo
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
- c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmx;
- c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmx;
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
}
/* note that 4-tap width=16 functions are missing because w=16
@@ -302,8 +302,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPCo
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
- c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_mmxext;
- c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_mmxext;
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
}
if (mm_flags & FF_MM_SSE) {
@@ -320,8 +320,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPCo
c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
- c->vp8_v_loop_filter16_inner = ff_vp8_v_loop_filter16_inner_sse2;
- c->vp8_h_loop_filter16_inner = ff_vp8_h_loop_filter16_inner_sse2;
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
}
if (mm_flags & FF_MM_SSSE3) {
Modified: trunk/libavcodec/x86/vp8dsp.asm
==============================================================================
--- trunk/libavcodec/x86/vp8dsp.asm Mon Jul 19 22:53:58 2010 (r24336)
+++ trunk/libavcodec/x86/vp8dsp.asm Mon Jul 19 23:18:04 2010 (r24337)
@@ -1379,7 +1379,7 @@ SIMPLE_LOOPFILTER sse2, h, 6
;-----------------------------------------------------------------------------
%macro INNER_LOOPFILTER 4
-cglobal vp8_%2_loop_filter16_inner_%1, 5, %3, %4
+cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4
%define dst_reg r0
%define mstride_reg r1
%define E_reg r2
More information about the ffmpeg-cvslog
mailing list