[FFmpeg-cvslog] r16312 - in trunk/libavcodec/arm: dsputil_neon.c h264idct_neon.S
mru
subversion
Fri Dec 26 00:13:43 CET 2008
Author: mru
Date: Fri Dec 26 00:13:43 2008
New Revision: 16312
Log:
ARM: add new h264 idct functions
Modified:
trunk/libavcodec/arm/dsputil_neon.c
trunk/libavcodec/arm/h264idct_neon.S
Modified: trunk/libavcodec/arm/dsputil_neon.c
==============================================================================
--- trunk/libavcodec/arm/dsputil_neon.c Thu Dec 25 19:27:49 2008 (r16311)
+++ trunk/libavcodec/arm/dsputil_neon.c Fri Dec 26 00:13:43 2008 (r16312)
@@ -94,6 +94,15 @@ void ff_h264_h_loop_filter_chroma_neon(u
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]); /* defined in arm/h264idct_neon.S; walks 16 blocks, skipping those whose nnzc entry is 0 */
+void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]); /* defined in arm/h264idct_neon.S; walks 16 blocks, a block with nnzc entry 0 is still processed when its first coefficient is non-zero */
+void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]); /* defined in arm/h264idct_neon.S; walks 8 blocks starting at block_offset[16], writing through two destination pointers read from dest */
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
@@ -166,4 +175,7 @@ void ff_dsputil_init_neon(DSPContext *c,
c->h264_idct_add = ff_h264_idct_add_neon;
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
+ c->h264_idct_add16 = ff_h264_idct_add16_neon;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+ c->h264_idct_add8 = ff_h264_idct_add8_neon;
}
Modified: trunk/libavcodec/arm/h264idct_neon.S
==============================================================================
--- trunk/libavcodec/arm/h264idct_neon.S Thu Dec 25 19:27:49 2008 (r16311)
+++ trunk/libavcodec/arm/h264idct_neon.S Fri Dec 26 00:13:43 2008 (r16312)
@@ -20,6 +20,7 @@
#include "asm.S"
+ preserve8
.fpu neon
.text
@@ -94,3 +95,95 @@ function ff_h264_idct_dc_add_neon, expor
vst1.32 {d1[1]}, [r0,:32], r2
bx lr
.endfunc
+
+function ff_h264_idct_add16_neon, export=1 /* for each of 16 blocks: skip if its nnzc entry is 0, else add the DC-only or the full inverse transform to dst + block_offset[i] */
+ push {r4-r8,lr} /* six registers saved: 24 bytes of stack */
+ mov r4, r0 /* r4 = dst */
+ mov r5, r1 /* r5 = block_offset cursor */
+ mov r1, r2 /* r1 = block, already in the second-argument slot of the idct routines */
+ mov r2, r3 /* r2 = stride, third argument of the idct routines */
+ ldr r6, [sp, #24] /* r6 = nnzc, the stack-passed fifth argument, just above the 24 bytes pushed */
+ movw r7, #:lower16:scan8 /* r7 = address of the scan8 table (low half) */
+ movt r7, #:upper16:scan8 /* ... high half */
+ mov ip, #16 /* ip = number of blocks left */
+1: ldrb r8, [r7], #1 /* r8 = next scan8 byte, cursor post-incremented */
+ ldr r0, [r5], #4 /* r0 = next block_offset entry, cursor post-incremented */
+ ldrb r8, [r6, r8] /* r8 = nnzc[scan8 byte] */
+ subs r8, r8, #1 /* r8 = count - 1; flags set here are used by blt and movne below */
+ blt 2f /* count was 0: nothing to add for this block */
+ ldrsh lr, [r1] /* lr = first coefficient of the block (signed 16-bit); flags untouched */
+ add r0, r0, r4 /* r0 = dst + offset; flags untouched */
+ movne lr, #0 /* count != 1: force the full-idct choice below */
+ cmp lr, #0 /* non-zero only when count == 1 and the first coefficient is non-zero */
+ adrne lr, ff_h264_idct_dc_add_neon /* count == 1 with non-zero first coefficient: DC-only routine */
+ adreq lr, ff_h264_idct_add_neon /* every other non-skipped case: full routine */
+ blx lr /* call with r0 = dst + offset, r1 = block, r2 = stride. NOTE(review): relies on the callee leaving r1, r2 and ip intact - confirm */
+2: subs ip, ip, #1 /* one block done; Z flag feeds the bne below */
+ add r1, r1, #32 /* advance to the next block, 32 bytes further; flags untouched */
+ bne 1b /* loop until all 16 blocks are visited */
+ pop {r4-r8,pc} /* restore registers and return */
+ .endfunc
+
+function ff_h264_idct_add16intra_neon, export=1 /* for each of 16 blocks: full inverse transform if its nnzc entry is non-zero, DC-only add if the entry is 0 but the first coefficient is non-zero, otherwise nothing */
+ push {r4-r8,lr} /* six registers saved: 24 bytes of stack */
+ mov r4, r0 /* r4 = dst */
+ mov r5, r1 /* r5 = block_offset cursor */
+ mov r1, r2 /* r1 = block, second argument of the idct routines */
+ mov r2, r3 /* r2 = stride, third argument of the idct routines */
+ ldr r6, [sp, #24] /* r6 = nnzc, the stack-passed fifth argument, just above the 24 bytes pushed */
+ movw r7, #:lower16:scan8 /* r7 = address of the scan8 table (low half) */
+ movt r7, #:upper16:scan8 /* ... high half */
+ mov ip, #16 /* ip = number of blocks left */
+1: ldrb r8, [r7], #1 /* r8 = next scan8 byte, cursor post-incremented */
+ ldr r0, [r5], #4 /* r0 = next block_offset entry, cursor post-incremented */
+ ldrb r8, [r6, r8] /* r8 = nnzc[scan8 byte] */
+ add r0, r0, r4 /* r0 = dst + offset */
+ cmp r8, #0 /* is the nnzc entry zero? flags are consumed by the next four conditional instructions */
+ ldrsh r8, [r1] /* r8 = first coefficient of the block (signed 16-bit); flags untouched */
+ adrne lr, ff_h264_idct_add_neon /* entry non-zero: full routine */
+ adreq lr, ff_h264_idct_dc_add_neon /* entry zero: DC-only routine is the candidate */
+ cmpeq r8, #0 /* only when the entry is zero: test the first coefficient */
+ blxne lr /* call unless both the entry and the first coefficient are zero. NOTE(review): relies on the callee leaving r1, r2 and ip intact - confirm */
+ subs ip, ip, #1 /* one block done; Z flag feeds the bne below */
+ add r1, r1, #32 /* advance to the next block, 32 bytes further; flags untouched */
+ bne 1b /* loop until all 16 blocks are visited */
+ pop {r4-r8,pc} /* restore registers and return */
+ .endfunc
+
+function ff_h264_idct_add8_neon, export=1 /* for each of 8 blocks starting at block_offset[16]: full inverse transform if its nnzc entry is non-zero, DC-only add if the entry is 0 but the first coefficient is non-zero; first four blocks go to dest[0], last four to dest[1] */
+ push {r4-r10,lr} /* eight registers saved: 32 bytes of stack */
+ ldm r0, {r4,r9} /* r4 = dest[0], r9 = dest[1] */
+ add r5, r1, #16*4 /* r5 = &block_offset[16] (entries are 4 bytes) */
+ add r1, r2, #16*32 /* r1 = block advanced past 16 blocks of 32 bytes */
+ mov r2, r3 /* r2 = stride, third argument of the idct routines */
+ ldr r6, [sp, #32] /* r6 = nnzc, the stack-passed fifth argument, just above the 32 bytes pushed */
+ movw r7, #:lower16:scan8+16 /* r7 = address of scan8 entry 16 (low half) */
+ movt r7, #:upper16:scan8+16 /* ... high half */
+ mov ip, #8 /* ip = number of blocks left, counts 8 down to 1 */
+1: ldrb r8, [r7], #1 /* r8 = next scan8 byte, cursor post-incremented */
+ ldr r0, [r5], #4 /* r0 = next block_offset entry, cursor post-incremented */
+ ldrb r8, [r6, r8] /* r8 = nnzc[scan8 byte] */
+ cmp ip, #4 /* ip = 8..5 are the first four blocks, ip = 4..1 the last four; a single-bit test on ip cannot split the 8..1 countdown 4/4 */
+ addgt r0, r0, r4 /* first four blocks: r0 = dest[0] + offset */
+ addle r0, r0, r9 /* last four blocks: r0 = dest[1] + offset */
+ cmp r8, #0 /* is the nnzc entry zero? flags are consumed by the next four conditional instructions */
+ ldrsh r8, [r1] /* r8 = first coefficient of the block (signed 16-bit); flags untouched */
+ adrne lr, ff_h264_idct_add_neon /* entry non-zero: full routine */
+ adreq lr, ff_h264_idct_dc_add_neon /* entry zero: DC-only routine is the candidate */
+ cmpeq r8, #0 /* only when the entry is zero: test the first coefficient */
+ blxne lr /* call unless both the entry and the first coefficient are zero. NOTE(review): relies on the callee leaving r1, r2 and ip intact - confirm */
+ subs ip, ip, #1 /* one block done; Z flag feeds the bne below */
+ add r1, r1, #32 /* advance to the next block, 32 bytes further; flags untouched */
+ bne 1b /* loop until all 8 blocks are visited */
+ pop {r4-r10,pc} /* restore registers and return */
+ .endfunc
+
+ .section .rodata /* read-only lookup data for the routines in this file */
+scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8 /* each byte is column + row*8, used as an index into the nnzc[6*8] argument; entries 0-15 are read by the add16 routines */
+ .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8 /* entries 4-7 */
+ .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8 /* entries 8-11 */
+ .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8 /* entries 12-15 */
+ .byte 1+1*8, 2+1*8 /* entries 16-23 are read by ff_h264_idct_add8_neon via scan8+16 */
+ .byte 1+2*8, 2+2*8 /* entries 18-19: last of the first group of four */
+ .byte 1+4*8, 2+4*8 /* entries 20-21: start of the second group of four */
+ .byte 1+5*8, 2+5*8 /* entries 22-23 */
More information about the ffmpeg-cvslog
mailing list