[FFmpeg-cvslog] rv34: NEON optimised 4x4 dequant

Mans Rullgard git at videolan.org
Tue Dec 13 23:33:44 CET 2011


ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Mon Dec 12 23:22:04 2011 +0000| [4722a03c75d17d88312b91cd1006776844237349] | committer: Mans Rullgard

rv34: NEON optimised 4x4 dequant

Signed-off-by: Mans Rullgard <mans at mansr.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4722a03c75d17d88312b91cd1006776844237349
---

 libavcodec/arm/rv34dsp_init_neon.c |    3 +++
 libavcodec/arm/rv34dsp_neon.S      |   24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/libavcodec/arm/rv34dsp_init_neon.c b/libavcodec/arm/rv34dsp_init_neon.c
index 9a09fde..acf2a7d 100644
--- a/libavcodec/arm/rv34dsp_init_neon.c
+++ b/libavcodec/arm/rv34dsp_init_neon.c
@@ -25,9 +25,12 @@
 
 void ff_rv34_inv_transform_neon(DCTELEM *block);
 void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
+void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q); /* implemented in rv34dsp_neon.S */
 
 void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
 {
     c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon;
     c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon;
+
+    c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon; /* install the NEON 4x4 dequant routine from rv34dsp_neon.S */
 }
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index f700f5c..423b537 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -107,3 +107,27 @@ function ff_rv34_inv_transform_noround_neon, export=1
         vst4.16         {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
         bx              lr
 endfunc
+
+function ff_rv34_dequant4x4_neon, export=1      /* in-place 4x4 dequant; per the C prototype: r0 = block, r1 = Qdc, r2 = Q */
+        mov             r3,  r0                 /* r3 = write pointer; r0 is advanced by the loads below */
+        mov             r12, #16                /* post-increment step: consecutive rows are 16 bytes apart */
+        vdup.16         q0,  r2                 /* d0 = d1 = {Q, Q, Q, Q} (low 16 bits of r2 in every lane) */
+        vmov.16         d0[0], r1               /* d0 = {Qdc, Q, Q, Q}: only the first coefficient is scaled by Qdc */
+        vld1.16         {d2},     [r0,:64], r12 /* row 0: four 16-bit coefficients; :64 asserts an 8-byte aligned address */
+        vld1.16         {d4},     [r0,:64], r12 /* row 1 */
+        vld1.16         {d6},     [r0,:64], r12 /* row 2 */
+        vld1.16         {d16},    [r0,:64], r12 /* row 3; d16/q8 is caller-saved under AAPCS, unlike d8-d15 */
+        vmull.s16       q1,  d2,  d0            /* row 0 * {Qdc, Q, Q, Q}, signed, widened to 32 bits */
+        vmull.s16       q2,  d4,  d1            /* row 1 * Q */
+        vmull.s16       q3,  d6,  d1            /* row 2 * Q */
+        vmull.s16       q8,  d16, d1            /* row 3 * Q */
+        vqrshrn.s32     d2,  q1,  #4            /* rounding shift: (x + 8) >> 4, saturated and narrowed to 16 bits */
+        vqrshrn.s32     d4,  q2,  #4            /* same for row 1 */
+        vqrshrn.s32     d6,  q3,  #4            /* same for row 2 */
+        vqrshrn.s32     d16, q8,  #4            /* same for row 3 */
+        vst1.16         {d2},     [r3,:64], r12 /* write row 0 back over the input */
+        vst1.16         {d4},     [r3,:64], r12 /* row 1 */
+        vst1.16         {d6},     [r3,:64], r12 /* row 2 */
+        vst1.16         {d16},    [r3,:64], r12 /* row 3 */
+        bx              lr                      /* no return value; the block was modified in place */
+endfunc



More information about the ffmpeg-cvslog mailing list