[Ffmpeg-devel] I'm giving up

Fri Dec 8 23:00:48 CET 2006

Hi Michael,

On Thu, Dec 07, 2006 at 07:06:53PM +0100, Michael Niedermayer wrote:
> > >[...]
> > > > +static uint8_t *h264_write_nal_unit(int nal_ref_idc, int nal_unit_type, uint8_t *dest, int *destsize,
> > > > +                          PutBitContext *b2)
> > > > +{
> > > > +    PutBitContext b;
> > > > +    int i, destpos, rbsplen, escape_count;
> > > > +    uint8_t *rbsp;
> > > > +
> > > > +    // Align b2 on a byte boundary
> > > > +
> > > > +    align_put_bits(b2);
> > > 
> > > Is the RBSP trailing stuff correct? Shouldn't there be a put_bits(1,1)?
> > Yes, but we had the put_bits(1,1) call in the calling context. For
> > end-of-stream NAL units, the rbsp should be empty, so in that case we
> > did not add the stopbit.
> > 
> > How about something like this? (Tested and works, but the reference
> > decoder also decodes it if the extra bit is there.)
> 
> I am fine with any solution as long as it conforms to the H.264 spec.
Ok.

> > > [...]
> > > > +    block[3][3] = pieces[3][0]-(pieces[3][1]<<1)+(pieces[3][2]<<1)-pieces[3][3];
> > > 
> > > There are a lot of redundant operations in the above; these should be
> > > simplified.
> > I haven't spotted the redundant operations yet, but I'll go over them
> > tomorrow (or this evening).
> 
> pieces[0][0] = block[0][0]+block[1][0]+block[2][0]+block[3][0];
> pieces[1][0] = (block[0][0]<<1)+block[1][0]-block[2][0]-(block[3][0]<<1);
> pieces[2][0] = block[0][0]-block[1][0]-block[2][0]+block[3][0];
> pieces[3][0] = block[0][0]-(block[1][0]<<1)+(block[2][0]<<1)-block[3][0];
> 
> vs .
> 
> A = block[0][0]+block[3][0];
> C = block[0][0]-block[3][0];
> B = block[1][0]+block[2][0];
> D = block[1][0]-block[2][0];
> pieces[0][0] = A+B;
> pieces[2][0] = A-B;
> pieces[1][0] = (C<<1)+ D;
> pieces[3][0] =  C    -(D<<1);
Thanks! :)

Applying your suggestion to the rest of the transform and measuring with
START_TIMER/STOP_TIMER showed that it brought the cost down from over 2700
decicycles to 1911!

> > > [...]
> > > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > > > index 03c1ae4..ac369eb 100644
> > > > --- a/libavcodec/Makefile
> > > > +++ b/libavcodec/Makefile
> > > > @@ -86,6 +86,7 @@ OBJS-$(CONFIG_GIF_ENCODER)             +
> > > >  OBJS-$(CONFIG_H261_DECODER)            += h261.o
> > > >  OBJS-$(CONFIG_H261_ENCODER)            += h261.o
> > > >  OBJS-$(CONFIG_H264_DECODER)            += h264.o
> > > > +OBJS-$(CONFIG_H264_ENCODER)            += h264enc.o h264cavlc.o h264dsp.o
> > > >  OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
> > > >  OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
> > > >  OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
> > > 
> > > This can be committed if it does not break compilation.
> > By adding a stub for h264cavlc.c or by disabling compilation by default?
> 
> by waiting until h264cavlc.c is in svn IMHO
Ok. Alternatively, what would you think about adding the line now, listing only
the files that have been added so far?

With friendly regards,
Takis
-------------- next part --------------
Index: libavcodec/dsputil.c
===================================================================

--- libavcodec/dsputil.c	(revision 7262)
+++ libavcodec/dsputil.c	(working copy)
@@ -2549,6 +2549,11 @@
 }
 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
 
+#if defined(CONFIG_H264_ENCODER)
+/* H264 specific */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_H264_ENCODER */
+
 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int i;
@@ -4027,6 +4032,9 @@
 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
     ff_vc1dsp_init(c,avctx);
 #endif
+#if defined(CONFIG_H264_ENCODER)
+    ff_h264dsp_init(c,avctx);
+#endif
 
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
Index: libavcodec/h264dsp.c
===================================================================
--- libavcodec/h264dsp.c	(revision 0)
+++ libavcodec/h264dsp.c	(revision 0)
@@ -0,0 +1,122 @@
+/*
+ * H.264/MPEG-4 Part 10 (Base profile) encoder.
+ *
+ * DSP functions
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264dsp.c
+ * H.264 encoder related DSP utils
+ *
+ */
+
+#include "dsputil.h"
+
+extern const uint8_t ff_div6[52];
+extern const uint8_t ff_rem6[52];
+
+/**
+ * Apply the 4x4 H.264 forward transform (modified DCT) to a block, in place.
+ * @note
+ * We'll always work with transposed input blocks, to avoid having to make a
+ * distinction between C and MMX implementations.
+ *
+ * @param block transposed input block, overwritten with the transformed values
+ */
+static void h264_dct_c(DCTELEM block[4][4])
+{
+    DCTELEM pieces[4][4]; /* output of the first pass, input of the second */
+    DCTELEM a, b, c, d; /* butterfly terms, reused for every group of four */
+    /* Pass 1: combine block[0..3][j] into pieces[0..3][j], here for j = 0. */
+    a = block[0][0]+block[3][0];
+    c = block[0][0]-block[3][0];
+    b = block[1][0]+block[2][0];
+    d = block[1][0]-block[2][0];
+    pieces[0][0] = a+b;
+    pieces[2][0] = a-b;
+    pieces[1][0] = (c<<1)+d;
+    pieces[3][0] = c-(d<<1);
+    /* Pass 1, j = 1 */
+    a = block[0][1]+block[3][1];
+    c = block[0][1]-block[3][1];
+    b = block[1][1]+block[2][1];
+    d = block[1][1]-block[2][1];
+    pieces[0][1] = a+b;
+    pieces[2][1] = a-b;
+    pieces[1][1] = (c<<1)+d;
+    pieces[3][1] = c-(d<<1);
+    /* Pass 1, j = 2 */
+    a = block[0][2]+block[3][2];
+    b = block[1][2]+block[2][2];
+    c = block[0][2]-block[3][2];
+    d = block[1][2]-block[2][2];
+    pieces[0][2] = a+b;
+    pieces[2][2] = a-b;
+    pieces[1][2] = (c<<1)+d;
+    pieces[3][2] = c-(d<<1);
+    /* Pass 1, j = 3 */
+    a = block[0][3]+block[3][3];
+    b = block[1][3]+block[2][3];
+    c = block[0][3]-block[3][3];
+    d = block[1][3]-block[2][3];
+    pieces[0][3] = a+b;
+    pieces[1][3] = (c<<1)+d;
+    pieces[2][3] = a-b;
+    pieces[3][3] = c-(d<<1);
+    /* Pass 2: combine pieces[i][0..3] into block[0..3][i], here for i = 0. */
+    a = pieces[0][0]+pieces[0][3];
+    c = pieces[0][0]-pieces[0][3];
+    b = pieces[0][1]+pieces[0][2];
+    d = pieces[0][1]-pieces[0][2];
+    block[0][0] = a+b;
+    block[2][0] = a-b;
+    block[1][0] = (c<<1)+d;
+    block[3][0] = c-(d<<1);
+    /* Pass 2, i = 1 */
+    a = pieces[1][0]+pieces[1][3];
+    c = pieces[1][0]-pieces[1][3];
+    b = pieces[1][1]+pieces[1][2];
+    d = pieces[1][1]-pieces[1][2];
+    block[0][1] = a+b;
+    block[2][1] = a-b;
+    block[1][1] = (c<<1)+d;
+    block[3][1] = c-(d<<1);
+    /* Pass 2, i = 2 */
+    a = pieces[2][0]+pieces[2][3];
+    c = pieces[2][0]-pieces[2][3];
+    b = pieces[2][1]+pieces[2][2];
+    d = pieces[2][1]-pieces[2][2];
+    block[0][2] = a+b;
+    block[2][2] = a-b;
+    block[1][2] = (c<<1)+d;
+    block[3][2] = c-(d<<1);
+    /* Pass 2, i = 3 */
+    a = pieces[3][0]+pieces[3][3];
+    c = pieces[3][0]-pieces[3][3];
+    b = pieces[3][1]+pieces[3][2];
+    d = pieces[3][1]-pieces[3][2];
+    block[0][3] = a+b;
+    block[2][3] = a-b;
+    block[1][3] = (c<<1)+d;
+    block[3][3] = c-(d<<1);
+}
+/** Point the h264_dct entry of the DSP context at the C implementation. */
+void ff_h264dsp_init(DSPContext* c, AVCodecContext *avctx)
+{
+    c->h264_dct = h264_dct_c; /* avctx is not used by this function */
+}
+
Index: libavcodec/Makefile
===================================================================
--- libavcodec/Makefile	(revision 7262)
+++ libavcodec/Makefile	(working copy)
@@ -86,6 +86,7 @@
 OBJS-$(CONFIG_H261_DECODER)            += h261.o
 OBJS-$(CONFIG_H261_ENCODER)            += h261.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o
+OBJS-$(CONFIG_H264_ENCODER)            += h264enc.o h264dsp.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
Index: libavcodec/h264enc.c
===================================================================
--- libavcodec/h264enc.c	(revision 0)
+++ libavcodec/h264enc.c	(revision 0)
@@ -0,0 +1,107 @@
+/*
+ * H.264 encoder
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "common.h"
+#include "bitstream.h"
+#include "mpegvideo.h"
+#include "h264data.h"
+
+/**
+ * Write out the provided data into a NAL unit: a four byte start code, the one
+ * byte NAL header and the payload of b2, with an emulation prevention byte
+ * inserted wherever 00 00 would otherwise be followed by a byte below 4.
+ * @param nal_ref_idc NAL reference IDC
+ * @param nal_unit_type NAL unit payload type
+ * @param dest the target buffer, dest+5 == b2->buf is allowed as a special case
+ * @param destsize the space left in dest, reduced by the size of the NAL unit
+ * @param b2 the data which should be escaped; stop bit and alignment are added here
+ * @returns pointer to current position in the output buffer or NULL if an error occurred
+ */
+static uint8_t *h264_write_nal_unit(int nal_ref_idc, int nal_unit_type, uint8_t *dest, int *destsize,
+                          PutBitContext *b2)
+{
+    PutBitContext b;
+    int i, destpos, rbsplen, escape_count;
+    uint8_t *rbsp;
+
+    if (nal_unit_type != NAL_END_STREAM)
+        put_bits(b2,1,1); // rbsp_stop_bit
+
+    // Align b2 on a byte boundary
+    align_put_bits(b2);
+    rbsplen = put_bits_count(b2)/8;
+    flush_put_bits(b2);
+    rbsp = b2->buf;
+
+    destpos = 5; // start code (4 bytes) + NAL header (1 byte)
+    escape_count= 0;
+    // Count the escapes with the very scan the copy loop below uses, so the
+    // size check can never come out smaller than what is actually written.
+    for (i = 0; i + 2 < rbsplen; i++)
+        if (rbsp[i] == 0 && rbsp[i+1] == 0 && rbsp[i+2] < 4)
+        {
+            escape_count++;
+            i++; // the two zero bytes are consumed together, as in the copy loop
+        }
+
+    // Check the room for header, payload and escapes before anything is written.
+    if(*destsize < destpos || rbsplen + escape_count > *destsize - destpos)
+    {
+        av_log(NULL, AV_LOG_ERROR, "Destination buffer too small!\n");
+        return NULL;
+    }
+
+    init_put_bits(&b,dest,*destsize);
+
+    put_bits(&b,16,0);
+    put_bits(&b,16,0x01);
+    put_bits(&b,1,0); // forbidden zero bit
+    put_bits(&b,2,nal_ref_idc); // nal_ref_idc
+    put_bits(&b,5,nal_unit_type); // nal_unit_type
+    flush_put_bits(&b);
+
+    if(escape_count==0)
+    {
+        if(dest+destpos != rbsp)
+        {
+            memcpy(dest+destpos, rbsp, rbsplen);
+            *destsize -= (rbsplen+destpos);
+        } // NOTE(review): an in-place payload leaves *destsize unchanged - confirm with callers
+        return dest+rbsplen+destpos;
+    }
+
+    // this should be damn rare (hopefully)
+    // Park the payload at the end of its final region first, so that the write
+    // position never overtakes the read position, even if dest+5 == b2->buf.
+    rbsp = memmove(dest+destpos+escape_count, rbsp, rbsplen);
+    for (i = 0 ; i < rbsplen ; i++)
+    {
+        if (i + 2 < rbsplen && (rbsp[i] == 0 && rbsp[i+1] == 0 && rbsp[i+2] < 4))
+        {
+            dest[destpos++] = rbsp[i++];
+            dest[destpos++] = rbsp[i];
+            dest[destpos++] = 0x03; // emulation prevention byte
+        }
+        else
+            dest[destpos++] = rbsp[i];
+    }
+    *destsize -= destpos;
+    return dest+destpos;
+}
+