[FFmpeg-devel] [PATCH] encoder for adobe's flash ScreenVideo2 codec
Vitor Sessak
vitor1001
Wed Jul 22 07:23:23 CEST 2009
Joshua Warner wrote:
> Hi all,
>
> I've developed an encoder for Adobe's Flash ScreenVideo2 format, which is
> stored in flv files. My encoder currently only supports a large subset of
> the format. The only player that supports this codec (so far) is Adobe
> Flash Player itself, but ScreenVideo2 makes dramatic improvement in file
> size over ScreenVideo (currently in ffmpeg as flashsv) - and should be very
> useful for uploading screencasts, etc. Most options (block size, etc) now
> just fall back on defaults because I couldn't find a general algorithm that
> produced consistantly better results than these. All the code is in place
> to be able to change these parameters dynamically, so future improvements
> there should be easy. The patch is attached.
>
> The codec is listed as flashsv2 in ffmpeg.
I'll give a few comments...
> +typedef struct FlashSV2Context {
> + AVCodecContext *avctx;
> + uint8_t *current_frame;
> + uint8_t *key_frame;
> + AVFrame frame;
> + uint8_t *encbuffer;
> + uint8_t *keybuffer;
> + uint8_t *databuffer;
> +
> + Block *frame_blocks;
> + Block *key_blocks;
> + int frame_size;
> + int blocks_size;
> +
> + int use15_7, dist, comp;
> +
> + int rows, cols;
> +
> + int last_key_frame;
> +
> + int image_width, image_height;
> + int block_width, block_height;
> + uint8_t flags;
> + uint8_t use_custom_pallet;
> + uint8_t pallet_type; //0=>default, 1=>custom - changed when pallet regenerated.
> + Pallet pallet;
> +
> + double tot_blocks; //blocks encoded since last keyframe
> + double diff_blocks; //blocks that were different since last keyframe
> + double tot_lines; //total scanlines in image since last keyframe
> + double diff_lines; //scanlines that were different since last keyframe
> + double raw_size; //size of raw frames since last keyframe
> + double comp_size; //size of compressed data since last keyframe
> + double uncomp_size; //size of uncompressed data since last keyframe
> +
> + double total_bits; //total bits written to stream so far
> +} FlashSV2Context;
If it is preferable to use integers here if possible. It is easier to
have a bit-exact output across platforms and is faster in systems with
no FPU.
> +static void cleanup(FlashSV2Context * s)
> +{
> + if (s->encbuffer)
> + av_free(s->encbuffer);
> + if (s->keybuffer)
> + av_free(s->keybuffer);
> + if (s->databuffer)
> + av_free(s->databuffer);
> + if (s->current_frame)
> + av_free(s->current_frame);
> + if (s->key_frame)
> + av_free(s->key_frame);
> +
> + if (s->frame_blocks)
> + av_free(s->frame_blocks);
> + if (s->key_blocks)
> + av_free(s->key_blocks);
> +}
Why not simply put this code inside flashsv2_encode_end()?
> +static int generate_default_pallet(Pallet * pallet);
You can reorder the functions to avoid this forward reference.
> +static void init_blocks(FlashSV2Context * s, Block * blocks,
> + uint8_t * encbuf, uint8_t * databuf)
> +{
> + int row, col;
> + Block *b;
> + for (col = 0; col < s->cols; col++) {
> + for (row = 0; row < s->rows; row++) {
> + b = blocks + (col + row * s->cols);
> + b->width = (col < s->cols - 1)
> + || (s->image_width % s->block_width ==
> + 0) ? s->block_width : s->image_width % s->block_width;
Hmm, isn't
b->width = (col < s->cols - 1) :
s->block_width ?
s->image_width - col*s->block_width;
simpler?
> +static av_cold int flashsv2_encode_init(AVCodecContext * avctx)
> +{
> + FlashSV2Context *s = avctx->priv_data;
> +
> + s->avctx = avctx;
> +
> + s->comp = avctx->compression_level;
> + if (s->comp == -1)
> + s->comp = 9;
> + if (s->comp < 0 || s->comp > 9) {
> + av_log(avctx, AV_LOG_ERROR,
> + "Compression level should be 0-9, not %d\n", s->comp);
> + return -1;
> + }
> +
> +
> + if ((avctx->width > 4095) || (avctx->height > 4095)) {
> + av_log(avctx, AV_LOG_ERROR,
> + "Input dimensions too large, input must be max 4096x4096 !\n");
> + return -1;
> + }
> +
> + if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
> + return -1;
> + }
> +
> +
> + s->last_key_frame = 0;
> +
> + s->image_width = avctx->width;
> + s->image_height = avctx->height;
> +
> + s->block_width = (s->image_width / 12) & ~15;
> + s->block_height = (s->image_height / 12) & ~15;
> +
> + s->rows = (s->image_height + s->block_height - 1) / s->block_height;
> + s->cols = (s->image_width + s->block_width - 1) / s->block_width;
> +
> + s->frame_size = s->image_width * s->image_height * 3;
> + s->blocks_size = s->rows * s->cols * sizeof(Block);
> +
> + s->encbuffer = av_mallocz(s->frame_size);
> + s->keybuffer = av_mallocz(s->frame_size);
> + s->databuffer = av_mallocz(s->frame_size * 6);
> + s->current_frame = av_mallocz(s->frame_size);
> + s->key_frame = av_mallocz(s->frame_size);
> + s->frame_blocks = av_mallocz(s->blocks_size);
> + s->key_blocks = av_mallocz(s->blocks_size);
> + s->pallet.index = av_mallocz(1 << 15);
I think it is simpler to define Pallet as
typedef struct Pallet {
unsigned colors[128];
uint8_t index[1 << 15];
} Pallet;
?
> +static int new_key_frame(FlashSV2Context * s)
> +{
> + int i;
> + memcpy(s->keybuffer, s->encbuffer, s->frame_size);
Can't this memcpy() be avoided by doing FFSWAP(s->keybuffer,
s->encbuffer) at some point?
> + memcpy(s->key_blocks, s->frame_blocks, s->blocks_size);
> + memcpy(s->key_frame, s->current_frame, s->frame_size);
same for those
> +static int write_block(Block * b, uint8_t * buf, int buf_size)
> +{
> + int buf_pos = 0;
> + unsigned block_size = b->data_size;
> +
> + if (b->flags & HAS_DIFF_BLOCKS)
> + block_size += 2;
> + if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT)
> + block_size += 2;
> + if (block_size > 0)
> + block_size += 1;
> + if (buf_size < block_size + 2)
> + return -1;
> +
> + buf[buf_pos++] = block_size >> 8;
> + buf[buf_pos++] = block_size;
See AV_WL16().
> +
> + if (block_size == 0)
> + return buf_pos;
> +
> + buf[buf_pos++] = b->flags;
> +
> + if (b->flags & HAS_DIFF_BLOCKS) {
> + buf[buf_pos++] = (uint8_t) (b->start);
> + buf[buf_pos++] = (uint8_t) (b->len);
> + }
> +
> + if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT) {
> + //This feature of the format is poorly understood, and as of now, unused.
> + buf[buf_pos++] = (uint8_t) (b->col);
> + buf[buf_pos++] = (uint8_t) (b->row);
> + }
> +
> + memcpy(buf + buf_pos, b->data, b->data_size);
Is it really necessary to memcpy() data, or there is a way to write
directly to the buffer?
> +
> +static int encode_zlib(Block * b, uint8_t * buf, int *buf_size, int comp)
> +{
> + int res =
> + compress2(buf, buf_size, b->sl_begin, b->sl_end - b->sl_begin,
> + comp);
> + return res == Z_OK ? 0 : -1;
> +}
I think it would be cleaner to use compress2() directly all over the
code, for ex. instead of
> + res = encode_zlib(b, b->data, &b->data_size, comp);
> + if (res != 0)
> + return res;
just
if (compress2(b->data, &b->data_size, b->sl_begin,
b->sl_end - b->sl_begin, comp) != Z_OK)
return -1;
> +static inline unsigned pixel_bgr(uint8_t * src)
> +{
> + return (src[2]) | (src[1] << 8) | (src[2] << 16);
> +}
Hm, src[0] is unused?
> +static int generate_default_pallet(Pallet * pallet)
> +{
> + memcpy(pallet->colors, default_screen_video_v2_palette,
> + sizeof(default_screen_video_v2_palette));
> +
When using the default palette, it is better to just make pallet->colors
point to default_screen_video_v2_palette instead of allocating and
memcpy'ing.
> +static int generate_optimum_pallet(Pallet * pallet, uint8_t * image,
> + int width, int height, int stride)
> +{
> + //this isn't implemented yet! Default pallet only!
> + return -1;
> +}
I think it would be interesting to handle first the case where the
encoder gets the input already as paletted data.
> +#ifndef FLASHSV2_DUMB
> + //double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
> + //double width = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_width;
> + //int pwidth;
> + //av_log(s->avctx, AV_LOG_DEBUG, "block width: %g\n", width);
> + double width;
> + width = ((double) s->image_width) / 10.0;
> + pwidth = ((int) width);
> + pwidth &= ~15;
> + if (pwidth > 256)
> + pwidth = 256;
> + if (pwidth < 16)
> + pwidth = 16;
Hm, this is the same as
pwidth = FFCLIP((s->image_width/10) & (~15), 16, 256);
no? Avoid floating point is nice.
-Vitor
More information about the ffmpeg-devel
mailing list