[FFmpeg-devel] Trans.: a64multienc.c and drawutils.c optimisations
yann.lepetitcorps at free.fr
yann.lepetitcorps at free.fr
Tue Dec 27 18:41:04 CET 2011
Hi,
I have begun analysing the FFmpeg 0.9 sources and think I have found some areas
that can be optimised a little, such as in a64multienc.c and drawutils.c.
For example, in the a64multienc.c source file, the address of the current source
line only needs to be computed once per line, not once for each pixel
(i.e. this divides by the width the number of line-address computations done in the luma calculation):
/**
 * Convert the top-left C64XRES x C64YRES area of plane 0 into linear block
 * data.
 *
 * The area is walked in 8x8 blocks (left to right, top to bottom); inside a
 * block, rows are walked top to bottom and each row yields one int per
 * horizontal pair of bytes. dest advances by one for every pair slot visited,
 * whether or not a value was written, so slots outside the cropped picture
 * keep whatever dest already contained.
 *
 * @param avctx supplies the picture width and height used for cropping
 * @param p     frame whose data[0] / linesize[0] are read, one byte per pixel
 * @param dest  output array, one int per pair slot
 */
static void to_meta_with_crop(AVCodecContext *avctx, AVFrame *p, int *dest)
{
    int blockx, blocky, x, y;
    int height = FFMIN(avctx->height, C64YRES);
    int width  = FFMIN(avctx->width , C64XRES);
    const uint8_t *src = p->data[0];
    const uint8_t *row;

    for (blocky = 0; blocky < C64YRES; blocky += 8) {
        for (blockx = 0; blockx < C64XRES; blockx += 8) {
            for (y = blocky; y < blocky + 8 && y < C64YRES; y++) {
                if (y >= height) {
                    /* Row is below the picture: write nothing, but still step
                     * over its slots. Returning here instead would drop the
                     * remaining blocks of a partially covered block row. */
                    for (x = blockx; x < blockx + 8 && x < C64XRES; x += 2)
                        dest++;
                    continue;
                }

                /* the line address only depends on y, so compute it once */
                row = src + y * p->linesize[0];

                for (x = blockx; x < blockx + 8 && x < C64XRES; x += 2) {
                    if (x + 1 < width) {
                        /* build average over 2 pixels; write blocks as linear
                         * data now so they are suitable for elbg */
                        dest[0] = (row[x] + row[x + 1]) / 2;
                    } else if (x < width) {
                        /* odd width: the right neighbour is outside the
                         * picture, so use the last pixel on its own */
                        dest[0] = row[x];
                    }
                    dest++;
                }
            }
        }
    }
}
I have also begun to modify the ff_fill_line_with_color() function in drawutils.c
so that it uses different paths for char, short, int or other fill sizes when the
format is packed:
/**
 * Fill num bytes starting at dst with the byte stored at *val.
 *
 * @param dst destination buffer, at least num bytes
 * @param val pointer to the fill byte
 * @param num number of bytes to write; zero or negative writes nothing
 */
void memset8(char *dst, char *val, int num)
{
    /* memset() takes a size_t count, so a negative num would turn into a huge
     * unsigned length. Treat it as "nothing to do", like the loop-based
     * memset16()/memset32() fillers. */
    if (num > 0)
        memset(dst, *val, (size_t)num);
}
/**
 * Store the short value at *val into num consecutive elements of dst.
 *
 * @param dst destination array of at least num short ints
 * @param val pointer to the fill value (read once, before any store)
 * @param num number of elements to write; zero or negative writes nothing
 */
void memset16(short int *dst, short int *val, int num)
{
    const short int fill = *val;
    short int *out = dst;
    int remaining;

    for (remaining = num; remaining > 0; remaining--) {
        *out = fill;
        out++;
    }
}
/**
 * Store the int value at *val into num consecutive elements of dst.
 *
 * @param dst destination array of at least num ints
 * @param val pointer to the fill value (read once, before any store)
 * @param num number of elements to write; zero or negative writes nothing
 */
void memset32(int *dst, int *val, int num)
{
    const int pattern = *val;
    int done = 0;

    while (done < num) {
        dst[done] = pattern;
        done++;
    }
}
/**
 * Replicate the 3-byte pattern at src num times into dst.
 *
 * @param dst destination buffer, at least 3 * num bytes
 * @param src pointer to the 3 pattern bytes
 * @param num number of 3-byte elements to write; zero or negative writes nothing
 */
void memset24(char *dst, char *src, int num)
{
    char *cursor = dst;
    int remaining = num;

    while (remaining > 0) {
        memcpy(cursor, src, 3);
        cursor += 3;
        remaining--;
    }
}
void memset_sized(char *dst, char *src, int num, int stepsize)
{
int i;
for (i = 0; i < num; i++)
memcpy(dst + i * stepsize, src, stepsize);
}
/**
 * Build a one-line buffer of w pixels filled with a single colour.
 *
 * Only the packed-RGBA branch is shown here; the code between that branch and
 * the final return is elided ("...") in this excerpt.
 *
 * @param line           line[0] receives a buffer of w * pixel_step[0] bytes
 *                       obtained from av_malloc()
 * @param pixel_step     pixel_step[0] is set to the bits per pixel shifted
 *                       right by 3 (bytes per pixel)
 * @param w              number of pixels in the line
 * @param dst_color      receives rgba_color reordered through the rgba map
 * @param pix_fmt        pixel format used to look up the descriptor and the map
 * @param rgba_color     colour to fill with
 * @param is_packed_rgba set to non-zero when ff_fill_rgba_map() returns >= 0
 * @param rgba_map_ptr   if non-NULL, receives a copy of the 4-entry rgba map
 * @return 0 on the path visible in this excerpt
 */
int ff_fill_line_with_color(uint8_t *line[4], int pixel_step[4], int w, uint8_t
                            dst_color[4],
                            enum PixelFormat pix_fmt, uint8_t rgba_color[4],
                            int *is_packed_rgba, uint8_t rgba_map_ptr[4])
{
    uint8_t rgba_map[4] = {0};
    int i;
    const AVPixFmtDescriptor *pix_desc = &av_pix_fmt_descriptors[pix_fmt];
    int hsub = pix_desc->log2_chroma_w;

    *is_packed_rgba = ff_fill_rgba_map(rgba_map, pix_fmt) >= 0;

    if (*is_packed_rgba) {
        /* queried once and reused for both the step size and the dispatch */
        const int bits_per_pixel = av_get_bits_per_pixel(pix_desc);
        short int fill16;
        int fill32;

        pixel_step[0] = bits_per_pixel >> 3;
        for (i = 0; i < 4; i++)
            dst_color[rgba_map[i]] = rgba_color[i];

        /* NOTE(review): the av_malloc() result is used unchecked below; the
         * error convention of this function is not visible in this excerpt,
         * so a failure path still has to be added. */
        line[0] = av_malloc(w * pixel_step[0]);

        /* Pick the widest store that matches the pixel size. dst_color is a
         * byte array, so for the 16/32-bit cases its bytes are copied into a
         * properly typed temporary instead of being read through a cast
         * pointer (which could be misaligned and breaks the aliasing rules).
         * Like the original casts, this assumes 2-byte short and 4-byte int. */
        switch (bits_per_pixel) {
        case 8:
            memset8((char *)line[0], (char *)dst_color, w);
            break;
        case 16:
            memcpy(&fill16, dst_color, sizeof(fill16));
            memset16((short int *)line[0], &fill16, w);
            break;
        case 24:
            memset24((char *)line[0], (char *)dst_color, w);
            break;
        case 32:
            memcpy(&fill32, dst_color, sizeof(fill32));
            memset32((int *)line[0], &fill32, w);
            break;
        default:
            memset_sized((char *)line[0], (char *)dst_color, w, pixel_step[0]);
            break;
        }

        if (rgba_map_ptr)
            memcpy(rgba_map_ptr, rgba_map, sizeof(rgba_map[0]) * 4);
    }
...
    return 0;
}
I plan to spend some time analysing the FFmpeg 0.9 source code to find
similar parts that can be improved.
=> Which parts of FFmpeg are the most CPU-bound
(so that I can look for room for optimisation in them)?
@+
Yannoo
----- Fin du message transféré -----
More information about the ffmpeg-devel
mailing list