[FFmpeg-devel] [FFMPEG] [PATCH] cavs encoder
Michael Niedermayer
michaelni
Fri Nov 20 13:58:25 CET 2009
On Thu, Nov 19, 2009 at 09:36:26AM +0800, zhihang wang wrote:
[...]
> +#define ENCODE_FLAG 0
> +#define LAMBDA_ACCURACY_BITS 16
> +#define LAMBDA_FACTOR(lambda) ((int32_t)((float)(1 << LAMBDA_ACCURACY_BITS)*(lambda) + 0.5f))
Floats should generally be avoided so that bit-exact regression tests are
possible across architectures.
> +#define MV_COST(factor, bits) (((factor)*(bits)) >> LAMBDA_ACCURACY_BITS)
> +
> +#define SAVE_MB_BEST min_mb_cost = cost; \
> + best_blk_sz = blk_sz; \
> + memcpy(mb_best_ref, best_ref , 4*sizeof(int)); \
> + memcpy(mb_best_mv , h->mv , (2*4*3)*sizeof(cavs_vector)); \
> + memcpy(mb_best_pmv, tmp_best_pmv, 4*sizeof(cavs_vector));
> +
> +#define SSE_FUN_HEIGHT(blk_sz) int sse_fun = cavs_sse_fun[blk_sz]; \
> + int height = cavs_block_height[blk_sz];
I suggest:
#define SSE_FUN_HEIGHT(blk_sz) \
int sse_fun = cavs_sse_fun [blk_sz]; \
int height = cavs_block_height[blk_sz];
[...]
> +/**
> + * encode the cavs sequence header
> + */
> +static int cavs_encode_seq_header(MpegEncContext *s)
> +{
> + int frame_rate_code = -1;
> + int i;
> + PutBitContext *pb = &s->pb;
> +
> + put_header(s, CAVS_START_CODE);
> + put_bits(pb, 8 , s->avctx->profile);
> + put_bits(pb, 8 , s->avctx->level);
> + put_bits(pb, 1 , s->progressive_sequence); //progressive sequence only
> + put_bits(pb, 14, s->width);
> + put_bits(pb, 14, s->height);
> + put_bits(pb, 2 , 1); // 1 = YUV 4:2:0, 2 = YUV 4:2:2
> + put_bits(pb, 3 , 1); //sample_precision 8bits
> + put_bits(pb, 4 , s->aspect_ratio_info);
> + for (i = 0; i < 15; i++)
> + if ((ff_frame_rate_tab[i].den == s->avctx->time_base.num) &&
> + (ff_frame_rate_tab[i].num == s->avctx->time_base.den))
> + frame_rate_code = i;
> + if (frame_rate_code < 0) {
> + av_log(s->avctx, AV_LOG_ERROR, "unsupported framerate %d/%d\n",
> + s->avctx->time_base.den, s->avctx->time_base.num);
> + return -1;
> + }
> + put_bits(pb, 4, frame_rate_code & 0xF);
The & 0xF is useless: frame_rate_code is already known to be in the range 0..14 at this point.
[...]
> + if (s->pict_type == FF_I_TYPE)
> + put_bits(pb, 4, 0); //reserved bits
> + else {
The if branch should have {} as well, as that simplifies future patches and
requires no extra lines.
[...]
> +/**
> + * decide the lambda for me and mode decision
> + */
> +static inline void decide_lambda(AVSContext *h)
> +{
> + MpegEncContext *s = &h->s;
> + double lambda_motion;
> + if (h->rdo[s->pict_type]) {
> + int rd_qp;
> + rd_qp = s->qscale - SHIFT_QP;
> + if (s->max_b_frames) {
> + h->lambda_mode = 0.68 * pow (2, rd_qp>>2);
Is that intended to be >>2 (an integer shift, which truncates) instead of *0.25?
> + if (s->pict_type == FF_B_TYPE)
> + h->lambda_mode *= FFMAX(2.0F, FFMIN(4.0F, (rd_qp>>3)));
av_clip()
[...]
> +/**
> + * mapping the mb type to the entropy code
> + */
> +static int mbtype_to_code(AVSContext *h, enum cavs_block blk_sz, int best_ref[4])
> +{
> + MpegEncContext* s = &h->s;
> + const int dir2offset[3][3] = {
I'd make that static const.
[...]
> +/**
> + * get the max and min mv range
> + */
> +static inline void get_limits(MpegEncContext *s, int x, int y)
> +{
> +#define FLAG_QPEL 1 //must be 1
> + MotionEstContext * const c = &s->me;
> + int range = c->avctx->me_range >> (1 + !!(c->flags & FLAG_QPEL));
> + c->xmin = - x - 16;
> + c->ymin = - y - 16;
> + c->xmax = - x + s->mb_width * 16;
> + c->ymax = - y + s->mb_height * 16;
> + if (range) {
> + c->xmin = FFMAX(c->xmin, -range);
> + c->xmax = FFMIN(c->xmax, range);
> + c->ymin = FFMAX(c->ymin, -range);
> + c->ymax = FFMIN(c->ymax, range);
> + }
duplicate of the equally named code in motion_est.c
[...]
> +
> +/**
> + * decide the available intra mode
> + */
> +static inline void decide_available_mode(AVSContext *h, int block, int *available_mode,
> + int *mode_num, int *pred_mode, int pos)
> +{
> + int top_mode, left_mode;
> + static const int neighbor[4] = {
> + 0, A_AVAIL, B_AVAIL | C_AVAIL, A_AVAIL | B_AVAIL
> + };
> + static const int32_t fix[4] = {
> + ~0, ~0, ~0, ~C_AVAIL
> + };
> + int32_t i_neighbor = (h->mb_neighbor | neighbor[block]) & fix[block];
> +
> + top_mode = h->pred_mode_Y[pos-3];
> + left_mode= h->pred_mode_Y[pos-1];
> + switch (i_neighbor) {
> + case 15: // 1111: downleft, topright, top, left
> + available_mode[0] = INTRA_L_VERT;
> + available_mode[1] = INTRA_L_HORIZ;
> + available_mode[2] = INTRA_L_LP;
> + available_mode[3] = INTRA_L_DOWN_LEFT;
> + available_mode[4] = INTRA_L_DOWN_RIGHT;
> + (*mode_num) = 5;
> + break;
> + case 7: // 0111: -, topright, top, left
> + available_mode[0] = INTRA_L_VERT;
> + available_mode[1] = INTRA_L_HORIZ;
> + available_mode[2] = INTRA_L_LP;
> + available_mode[3] = INTRA_L_DOWN_LEFT;
> + available_mode[4] = INTRA_L_DOWN_RIGHT;
> + (*mode_num) = 5;
> + break;
> + case 9: // 1001: downleft, -, -, left
> + available_mode[0] = INTRA_L_HORIZ;
> + available_mode[1] = INTRA_L_LP_LEFT;
> + (*mode_num) = 2;
> + break;
> + case 6: // 0110: -, topright, top, -
> + available_mode[0] = INTRA_L_VERT;
> + available_mode[1] = INTRA_L_LP_TOP;
> + (*mode_num) = 2;
> + break;
> + case 3: // 0011: -, -, top, left
> + available_mode[0] = INTRA_L_VERT;
> + available_mode[1] = INTRA_L_HORIZ;
> + available_mode[2] = INTRA_L_LP;
> + available_mode[3] = INTRA_L_DOWN_LEFT;
> + available_mode[4] = INTRA_L_DOWN_RIGHT;
> + (*mode_num) = 5;
> + break;
> + case 1: // 0001: -, -, -, left
> + available_mode[0] = INTRA_L_HORIZ;
> + available_mode[1] = INTRA_L_LP_LEFT;
> + (*mode_num) = 2;
> + break;
> + case 0: // 0000: -, -, -, -
> + available_mode[0] = INTRA_L_DC_128;
> + (*mode_num) = 1;
> + break;
There is a lot of simplification possible here; for example, cases 15, 7 and 3 have identical bodies, as do cases 9 and 1.
[...]
> +
> +/*****************************************************************************
> + *
> + * 2.3). P frame specific functions of the body encoding part
> + *
> + ****************************************************************************/
> +
> +/**
> + * check the specific mv cost
> + */
> +#define CHECK_MV_RANGE(mv) \
> + if ((mv.x < c->xmin || mv.x > c->xmax || mv.y < c->ymin || mv.y > c->ymax)) \
> + { \
> + return 0; \
> + }
> +static inline int check_mv(AVSContext *h, cavs_vector new_mv, cavs_vector *pmv,
> + cavs_vector *mvmin, int *min_cost, enum cavs_block blk_sz, int blk_idx)
> +{
> + MpegEncContext *s = &h->s;
> + int cost;
> + uint8_t *ey, *cy;
> + MotionEstContext * const c = &s->me;
> + SSE_FUN_HEIGHT(blk_sz);
> + cy = OFF_LUMA_SCAN(h->cy, blk_idx);
> + ey = OFF_LUMA_SCAN(h->ey, blk_idx);
> + //check the mv range
> + CHECK_MV_RANGE(new_mv);
> + cavs_partition_pred_y(h, cy, s->linesize, blk_sz, blk_idx, new_mv);
> + cost = s->dsp.sad[sse_fun](NULL, ey, cy, s->linesize, height);
> + cost += MV_COST(h->lambda_motion, mv_bits(&new_mv, pmv));
> + if (cost < *min_cost) {
> + *mvmin = new_mv;
> + *min_cost = cost;
> + return 1;
> + }
> + return 0;
> +}
> +
> +/**
> + * hexagon motion estimation
> + */
> +static inline int hex_me(AVSContext *h, enum cavs_block blk_sz, int blk_idx,
> + int ref, cavs_vector* pmv, cavs_vector* mv)
> +{
> + MpegEncContext *s = &h->s;
> + int i, sub, x, y, cost, count;
> + int newmv = 1;
> + cavs_vector mvmin, tmp_mv;
> + enum cavs_mv_loc mv_p = ff_cavs_mv_scan[blk_idx];
> + SSE_FUN_HEIGHT(blk_sz);
> + tmp_mv = *mv = *pmv;
> + /* predicted full-pel mv as reference */
> + h->mv[mv_p].x = 0;
> + h->mv[mv_p].y = 0;
> + h->mv[mv_p].ref = ref;
> + cavs_partition_pred_y(h, OFF_LUMA_SCAN(h->cy,blk_idx), s->linesize, blk_sz, blk_idx, h->mv[mv_p]);
> + cost = s->dsp.sad[sse_fun](NULL, h->ey, h->cy, s->linesize, height);
> + cost += MV_COST(h->lambda_motion, mv_bits(mv, pmv));
> + mvmin = h->mv[mv_p];
> + /* iterative hex search */
> + count = ME_ITER;
> + while (newmv && count--) {
> + newmv = 0;
> + for (i = 0; i < 6; i++) {
> + tmp_mv.x = mv->x + hex[i][0] * 4;
> + tmp_mv.y = mv->y + hex[i][0] * 4;
> + newmv = check_mv(h, tmp_mv, pmv, &mvmin, &cost, blk_sz, blk_idx);
> + }
> + *mv = mvmin;
> + }
> +
> + /* refinement (full,half,quarter) */
> + for (sub = 4; sub > 0; sub >>= 1) {
> + for (x = -sub; x <= sub; x += sub) {
> + for (y = -sub; y <= sub; y += sub) {
> + if (x|y) {
> + tmp_mv.x = mv->x + x;
> + tmp_mv.y = mv->y + y;
> + check_mv(h, tmp_mv, pmv, &mvmin, &cost, blk_sz, blk_idx);
> + }
> + }
> + }
> + *mv = mvmin;
> + }
> + return cost;
> +}
We already have ME code, so this could be considered duplicated.
I am against having separate ME code in every encoder (h264, avs and mpeg*).
[...]
> +/*****************************************************************************
> + *
> + * 2.4). B frame specific functions of the body encoding part
> + *
> + ****************************************************************************/
> +
> +/**
> + * average the y pixels
> + */
> +static void cavs_avg(uint8_t *dst, int dst_stride, uint8_t *tmp, int tmp_stride, enum cavs_block blk_sz)
> +{
> + int i, j;
> + for (j = 0; j < block_height[blk_sz]; j++) {
> + for (i = 0; i < block_width[blk_sz]; i++)
> + dst[i] = (uint8_t)((dst[i] + tmp[i] + 1) / 2);
> +
> + dst += dst_stride;
> + tmp += tmp_stride;
> + }
> +}
Isn't there a suitable function in dsputil?
[...]
> +static int cavs_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
> +{
> + AVSContext *h = avctx->priv_data;
> + MpegEncContext *s = &h->s;
> + AVFrame *pic_arg = data;
> + int i;
> +
> + for (i = 0; i < avctx->thread_count; i++) {
> + int start_y = h->thread_context[i]->s.start_mb_y;
> + int end_y = h->thread_context[i]->s. end_mb_y;
> + int height = s->mb_height;
> + uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y / height);
> + uint8_t *end = buf + (size_t)(((int64_t) buf_size)* end_y / height);
> + h->thread_context[i]->s.end_mb_x = s->mb_width;
> +
> + init_put_bits(&h->thread_context[i]->s.pb, start, end - start);
> + init_put_bits(&h->thread_context[i]->rdo_pb, rdo_buf, end - start);
rdo_buf is a static variable
[...]
> + // TODO: init me search engine
> + switch (s->me_method) {
> + case ME_ZERO:
> + break;
> + case ME_FULL:
> + break;
> + case ME_LOG:
> + break;
> + case ME_PHODS:
> + break;
> + case ME_EPZS:
> + break;
> + case ME_X1:
> + break;
> + case ME_HEX:
> + break;
> + case ME_UMH:
> + break;
> + case ME_ITER:
> + break;
> + case ME_TESA:
> + break;
> + }
useless
[...]
> + */
> +av_cold int ff_cavs_enc_end(AVCodecContext *avctx)
> +{
> + int ref_num, i, j;
> + AVSContext *h = avctx->priv_data;
> + MpegEncContext *s = &h->s;
> + ref_num = s->progressive_sequence ? 2 : 4;
> + for (i = 0; i < ref_num+1; i++) {
> + for (j = 0; j < 3; j++)
> + av_free(h->DPB[i].buffer[j]);
> + for (j = 0; j < IPL_BUF; j++) {
> + if (j != P_GG && j != P_ee && j != P_gg && j != P_pp && j != P_rr)
> + av_free(h->DPB[i].interpolated[j]);
> + }
> + s->avctx->release_buffer(s->avctx, (AVFrame *)&h->DPB[i]);
> + }
> +
> + for(i = 0; i < MAX_PICTURE_COUNT; i++)
> + free_picture(s, &s->picture[i]);
> +
> + av_free(h->levels[0]);
> + av_free(h->runs[0]);
> + av_free(rdo_buf);
this does not look safe in light of multiple encoders from multiple threads
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
If you really think that XML is the answer, then you definitly missunderstood
the question -- Attila Kinali
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20091120/2cd38cc8/attachment.pgp>
More information about the ffmpeg-devel
mailing list