[FFmpeg-soc] [soc]: r1250 - dirac/libavcodec/dirac.c
marco
subversion at mplayerhq.hu
Thu Aug 30 19:59:39 CEST 2007
Author: marco
Date: Thu Aug 30 19:59:39 2007
New Revision: 1250
Log:
use precalculated matrix for qpel/eighthpel interpolation
Modified:
dirac/libavcodec/dirac.c
Modified: dirac/libavcodec/dirac.c
==============================================================================
--- dirac/libavcodec/dirac.c (original)
+++ dirac/libavcodec/dirac.c Thu Aug 30 19:59:39 2007
@@ -212,6 +212,36 @@ static const transfer_func_t preset_tran
static const float preset_kr[3] = { 0.2126, 0.299, 0 /* XXX */ };
static const float preset_kb[3] = {0.0722, 0.114, 0 /* XXX */ };
+/* Weights for qpel/eighth pel interpolation. */
+typedef uint8_t weights_t[4];
+
+/* Quarter pixel interpolation. */
+static const weights_t qpel_weights[4] = {
+ { 4, 0, 0, 0 }, /* rx=0, ry=0 */
+ { 2, 0, 2, 0 }, /* rx=0, ry=1 */
+ { 2, 2, 0, 0 }, /* rx=1, ry=0 */
+ { 1, 1, 1, 1 }, /* rx=1, ry=1 */
+};
+
+static const weights_t eighthpel_weights[16] = {
+ { 16, 0, 0, 0 }, /* rx=0, ry=0 */
+ { 12, 0, 4, 0 }, /* rx=0, ry=1 */
+ { 8, 0, 8, 0 }, /* rx=0, ry=2 */
+ { 4, 0, 12, 0 }, /* rx=0, ry=3 */
+ { 12, 4, 0, 0 }, /* rx=1, ry=0 */
+ { 9, 3, 3, 1 }, /* rx=1, ry=1 */
+ { 6, 2, 6, 2 }, /* rx=1, ry=2 */
+ { 3, 1, 9, 3 }, /* rx=1, ry=3 */
+ { 8, 8, 0, 0 }, /* rx=2, ry=0 */
+ { 6, 6, 2, 2 }, /* rx=2, ry=1 */
+ { 4, 4, 4, 4 }, /* rx=2, ry=2 */
+ { 2, 2, 6, 6 }, /* rx=2, ry=3 */
+ { 4, 12, 0, 0 }, /* rx=3, ry=0 */
+ { 3, 9, 1, 3 }, /* rx=3, ry=1 */
+ { 2, 6, 2, 6 }, /* rx=3, ry=2 */
+ { 1, 3, 3, 9 }, /* rx=3, ry=3 */
+};
+
typedef int16_t vect_t[2];
#define DIRAC_REF_MASK_REF1 1
@@ -2108,55 +2138,63 @@ START_TIMER
val1 = get_halfpel(ref1, s->refwidth, s->refheight, x, y);
val2 = get_halfpel(ref2, s->refwidth, s->refheight, x, y);
} else {
- int w00, w01, w10, w11;
- int w002, w012, w102, w112;
int rx1, ry1, rx2, ry2;
+ const uint8_t *w1;
+ const uint8_t *w2;
rx1 = px1 - (hx1 << (s->frame_decoding.mv_precision - 1));
ry1 = py1 - (hy1 << (s->frame_decoding.mv_precision - 1));
rx2 = px2 - (hx2 << (s->frame_decoding.mv_precision - 1));
ry2 = py2 - (hy2 << (s->frame_decoding.mv_precision - 1));
- w00 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry1)
- * ((1 << (s->frame_decoding.mv_precision - 1)) - rx1);
- w01 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry1) * rx1;
- w10 = ((1 << (s->frame_decoding.mv_precision - 1)) - rx1) * ry1;
- w11 = ry1 * rx1;
+ if (s->frame_decoding.mv_precision == 2) {
+ /* Do qpel interpolation. */
+ rx1 = px1 & 1;
+ ry1 = py1 & 1;
+ rx2 = px2 & 1;
+ ry2 = py2 & 1;
- w002 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry2)
- * ((1 << (s->frame_decoding.mv_precision - 1)) - rx2);
- w012 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry2) * rx2;
- w102 = ((1 << (s->frame_decoding.mv_precision - 1)) - rx2) * ry2;
- w112 = ry2 * rx2;
+ w1 = qpel_weights[(rx1 << 1) | ry1];
+ w2 = qpel_weights[(rx2 << 1) | ry2];
+ } else {
+ /* Do eighthpel interpolation. */
+ rx1 = px1 & 3;
+ ry1 = py1 & 3;
+ rx2 = px2 & 3;
+ ry2 = py2 & 3;
+
+ w1 = eighthpel_weights[(rx1 << 2) | ry1];
+ w2 = eighthpel_weights[(rx2 << 2) | ry2];
+ }
/* For val1. */
if (hx1 > 0 && hy1 > 0 && hx1 < (s->refwidth - 1) && hy1 < (s->refheight - 11)) {
- val1 += w00 * refline1[hx1 ];
- val1 += w01 * refline1[hx1 + 1];
- val1 += w10 * refline1[hx1 + s->refwidth ];
- val1 += w11 * refline1[hx1 + s->refwidth + 1];
+ val1 += w1[0] * refline1[hx1 ];
+ val1 += w1[1] * refline1[hx1 + 1];
+ val1 += w1[2] * refline1[hx1 + s->refwidth ];
+ val1 += w1[3] * refline1[hx1 + s->refwidth + 1];
} else {
/* Border condition, keep using the slower code. */
- val1 += w00 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 , hy1 );
- val1 += w01 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1 );
- val1 += w10 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 , hy1 + 1);
- val1 += w11 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1 + 1);
+ val1 += w1[0] * get_halfpel(ref1, s->refwidth, s->refheight, hx1 , hy1 );
+ val1 += w1[1] * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1 );
+ val1 += w1[2] * get_halfpel(ref1, s->refwidth, s->refheight, hx1 , hy1 + 1);
+ val1 += w1[3] * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1 + 1);
}
val1 += 1 << (s->frame_decoding.mv_precision - 1);
val1 >>= s->frame_decoding.mv_precision;
/* For val2. */
if (hx2 > 0 && hy2 > 0 && hx2 < (s->refwidth - 1) && hy2 < (s->refheight - 11)) {
- val2 += w002 * refline2[hx2 ];
- val2 += w012 * refline2[hx2 + 1];
- val2 += w102 * refline2[hx2 + s->refwidth ];
- val2 += w112 * refline2[hx2 + s->refwidth + 1];
+ val2 += w2[0] * refline2[hx2 ];
+ val2 += w2[1] * refline2[hx2 + 1];
+ val2 += w2[2] * refline2[hx2 + s->refwidth ];
+ val2 += w2[3] * refline2[hx2 + s->refwidth + 1];
} else {
/* Border condition, keep using the slower code. */
- val2 += w002 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 , hy2 );
- val2 += w012 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2 );
- val2 += w102 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 , hy2 + 1);
- val2 += w112 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2 + 1);
+ val2 += w2[0] * get_halfpel(ref2, s->refwidth, s->refheight, hx2 , hy2 );
+ val2 += w2[1] * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2 );
+ val2 += w2[2] * get_halfpel(ref2, s->refwidth, s->refheight, hx2 , hy2 + 1);
+ val2 += w2[3] * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2 + 1);
}
val2 += 1 << (s->frame_decoding.mv_precision - 1);
val2 >>= s->frame_decoding.mv_precision;
@@ -2252,9 +2290,6 @@ START_TIMER
for (x = xs; x < xstop; x++) {
int hx, hy;
int rx, ry;
- /* XXX: This matrix can perhaps be stored in a fixed
- table. */
- int w00, w01, w10, w11;
int val = 0;
if (s->frame_decoding.mv_precision > 0) {
@@ -2272,26 +2307,33 @@ START_TIMER
|| s->frame_decoding.mv_precision == 1) {
val = get_halfpel(refframe, s->refwidth, s->refheight, x, y);
} else {
- rx = px - (hx << (s->frame_decoding.mv_precision - 1));
- ry = py - (hy << (s->frame_decoding.mv_precision - 1));
+ const uint8_t *w;
+
+ if (s->frame_decoding.mv_precision == 2) {
+ /* Do qpel interpolation. */
+ rx = px & 1;
+ ry = py & 1;
+
+ w = qpel_weights[(rx << 1) | ry];
+ } else {
+ /* Do eighthpel interpolation. */
+ rx = px & 3;
+ ry = py & 3;
+ w = eighthpel_weights[(rx << 2) | ry];
+ }
- w00 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry)
- * ((1 << (s->frame_decoding.mv_precision - 1)) - rx);
- w01 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry) * rx;
- w10 = ((1 << (s->frame_decoding.mv_precision - 1)) - rx) * ry;
- w11 = ry * rx;
if (hx > 0 && hy > 0 && hx < (s->refwidth - 1) && hy < (s->refheight - 11)) {
- val += w00 * refline[hx ];
- val += w01 * refline[hx + 1];
- val += w10 * refline[hx + s->refwidth ];
- val += w11 * refline[hx + s->refwidth + 1];
+ val += w[0] * refline[hx ];
+ val += w[1] * refline[hx + 1];
+ val += w[2] * refline[hx + s->refwidth ];
+ val += w[3] * refline[hx + s->refwidth + 1];
} else {
/* Border condition, keep using the slower code. */
- val += w00 * get_halfpel(refframe, s->refwidth, s->refheight, hx , hy );
- val += w01 * get_halfpel(refframe, s->refwidth, s->refheight, hx + 1, hy );
- val += w10 * get_halfpel(refframe, s->refwidth, s->refheight, hx , hy + 1);
- val += w11 * get_halfpel(refframe, s->refwidth, s->refheight, hx + 1, hy + 1);
+ val += w[0] * get_halfpel(refframe, s->refwidth, s->refheight, hx , hy );
+ val += w[1] * get_halfpel(refframe, s->refwidth, s->refheight, hx + 1, hy );
+ val += w[2] * get_halfpel(refframe, s->refwidth, s->refheight, hx , hy + 1);
+ val += w[3] * get_halfpel(refframe, s->refwidth, s->refheight, hx + 1, hy + 1);
}
val += 1 << (s->frame_decoding.mv_precision - 1);
More information about the FFmpeg-soc
mailing list