[Ffmpeg-devel] [PATCH] DV weights
Dan Maas
dmaas
Sat Feb 25 20:41:33 CET 2006
This patch implements AC coefficient weighing (per SMPTE 314M) for the
DV encoder and decoder.
I have also attached the Python script I used to make the tables.
Weighing is implemented as fixed-point multiplication. I have chosen
the number of bits of precision that gives the smallest error without
causing 32-bit overflows. Weighing is inherently a lossy process
because weighted coefficients have fewer bits than unweighted ones.
But, in no case is the error greater than one least-significant bit.
(the Python script checks this)
Performance impact is less than 1%.
Regards,
Dan
-------------- next part --------------
Index: libavcodec/dv.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dv.c,v
retrieving revision 1.75
diff -u -p -u -r1.75 dv.c
--- libavcodec/dv.c 24 Feb 2006 09:16:26 -0000 1.75
+++ libavcodec/dv.c 25 Feb 2006 19:29:18 -0000
@@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext *
typedef struct BlockInfo {
const uint8_t *shift_table;
const uint8_t *scan_table;
+ const int *iweight_table;
uint8_t pos; /* position in block */
uint8_t dct_mode;
uint8_t partial_bit_count;
@@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext *
int last_index = get_bits_size(gb);
const uint8_t *scan_table = mb->scan_table;
const uint8_t *shift_table = mb->shift_table;
+ const int *iweight_table = mb->iweight_table;
int pos = mb->pos;
int partial_bit_count = mb->partial_bit_count;
int level, pos1, run, vlc_len, index;
@@ -344,7 +346,12 @@ static void dv_decode_ac(GetBitContext *
assert(level);
pos1 = scan_table[pos];
- block[pos1] = level << shift_table[pos1];
+ level <<= shift_table[pos1];
+
+ /* unweigh, round, and shift down */
+ level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
+
+ block[pos1] = level;
UPDATE_CACHE(re, gb);
}
@@ -410,6 +417,7 @@ static inline void dv_decode_video_segme
dct_mode = get_bits1(&gb);
mb->dct_mode = dct_mode;
mb->scan_table = s->dv_zigzag[dct_mode];
+ mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88;
class1 = get_bits(&gb, 2);
mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
[quant + dv_quant_offset[class1]];
@@ -648,7 +656,7 @@ static always_inline PutBitContext* dv_e
}
static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
- const uint8_t* zigzag_scan, int bias)
+ const uint8_t* zigzag_scan, const int *weight, int bias)
{
int i, area;
static const int classes[] = {12, 24, 36, 0xffff};
@@ -665,7 +673,11 @@ static always_inline void dv_set_class_n
if (level+15 > 30U) {
bi->sign[i] = (level>>31)&1;
- bi->mb[i] = level= ABS(level)>>4;
+ /* weigh it and and shift down into range, adding for rounding */
+ /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
+ AND the 2x doubling of the weights */
+ level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4);
+ bi->mb[i] = level;
if(level>max) max= level;
bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level);
bi->next[prev]= i;
@@ -872,7 +887,9 @@ static inline void dv_encode_video_segme
s->fdct[enc_blk->dct_mode](block);
dv_set_class_number(block, enc_blk,
- enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
+ enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct,
+ enc_blk->dct_mode ? dv_weight_248 : dv_weight_88,
+ j/4);
init_put_bits(pb, ptr, block_sizes[j]/8);
put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
Index: libavcodec/dvdata.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dvdata.h,v
retrieving revision 1.15
diff -u -p -u -r1.15 dvdata.h
--- libavcodec/dvdata.h 12 Jan 2006 22:43:15 -0000 1.15
+++ libavcodec/dvdata.h 25 Feb 2006 19:29:21 -0000
@@ -1256,6 +1256,51 @@ static const uint16_t dv_place_411[1350]
0x0834, 0x2320, 0x2f44, 0x3810, 0x1658,
};
+/* DV25/50 DCT coefficient weights and inverse weights */
+/* created by dvtables.py */
+static const int dv_weight_bits = 18;
+static const int dv_weight_88[64] = {
+ 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536,
+ 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935,
+ 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916,
+ 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433,
+ 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704,
+ 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568,
+ 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627,
+ 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258,
+};
+static const int dv_weight_248[64] = {
+ 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754,
+ 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536,
+ 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568,
+ 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965,
+ 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627,
+ 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965,
+ 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364,
+ 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651,
+};
+static const int dv_iweight_bits = 14;
+static const int dv_iweight_88[64] = {
+ 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079,
+ 18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284,
+ 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262,
+ 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815,
+ 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400,
+ 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764,
+ 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191,
+ 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536,
+};
+static const int dv_iweight_248[64] = {
+ 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196,
+ 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079,
+ 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764,
+ 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692,
+ 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191,
+ 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867,
+ 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173,
+ 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642,
+};
+
static const uint16_t dv_audio_shuffle525[10][9] = {
{ 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */
{ 6, 36, 66, 26, 56, 86, 16, 46, 76 },
-------------- next part --------------
#!/usr/bin/python
# dvtables - precompute some tables for ffmpeg's DV codec
# Copyright (C) 2006 Daniel Maas <dmaas at maasdigital.com>
# last update 25 Feb 2006
#
# dvtables is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# dvtables is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
import sys
from math import *
def clamp(x, lo, hi):
if x < lo:
return lo
elif x > hi:
return hi
else:
return x
# DV25 and DV50 zig-zag permutations, as used by ffmpeg
zigzag_88 = (
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
)
zigzag_248 = (
0, 8, 1, 9, 16, 24, 2, 10,
17, 25, 32, 40, 48, 56, 33, 41,
18, 26, 3, 11, 4, 12, 19, 27,
34, 42, 49, 57, 50, 58, 35, 43,
20, 28, 5, 13, 6, 14, 21, 29,
36, 44, 51, 59, 52, 60, 37, 45,
22, 30, 7, 15, 23, 31, 38, 46,
53, 61, 54, 62, 39, 47, 55, 63,
)
#
# Compute and print zig-zagged DV25/DV50 forward weight tables
#
# The spec gives weights as floating point numbers, where the "unity"
# value is 1/2. (i.e. a weight of 1/2 leaves the coefficient unchanged)
# To make the math simpler, we double the weights so that "unity" is 1,
# and double the incoming AC coefficients as well, so the result is the same.
# weight formulas from SMPTE 314M
def CS(x):
return cos(pi * (x/16.0))
w = [1.0,
CS(4)/(4.0*CS(7)*CS(2)),
CS(4)/(2.0*CS(6)),
1.0/(2.0*CS(5)),
7.0/8.0,
CS(4)/CS(3),
CS(4)/CS(2),
CS(4)/CS(1)]
def compute_weight_88(h,v):
# compute weight per the spec
if h == 0 and v == 0:
weight = 0.25
else:
weight = w[h]*w[v]/2.0
# double it
weight *= 2.0
return weight
def compute_weight_248(h,v):
# compute weight per the spec
if h == 0 and v == 0:
weight = 0.25
elif v < 4:
weight = w[h] * w[2*v]/2.0
else:
weight = w[h] * w[2*(v-4)]/2.0
# double it
weight *= 2.0
return weight
# We implement weighing as fixed-point integer multiplication.
# The maximum possible precision is 18 bits, to avoid overflowing 32
# bits when multiplying by 10-bit DCT levels which have been expanded
# by a factor of 8 by the DCT, times two because we double the
# coefficients).
weight_bits = 18
print "static const int dv_weight_bits = %d;" % weight_bits
# forward weights for 8-8 DCT
weights_88 = range(64)
fwd_error = 0
for v in range(8):
for h in range(8):
fweight = compute_weight_88(h,v)
# convert to fixed-point and round to closest value
iweight = int(fweight * (1 << weight_bits) + 0.5)
weights_88[8*v+h] = iweight
# check the precision of the integer weight for all possible input levels
for level in range(1024):
fresult = int(level * fweight + 0.5)
iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
if fresult != iresult:
# ensure the error is no more than one least-significant bit
assert abs(fresult - iresult) < 2
fwd_error += 1
#sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))
# forward weights for 2-4-8 DCT
weights_248 = range(64)
for v in range(8):
# interlace fields
v = (v/2) + 4*(v&1)
for h in range(8):
fweight = compute_weight_248(h,v)
# convert to fixed-point and round to closest value
iweight = int(fweight * (1 << weight_bits) + 0.5)
weights_248[8*v+h] = iweight
# check the precision of the integer weight for all possible input levels
for level in range(1024):
fresult = int(level * fweight + 0.5)
iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
if fresult != iresult:
# ensure the error is no more than one least-significant bit
assert abs(fresult - iresult) < 2
fwd_error += 1
#sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))
if 0:
sys.stderr.write("DV25/50 forward level/weight errors %d\n" % fwd_error)
print "static const int dv_weight_88[64] = {"
for j in range(8):
print "",
for i in range(8):
print "%6d," % (weights_88[zigzag_88[8*j+i]]),
print ""
print "};"
print "static const int dv_weight_248[64] = {"
for j in range(8):
print "",
for i in range(8):
print "%6d," % (weights_248[zigzag_248[8*j+i]]),
print ""
print "};"
#
# Compute inverse DV25/DV50 weights in fixed point
#
# Weighing and then unweighing an AC coefficient is inherently a lossy
# process, because the weighted coefficients have one bit less
# precision than unweighted ones. The best can do is minimize the
# error by using weights of sufficient precision.
# Fixed-point precision of inverse weights
# The maximum possible precision is 24 bits, to avoid 32-bit overflow when multiplying
# by 8-bit weighted coefficients.
# However, using more than 14 bits of precision does not reduce error any further
iweight_bits = 14
print "static const int dv_iweight_bits = %d;" % iweight_bits
def make_fixed_dv25(w):
w = 1.0 / float(w)
w *= (1 << weight_bits) * (1 << iweight_bits)
return int(w + 0.5)
# inverse weights for 8-8 DCT
print "static const int dv_iweight_88[64] = {"
for j in range(8):
print "",
for i in range(8):
w = weights_88[zigzag_88[8*j+i]]
print "%5d," % (make_fixed_dv25(w)),
print ""
print "};"
# inverse weights for 2-4-8 DCT
print "static const int dv_iweight_248[64] = {"
for j in range(8):
print "",
for i in range(8):
w = weights_248[zigzag_248[8*j+i]]
print "%5d," % (make_fixed_dv25(w)),
print ""
print "};"
# Find the error created by weighing and then unweighing an AC coefficient.
# We test all possible combinations of weight and level.
if 0:
inv_error = 0
for i in range(128):
for level in range(1024):
if i >= 64:
fwd_weight = weights_248[zigzag_248[i-64]]
else:
fwd_weight = weights_88[zigzag_88[i]]
inv_weight = make_fixed_dv25(fwd_weight)
# weigh the coefficient
weighted = (level * fwd_weight + (1 << (weight_bits-1))) >> weight_bits
# unweigh the coefficient
result = (weighted * inv_weight + (1 << (iweight_bits-1))) >> iweight_bits
# compare input vs output
if result != level:
# make sure the error is at most one least-significant bit
assert abs(result-level) < 2
#sys.stderr.write("level %d fwd_weight %d inv_weight %d weighted %d result %d\n" % (level,fwd_weight,inv_weight,weighted,result))
inv_error += 1
sys.stderr.write("DV25/50 inverse level/weight errors %d\n" % inv_error)
#
# DV50 macroblock locations
#
# find_macroblock() returns a 16-bit quantity
# lower 8 bits are horizontal index, upper 8 bits are vertical index
def find_macroblock_dv50(channel, seq, vid_block_num, is_pal):
if is_pal:
ysupers = 24
else:
ysupers = 20
# X and Y indices of the superblock we're inside
super_x_order = (2,1,3,0,4)
super_y_bases = (2,6,8,0,4)
super_x = super_x_order[vid_block_num % 5]
super_y = (2*(super_y_bases[vid_block_num % 5] + seq) + channel) % ysupers
# index within the superblock
within_super = vid_block_num / 5
# locate us within the superblock's "brick" pattern
if(within_super < 3):
within_super_x = 0;
within_super_y = within_super;
elif(within_super < 6):
within_super_x = 1;
within_super_y = 2 - (within_super-3);
elif(within_super < 9):
within_super_x = 2;
within_super_y = (within_super-6);
elif(within_super < 12):
within_super_x = 3;
within_super_y = 2 - (within_super-9);
elif(within_super < 15):
within_super_x = 4;
within_super_y = (within_super-12);
elif(within_super < 18):
within_super_x = 5;
within_super_y = 2 - (within_super-15);
elif(within_super < 21):
within_super_x = 6;
within_super_y = (within_super-18);
elif(within_super < 24):
within_super_x = 7;
within_super_y = 2 - (within_super-21);
else:
within_super_x = 8;
within_super_y = (within_super-24);
return ((3*super_y + within_super_y) << 8) | \
4*(9*super_x + within_super_x)
# print macroblock location table for DV50 NTSC
if 0:
# 2 channels per frame, 10 DIF sequences per channel,
# 27 video segments per DIF sequence, 5 macroblocks per video segment
print "static const uint16_t dv_place_422_525[2*10*27*5] = {"
for slice in range(2*10*27):
channel = slice / (10*27)
slice %= (10*27)
seq = slice / 27
slice %= 27
vid_block_num = 5*slice
print "",
for mb in range(5):
print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 0)),
print ""
print "};"
# print macroblock location table for DV50 PAL
if 0:
# 2 channels per frame, 12 DIF sequences per channel,
# 27 video segments per DIF sequence, 5 macroblocks per video segment
print "static const uint16_t dv_place_422_625[2*12*27*5] = {"
for slice in range(2*12*27):
channel = slice / (12*27)
slice %= (12*27)
seq = slice / 27
slice %= 27
vid_block_num = 5*slice
print "",
for mb in range(5):
print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 1)),
print ""
print "};"
More information about the ffmpeg-devel
mailing list