[MPlayer-dev-eng] [PATCH] Add support for ARIB STD-B24 captions v4

Reimar Döffinger Reimar.Doeffinger at gmx.de
Thu Oct 29 20:31:38 CET 2009


> +struct arib_conv_state {
> +	unsigned char G[4];
> +	unsigned char G_WIDTH[4];
> +	unsigned char GL, GR;
> +	unsigned char SS;

Avoid uppercase names, and something more descriptive
would be nice, too.
Also for generic data using uint8_t is nicer than
unsigned char IMO.

> +static const unsigned int arib_default_color_pal[] = {
> +	//AABBGGRR
> +	0xFF000000,
> +	0xFF0000FF,
> +	0xFF00FF00,
> +	0xFF00FFFF,
> +	0xFFFF0000,
> +	0xFFFF00FF,
> +	0xFFFFFF00,
> +	0xFFFFFFFF
> +};

Putting alpha in a separate table in case it is ever needed would allow
to get rid of some & 0x00FFFFFF in the code...

> +	long long start = state.pts * 1000;

Use int64_t I'd say.

> +static void arib_prepare_buf(void)
> +{
> +	int len;
> +	state.inptr = state.inbuf;
> +	len = snprintf(state.inbuf, sizeof(state.inbuf), "{\\a1\\c&H%06X&\\fs%d}",
> +		       arib_default_color_pal[state.fg_color] & 0x00FFFFFF,
> +		       state.font_height + state.font_size_adj * 8);
> +	if (len >= 0)
> +		state.inptr += len;

This is wrong, the result of snprintf can be more than the number of characters
printed, and inptr would then point outside the buffer.
I think you meant state.inptr += strlen(state.inptr)

> +static int arib_process_escapecode(const unsigned char *data, size_t len)
> +{
> +	int params[4] = { 0, -1, -1, -1 };
> +	int current_param = 0;
> +	size_t skip = 1;
> +
> +	while (skip <= len) {
> +		unsigned char val = data[skip];
> +		skip++;

Can be one line.

> +		if (val == 0x3b && current_param < 4)
> +			params[++current_param] = 0;
> +		else if (val >= 0x30 && val < 0x3A) {
> +			params[current_param] *= 10;
> +			params[current_param] += val & 0x0F;
> +		} else if (val >= 0x40 && val < 0x80) {

what about val == 0x3c, 0x3e, 0x3f, 0x00 - 0x3f and 0x80 - 0xff?

> +			case 0x53:
> +				if (params[1] < 0)
> +					state.format = params[0];
> +				else
> +					arib_stub("unhandled SWF [%d, %d, %d, %d]\n", params[0], params[1], params[2], params[3]);
> +				arib_flush_buf();
> +				break;
> +			case 0x61:
> +				arib_flush_buf();
> +				state.x = params[0];
> +				state.y = params[1];
> +				break;

I'd place the arib_flush_buf consistently, either always on top or always at the bottom.

> +static int arib_process_controlcode(const unsigned char *data, size_t len)

I think almost all functions miss doxygen documentation, but this on in particular
needs an explanation for the return value.

> +	// C0

What is that supposed to mean?

> +	case 0x0E: // LS1
> +		state.GL = 1;
> +		return 1;
> +	case 0x0F: // LS0
> +		state.GL = 0;
> +		return 1;

Maybe

> +	case 0x0E: // LS1
> +	case 0x0F: // LS0
> +		state.GL = 0x0F - *data;
> +		return 1;

> +	case 0x1B: // ESC
> +		// Graphic set designation
> +		if (data[1] == 0x24) {
> +			if (data[2] == 0x28) {

You either need to check len or document padding or minimum size requirements in the function documentation.

> +				state.G[0] = data[4] | GS_DRCS;
> +				state.G_WIDTH[0] = 2;
> +				return 5;
> +			} else if (data[2] >= 0x29 && data[2] <= 0x2b) {
> +				state.G_WIDTH[data[2] - 0x28] = 2;
> +				if (data[3] == 0x20) {
> +					state.G[data[2] - 0x28] = data[4] | GS_DRCS;
> +					return 5;

Uh, what is the point of the data[2] == 0x28 case above, it seems to be the same code as this...

> +				} else {
> +					state.G[data[2] - 0x28] = data[3];
> +					return 4;
> +				}
> +			} else {
> +				state.G[0] = data[2];
> +				state.G_WIDTH[0] = 2;
> +				return 3;
> +			}
> +		} else if (data[1] >= 0x28 && data[1] <= 0x2b) {
> +			state.G_WIDTH[data[1] - 0x28] = 1;
> +			if (data[2] == 0x20) {
> +				state.G[data[1] - 0x28] = data[3] | GS_DRCS;
> +				return 4;
> +			} else {
> +				state.G[data[1] - 0x28] = data[2];
> +				return 3;
> +			}

And this code is almost a duplicate of the above.

> +		} else if (data[1] == 0x6e) {
> +			state.GL = 2;
> +			return 2;
> +		} else if (data[1] == 0x6f) {
> +			state.GL = 3;
> +			return 2;

state.GL = 0x70 - data[1]; for both

> +		} else if (data[1] == 0x7e) {
> +			state.GR = 1;
> +			return 2;
> +		} else if (data[1] == 0x7d) {
> +			state.GR = 2;
> +			return 2;
> +		} else if (data[1] == 0x7c) {
> +			state.GR = 3;
> +			return 2;

state.GR = 0x7f - data[1];

> +			size_t left = sizeof(state.inbuf) - (state.inptr - state.inbuf);

a inbuf_end pointer would allow to simplify this to
state.inbuf_end - state.inptr, with the additional advantage that it is
easy to change to malloced buffers at a later time.

> +			len = snprintf(state.inptr, left, "{\\c%06X}",
> +				       arib_default_color_pal[state.fg_color] & 0x00FFFFFF);
> +			if (len >= 0)
> +				state.inptr += len;

another misuse of snprintf return value.
Also this piece of code seems to be duplicated quite often, I'd say it should be
in a special append_inbuf function or so.

> +static const unsigned char *arib_default_macros[] = {
> +	"\x1B\x24\x42\x1B\x29\x4A\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x24\x42\x1B\x29\x31\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x24\x42\x1B\x29\x20\x41\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x32\x1B\x29\x34\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x32\x1B\x29\x33\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x32\x1B\x29\x20\x41\x1B\x2A\x35\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x20\x41\x1B\x29\x20\x42\x1B\x2A\x20\x43\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x20\x44\x1B\x29\x20\x45\x1B\x2A\x20\x46\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x20\x47\x1B\x29\x20\x48\x1B\x2A\x20\x49\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x20\x4A\x1B\x29\x20\x4B\x1B\x2A\x20\x4C\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x20\x4D\x1B\x29\x20\x4E\x1B\x2A\x20\x4F\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x24\x42\x1B\x29\x20\x42\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x24\x42\x1B\x29\x20\x43\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x24\x42\x1B\x29\x20\x44\x1B\x2A\x30\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x31\x1B\x29\x30\x1B\x2A\x4A\x1B\x2B\x20\x70\x0F\x1B\x7D",
> +	"\x1B\x28\x4A\x1B\x29\x32\x1B\x2A\x20\x41\x1B\x2B\x20\x70\x0F\x1B\x7D",

Please don't use string constants for binary data.
Also changing the type to [][20] should save memory and a pointer dereference on access.

> +static void arib_encode_sjis(unsigned char row, unsigned char col)
> +{
> +	size_t charbytes, outbytes;
> +	char charbuf[2];
> +	char *charptr = charbuf;
> +	charbuf[0] = ((row + 1) >> 1) + (row <= 94 ? 112 : 176);
> +
> +	if (row & 0x01)
> +		charbuf[1] = col + 31 + (col >= 96);
> +	else
> +		charbuf[1] = col + 126;
> +
> +	charbytes = 2;
> +	outbytes = sizeof(state.inbuf) - (state.inptr - state.inbuf);
> +	iconv(conv, &charptr, &charbytes, (char **)&state.inptr, &outbytes);

I'm tempted to suggest to use a lookup-table instead of iconv...
At least for the cases where the parameters to this functions
are constant I'd suggest to code the result directly.
I'd even consider to write the strings directly as UTF-8, though
I am not 100% sure how well the compilers and other people's
editors handle that...

> +			arib_process_statement(arib_default_macros[row & 0xF],
> +					strlen(arib_default_macros[row & 0xF]));

Use an extra variable instead of writing the arib_default_macros... stuff twice.

> +static void arib_process_statement(const unsigned char *data, size_t len)
> +{
> +	int skip;
> +
> +	while (len > 0) {
> +		switch ((*data >> 5) & 0x3) {
> +		case 0x3:
> +		case 0x2:
> +		case 0x1:
> +			if ((*data & 0x7F) == 0x20 ||
> +			    (*data & 0x7F) == 0x7F) {
> +				if (!state.inptr)
> +					arib_prepare_buf();
> +				*state.inptr++ = *data;
> +				skip = 1;
> +			} else if (*data & 0x80) { // GR
> +				skip = arib_process_char(state.GR, data[0] & 0x7F, data[1] & 0x7F);
> +			} else { // GL
> +				skip = arib_process_char(state.SS ? state.SS : state.GL, data[0], data[1]);
> +				state.SS = 0;
> +			}
> +			len -= skip;
> +			data += skip;

If you use a data_end = data + len; you don't need to update len.
Also it allows to pass &data to e.g. arib_process_char and have that function
update the "data" pointer instead of returning a skip value.

> +static void arib_process_data_units(const unsigned char *data, int len)
> +{
> +	while (len >= 5 && *data++ == 0x1F) {
> +		int du_len = (data[1] << 16) | (data[2] << 8) | data[3];

AV_RB24(data + 1)

> +	if (offset_mode) {
> +		arib_dbg("%d%d:%d%d:%d%d:%d%d%d ",
> +			data[0] >> 4, data[0] & 0xF,
> +			data[1] >> 4, data[1] & 0xF,
> +			data[2] >> 4, data[2] & 0xF,
> +			data[3] >> 4, data[3] & 0xF,
> +			data[4] >> 4);

I think the arguments are unsigned char, but %d is for int...

> +		data = &data[5];

data += 5?

> +struct caption_data_group {
> +	unsigned char id_version;
> +	unsigned char link_num;
> +	unsigned char last_link_num;
> +	unsigned char size[2];
> +	unsigned char data[0];
> +} __attribute__ ((__packed__));

No further packed structs will ever again be allowed in MPlayer if I
have anything to say.
Also some compilers have issues with [0] and some with [] declarations in
structs.
Since you don't use many part of this struct avoiding it should not really
make the code any less readable.



More information about the MPlayer-dev-eng mailing list