[FFmpeg-devel] [PATCH 2/5] avcodec/vc1: optimize block functions
zhaoxiu.zeng
zhaoxiu.zeng at gmail.com
Tue Feb 24 17:28:45 CET 2015
在 2015/2/19 2:12, Michael Niedermayer 写道:
> On Sat, Feb 14, 2015 at 10:56:28PM +0800, zhaoxiu.zeng wrote:
>> From 960eca51e6e65e6969f7d829e29ddc2387420733 Mon Sep 17 00:00:00 2001
>> From: Zeng Zhaoxiu <zhaoxiu.zeng at gmail.com>
>> Date: Sat, 14 Feb 2015 19:46:51 +0800
>> Subject: [PATCH 2/5] avcodec/vc1: optimize block functions
>
> This patch breaks make fate:
> also please include benchmark values
>
> --- ./tests/ref/fate/mss2-wmv 2015-02-12 16:53:42.138771890 +0100
> +++ tests/data/fate/mss2-wmv 2015-02-18 19:10:41.449866314 +0100
> @@ -27,11 +27,11 @@
> 0, 31, 31, 1, 230400, 0x18a2f97a
> 0, 32, 32, 1, 230400, 0xf9e82961
> 0, 33, 33, 1, 230400, 0x57a8e9e8
> -0, 34, 34, 1, 230400, 0xdef6fd66
> -0, 35, 35, 1, 230400, 0xc7d923a9
> -0, 36, 36, 1, 230400, 0x08bb41ee
> -0, 37, 37, 1, 230400, 0x43ccbd29
> -0, 38, 38, 1, 230400, 0x46666ee3
> +0, 34, 34, 1, 230400, 0xbd60fd5a
> +0, 35, 35, 1, 230400, 0x289b2391
> +0, 36, 36, 1, 230400, 0x8d5a4205
> +0, 37, 37, 1, 230400, 0xbed1bcb6
> +0, 38, 38, 1, 230400, 0x24086ea2
> 0, 39, 39, 1, 230400, 0xbfd2ef29
> 0, 40, 40, 1, 230400, 0x6504545f
> 0, 41, 41, 1, 230400, 0x8fb86901
> @@ -40,25 +40,25 @@
> 0, 44, 44, 1, 230400, 0xf808106b
> 0, 45, 45, 1, 230400, 0x34150020
> 0, 46, 46, 1, 230400, 0x50fdfe89
> -0, 47, 47, 1, 230400, 0x920b7708
> +0, 47, 47, 1, 230400, 0xe8287631
> 0, 48, 48, 1, 230400, 0xed64fcc4
> 0, 49, 49, 1, 230400, 0x6291a170
> 0, 50, 50, 1, 230400, 0x20524643
> -0, 51, 51, 1, 230400, 0x92aafecd
> +0, 51, 51, 1, 230400, 0x5e9efe62
> 0, 52, 52, 1, 230400, 0xf00ee14d
> 0, 53, 53, 1, 230400, 0xfa3113ea
> 0, 54, 54, 1, 230400, 0x99c06df1
> 0, 55, 55, 1, 230400, 0x625c6918
> -0, 56, 56, 1, 230400, 0xb277b25e
> +0, 56, 56, 1, 230400, 0xeb34b22e
> 0, 57, 57, 1, 230400, 0x2e913006
> 0, 58, 58, 1, 230400, 0x3f6f1d99
> 0, 59, 59, 1, 230400, 0x100ab60f
> -0, 60, 60, 1, 230400, 0x9b73d0bf
> +0, 60, 60, 1, 230400, 0xe21acfc4
> 0, 61, 61, 1, 230400, 0xda0df2ce
> 0, 62, 62, 1, 230400, 0x67f7ca24
> 0, 63, 63, 1, 230400, 0xbde9b3d0
> 0, 64, 64, 1, 230400, 0x92e14d07
> -0, 65, 65, 1, 230400, 0x9426c3d9
> +0, 65, 65, 1, 230400, 0xa584c3c4
> 0, 66, 66, 1, 230400, 0x6104be70
> 0, 67, 67, 1, 230400, 0xc4d1078a
> 0, 68, 68, 1, 230400, 0x89426a42
> @@ -67,35 +67,35 @@
> 0, 71, 71, 1, 230400, 0x4249b8c6
> 0, 72, 72, 1, 230400, 0x4b88cad3
> 0, 73, 73, 1, 230400, 0x76af545d
> -0, 74, 74, 1, 230400, 0xfe47e3c4
> +0, 74, 74, 1, 230400, 0xb165e37d
> 0, 75, 75, 1, 230400, 0xa2e0e721
> 0, 76, 76, 1, 230400, 0xde974a42
> -0, 77, 77, 1, 230400, 0x87bf38ba
> +0, 77, 77, 1, 230400, 0x3dad37cc
> 0, 78, 78, 1, 230400, 0xd52318fd
> 0, 79, 79, 1, 230400, 0x0bbb1526
> -0, 80, 80, 1, 230400, 0xa22c5e5e
> +0, 80, 80, 1, 230400, 0xe85b5e88
> 0, 81, 81, 1, 230400, 0x4532c5d2
> 0, 82, 82, 1, 230400, 0x88b560ec
> -0, 83, 83, 1, 230400, 0xcee9d9c9
> +0, 83, 83, 1, 230400, 0xeddad96d
> 0, 84, 84, 1, 230400, 0x0429358f
> -0, 85, 85, 1, 230400, 0xf18a9b98
> -0, 86, 86, 1, 230400, 0x63f7a12c
> -0, 87, 87, 1, 230400, 0x98635515
> +0, 85, 85, 1, 230400, 0xaee09b6d
> +0, 86, 86, 1, 230400, 0xce98a02b
> +0, 87, 87, 1, 230400, 0x127654f4
> 0, 88, 88, 1, 230400, 0x36affebc
> 0, 89, 89, 1, 230400, 0xd8c19629
> -0, 90, 90, 1, 230400, 0x9ef5344d
> -0, 91, 91, 1, 230400, 0x545668dc
> -0, 92, 92, 1, 230400, 0x50e65e74
> -0, 93, 93, 1, 230400, 0xe3258be3
> -0, 94, 94, 1, 230400, 0xeb479e1b
> -0, 95, 95, 1, 230400, 0x91894243
> -0, 96, 96, 1, 230400, 0x3c5660fc
> -0, 97, 97, 1, 230400, 0xf0c35673
> +0, 90, 90, 1, 230400, 0xfaac34dd
> +0, 91, 91, 1, 230400, 0x552568d9
> +0, 92, 92, 1, 230400, 0xc0015fad
> +0, 93, 93, 1, 230400, 0x50778be0
> +0, 94, 94, 1, 230400, 0x5d569f88
> +0, 95, 95, 1, 230400, 0xe2c5424a
> +0, 96, 96, 1, 230400, 0x72d6631f
> +0, 97, 97, 1, 230400, 0x64e656b2
> 0, 98, 98, 1, 230400, 0x552832e8
> 0, 99, 99, 1, 230400, 0x1970f2b1
> -0, 100, 100, 1, 230400, 0x812d4c91
> +0, 100, 100, 1, 230400, 0x464549e2
> 0, 101, 101, 1, 230400, 0xa3fbd4ef
> -0, 102, 102, 1, 230400, 0x486f9649
> +0, 102, 102, 1, 230400, 0xc44493bc
> 0, 103, 103, 1, 230400, 0x850f315a
> -0, 104, 104, 1, 230400, 0xc18ec66b
> -0, 105, 105, 1, 230400, 0xc9ef266e
> +0, 104, 104, 1, 230400, 0xa3a4c41c
> +0, 105, 105, 1, 230400, 0x0f5523c7
> Test mss2-wmv failed. Look at tests/data/fate/mss2-wmv.err for details.
> make: *** [fate-mss2-wmv] Error 1
> make: *** Waiting for unfinished jobs....
>
> [...]
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
From f55454c496ff8d372528dcc61536f3a575de3c4b Mon Sep 17 00:00:00 2001
From: Zeng Zhaoxiu <zhaoxiu.zeng at gmail.com>
Date: Wed, 25 Feb 2015 00:04:20 +0800
Subject: [PATCH 1/1] avcodec/vc1: optimize block functions
I tested on Fedora 21 x86_64 (running on VMware Workstation, Windows 7 32-bit).
The following are the results:
ffplay fate-suite/vc1/SA10143.vc1
before:
109760 decicycles in vc1_decode_i_block_adv, 1 runs, 0 skips
83680 decicycles in vc1_decode_i_block_adv, 2 runs, 0 skips
54310 decicycles in vc1_decode_i_block_adv, 4 runs, 0 skips
44165 decicycles in vc1_decode_i_block_adv, 8 runs, 0 skips
30712 decicycles in vc1_decode_i_block_adv, 16 runs, 0 skips
22447 decicycles in vc1_decode_i_block_adv, 32 runs, 0 skips
14320 decicycles in vc1_decode_i_block_adv, 64 runs, 0 skips
9794 decicycles in vc1_decode_i_block_adv, 128 runs, 0 skips
6982 decicycles in vc1_decode_i_block_adv, 256 runs, 0 skips
6472 decicycles in vc1_decode_i_block_adv, 512 runs, 0 skips
7362 decicycles in vc1_decode_i_block_adv, 1024 runs, 0 skips
9127 decicycles in vc1_decode_i_block_adv, 2047 runs, 1 skips
63200 decicycles in vc1_decode_p_block, 1 runs, 0 skips
53260 decicycles in vc1_decode_p_block, 2 runs, 0 skips
35307480 decicycles in vc1_decode_p_mb_intfi, 1 runs, 0 skips
45460 decicycles in vc1_decode_p_block, 4 runs, 0 skips
18741780 decicycles in vc1_decode_p_mb_intfi, 2 runs, 0 skips
57960 decicycles in vc1_decode_intra_block, 1 runs, 0 skips
39140 decicycles in vc1_decode_intra_block, 2 runs, 0 skips
32020 decicycles in vc1_decode_intra_block, 4 runs, 0 skips
32145 decicycles in vc1_decode_p_block, 8 runs, 0 skips
11380690 decicycles in vc1_decode_p_mb_intfi, 4 runs, 0 skips
25795 decicycles in vc1_decode_intra_block, 8 runs, 0 skips
20387 decicycles in vc1_decode_intra_block, 16 runs, 0 skips
6241875 decicycles in vc1_decode_p_mb_intfi, 8 runs, 0 skips
21740 decicycles in vc1_decode_p_block, 16 runs, 0 skips
13972 decicycles in vc1_decode_intra_block, 32 runs, 0 skips
3401612 decicycles in vc1_decode_p_mb_intfi, 16 runs, 0 skips
9278 decicycles in vc1_decode_intra_block, 64 runs, 0 skips
1823050 decicycles in vc1_decode_p_mb_intfi, 32 runs, 0 skips
7602 decicycles in vc1_decode_intra_block, 128 runs, 0 skips
15457 decicycles in vc1_decode_p_block, 32 runs, 0 skips
990403 decicycles in vc1_decode_p_mb_intfi, 63 runs, 1 skips
11653 decicycles in vc1_decode_p_block, 64 runs, 0 skips
8031 decicycles in vc1_decode_intra_block, 256 runs, 0 skips
549345 decicycles in vc1_decode_p_mb_intfi, 127 runs, 1 skips
9518 decicycles in vc1_decode_p_block, 128 runs, 0 skips
10095 decicycles in vc1_decode_p_block, 256 runs, 0 skips
319886 decicycles in vc1_decode_p_mb_intfi, 255 runs, 1 skips
8981 decicycles in vc1_decode_intra_block, 512 runs, 0 skips
11607 decicycles in vc1_decode_p_block, 512 runs, 0 skips
13843 decicycles in vc1_decode_p_block, 1024 runs, 0 skips
222799 decicycles in vc1_decode_p_mb_intfi, 508 runs, 4 skips
13811 decicycles in vc1_decode_intra_block, 1024 runs, 0 skips
147749 decicycles in vc1_decode_p_mb_intfi, 1019 runs, 5 skips
13281 decicycles in vc1_decode_intra_block, 2047 runs, 1 skips
13434 decicycles in vc1_decode_p_block, 2048 runs, 0 skips
327920 decicycles in vc1_decode_b_mb_intfi, 1 runs, 0 skips
2382940 decicycles in vc1_decode_b_mb_intfi, 2 runs, 0 skips
1242270 decicycles in vc1_decode_b_mb_intfi, 4 runs, 0 skips
646465 decicycles in vc1_decode_b_mb_intfi, 8 runs, 0 skips
340417 decicycles in vc1_decode_b_mb_intfi, 16 runs, 0 skips
192212 decicycles in vc1_decode_b_mb_intfi, 32 runs, 0 skips
113155 decicycles in vc1_decode_b_mb_intfi, 64 runs, 0 skips
70909 decicycles in vc1_decode_b_mb_intfi, 128 runs, 0 skips
53167 decicycles in vc1_decode_b_mb_intfi, 255 runs, 1 skips
12262 decicycles in vc1_decode_p_block, 4096 runs, 0 skips
59743 decicycles in vc1_decode_b_mb_intfi, 509 runs, 3 skips
50252 decicycles in vc1_decode_b_mb_intfi, 1021 runs, 3 skips
12524 decicycles in vc1_decode_intra_block, 4094 runs, 2 skips
113632 decicycles in vc1_decode_p_mb_intfi, 2040 runs, 8 skips
11518 decicycles in vc1_decode_p_block, 8191 runs, 1 skips
51172 decicycles in vc1_decode_b_mb_intfi, 2045 runs, 3 skips
93654 decicycles in vc1_decode_p_mb_intfi, 4079 runs, 17 skips
11403 decicycles in vc1_decode_intra_block, 8188 runs, 4 skips
10747 decicycles in vc1_decode_p_block, 16381 runs, 3 skips
47553 decicycles in vc1_decode_b_mb_intfi, 4089 runs, 7 skips
10803 decicycles in vc1_decode_intra_block, 16374 runs, 10 skips
81273 decicycles in vc1_decode_p_mb_intfi, 8163 runs, 29 skips
44723 decicycles in vc1_decode_b_mb_intfi, 8175 runs, 17 skips
10169 decicycles in vc1_decode_p_block, 32755 runs, 13 skips
9893 decicycles in vc1_decode_intra_block, 32749 runs, 19 skips
72528 decicycles in vc1_decode_p_mb_intfi, 16338 runs, 46 skips
39796 decicycles in vc1_decode_b_mb_intfi, 16351 runs, 33 skips
after:
104200 decicycles in vc1_decode_i_block_adv, 1 runs, 0 skips
84300 decicycles in vc1_decode_i_block_adv, 2 runs, 0 skips
51940 decicycles in vc1_decode_i_block_adv, 4 runs, 0 skips
42210 decicycles in vc1_decode_i_block_adv, 8 runs, 0 skips
29867 decicycles in vc1_decode_i_block_adv, 16 runs, 0 skips
21970 decicycles in vc1_decode_i_block_adv, 32 runs, 0 skips
14093 decicycles in vc1_decode_i_block_adv, 64 runs, 0 skips
9542 decicycles in vc1_decode_i_block_adv, 128 runs, 0 skips
6809 decicycles in vc1_decode_i_block_adv, 256 runs, 0 skips
6349 decicycles in vc1_decode_i_block_adv, 512 runs, 0 skips
7303 decicycles in vc1_decode_i_block_adv, 1024 runs, 0 skips
8936 decicycles in vc1_decode_i_block_adv, 2048 runs, 0 skips
57120 decicycles in vc1_decode_p_block, 1 runs, 0 skips
52000 decicycles in vc1_decode_p_block, 2 runs, 0 skips
27188760 decicycles in vc1_decode_p_mb_intfi, 1 runs, 0 skips
47030 decicycles in vc1_decode_p_block, 4 runs, 0 skips
14698180 decicycles in vc1_decode_p_mb_intfi, 2 runs, 0 skips
46520 decicycles in vc1_decode_intra_block, 1 runs, 0 skips
32760 decicycles in vc1_decode_intra_block, 2 runs, 0 skips
29860 decicycles in vc1_decode_intra_block, 4 runs, 0 skips
34490 decicycles in vc1_decode_p_block, 8 runs, 0 skips
9363940 decicycles in vc1_decode_p_mb_intfi, 4 runs, 0 skips
25265 decicycles in vc1_decode_intra_block, 8 runs, 0 skips
19467 decicycles in vc1_decode_intra_block, 16 runs, 0 skips
5225080 decicycles in vc1_decode_p_mb_intfi, 8 runs, 0 skips
23847 decicycles in vc1_decode_p_block, 16 runs, 0 skips
13388 decicycles in vc1_decode_intra_block, 32 runs, 0 skips
2888357 decicycles in vc1_decode_p_mb_intfi, 16 runs, 0 skips
8846 decicycles in vc1_decode_intra_block, 64 runs, 0 skips
1530398 decicycles in vc1_decode_p_mb_intfi, 32 runs, 0 skips
7410 decicycles in vc1_decode_intra_block, 128 runs, 0 skips
15976 decicycles in vc1_decode_p_block, 32 runs, 0 skips
857584 decicycles in vc1_decode_p_mb_intfi, 64 runs, 0 skips
11717 decicycles in vc1_decode_p_block, 64 runs, 0 skips
7667 decicycles in vc1_decode_intra_block, 256 runs, 0 skips
485425 decicycles in vc1_decode_p_mb_intfi, 128 runs, 0 skips
9558 decicycles in vc1_decode_p_block, 128 runs, 0 skips
9939 decicycles in vc1_decode_p_block, 256 runs, 0 skips
287999 decicycles in vc1_decode_p_mb_intfi, 256 runs, 0 skips
8808 decicycles in vc1_decode_intra_block, 512 runs, 0 skips
11288 decicycles in vc1_decode_p_block, 512 runs, 0 skips
13544 decicycles in vc1_decode_p_block, 1024 runs, 0 skips
208198 decicycles in vc1_decode_p_mb_intfi, 510 runs, 2 skips
13573 decicycles in vc1_decode_intra_block, 1024 runs, 0 skips
138593 decicycles in vc1_decode_p_mb_intfi, 1021 runs, 3 skips
13239 decicycles in vc1_decode_intra_block, 2048 runs, 0 skips
13184 decicycles in vc1_decode_p_block, 2048 runs, 0 skips
337160 decicycles in vc1_decode_b_mb_intfi, 1 runs, 0 skips
309520 decicycles in vc1_decode_b_mb_intfi, 2 runs, 0 skips
200250 decicycles in vc1_decode_b_mb_intfi, 4 runs, 0 skips
126440 decicycles in vc1_decode_b_mb_intfi, 8 runs, 0 skips
80085 decicycles in vc1_decode_b_mb_intfi, 16 runs, 0 skips
63417 decicycles in vc1_decode_b_mb_intfi, 32 runs, 0 skips
48461 decicycles in vc1_decode_b_mb_intfi, 64 runs, 0 skips
37764 decicycles in vc1_decode_b_mb_intfi, 128 runs, 0 skips
36558 decicycles in vc1_decode_b_mb_intfi, 255 runs, 1 skips
12089 decicycles in vc1_decode_p_block, 4095 runs, 1 skips
50633 decicycles in vc1_decode_b_mb_intfi, 510 runs, 2 skips
44833 decicycles in vc1_decode_b_mb_intfi, 1022 runs, 2 skips
12613 decicycles in vc1_decode_intra_block, 4095 runs, 1 skips
109006 decicycles in vc1_decode_p_mb_intfi, 2042 runs, 6 skips
11326 decicycles in vc1_decode_p_block, 8191 runs, 1 skips
47920 decicycles in vc1_decode_b_mb_intfi, 2046 runs, 2 skips
90381 decicycles in vc1_decode_p_mb_intfi, 4090 runs, 6 skips
11445 decicycles in vc1_decode_intra_block, 8191 runs, 1 skips
10592 decicycles in vc1_decode_p_block, 16383 runs, 1 skips
45227 decicycles in vc1_decode_b_mb_intfi, 4094 runs, 2 skips
10848 decicycles in vc1_decode_intra_block, 16380 runs, 4 skips
79049 decicycles in vc1_decode_p_mb_intfi, 8183 runs, 9 skips
42179 decicycles in vc1_decode_b_mb_intfi, 8190 runs, 2 skips
10011 decicycles in vc1_decode_p_block, 32765 runs, 3 skips
9995 decicycles in vc1_decode_intra_block, 32760 runs, 8 skips
71169 decicycles in vc1_decode_p_mb_intfi, 16369 runs, 15 skips
38272 decicycles in vc1_decode_b_mb_intfi, 16372 runs, 12 skips
ffplay fate-suite/vc1/SA20021.vc1
before:
120560 decicycles in vc1_decode_i_block_adv, 1 runs, 0 skips
142580 decicycles in vc1_decode_i_block_adv, 2 runs, 0 skips
101460 decicycles in vc1_decode_i_block_adv, 4 runs, 0 skips
104735 decicycles in vc1_decode_i_block_adv, 8 runs, 0 skips
75700 decicycles in vc1_decode_i_block_adv, 16 runs, 0 skips
51305 decicycles in vc1_decode_i_block_adv, 32 runs, 0 skips
34807 decicycles in vc1_decode_i_block_adv, 64 runs, 0 skips
26759 decicycles in vc1_decode_i_block_adv, 128 runs, 0 skips
21359 decicycles in vc1_decode_i_block_adv, 255 runs, 1 skips
18802 decicycles in vc1_decode_i_block_adv, 511 runs, 1 skips
18105 decicycles in vc1_decode_i_block_adv, 1015 runs, 9 skips
16806 decicycles in vc1_decode_i_block_adv, 2038 runs, 10 skips
13855 decicycles in vc1_decode_i_block_adv, 4085 runs, 11 skips
74000 decicycles in vc1_decode_p_block, 1 runs, 0 skips
56900 decicycles in vc1_decode_p_block, 2 runs, 0 skips
52840 decicycles in vc1_decode_p_block, 4 runs, 0 skips
2556880 decicycles in vc1_decode_p_mb, 1 runs, 0 skips
94680 decicycles in vc1_decode_intra_block, 1 runs, 0 skips
66640 decicycles in vc1_decode_intra_block, 2 runs, 0 skips
46950 decicycles in vc1_decode_intra_block, 4 runs, 0 skips
1733940 decicycles in vc1_decode_p_mb, 2 runs, 0 skips
33230 decicycles in vc1_decode_p_block, 8 runs, 0 skips
33640 decicycles in vc1_decode_intra_block, 8 runs, 0 skips
1047390 decicycles in vc1_decode_p_mb, 4 runs, 0 skips
21462 decicycles in vc1_decode_p_block, 16 runs, 0 skips
24275 decicycles in vc1_decode_intra_block, 16 runs, 0 skips
636020 decicycles in vc1_decode_p_mb, 8 runs, 0 skips
17845 decicycles in vc1_decode_p_block, 32 runs, 0 skips
19492 decicycles in vc1_decode_intra_block, 32 runs, 0 skips
405710 decicycles in vc1_decode_p_mb, 16 runs, 0 skips
18106 decicycles in vc1_decode_intra_block, 64 runs, 0 skips
14416 decicycles in vc1_decode_p_block, 64 runs, 0 skips
287747 decicycles in vc1_decode_p_mb, 32 runs, 0 skips
15117 decicycles in vc1_decode_intra_block, 128 runs, 0 skips
12311 decicycles in vc1_decode_p_block, 128 runs, 0 skips
199671 decicycles in vc1_decode_p_mb, 63 runs, 1 skips
12412 decicycles in vc1_decode_p_block, 256 runs, 0 skips
16608 decicycles in vc1_decode_intra_block, 256 runs, 0 skips
171804 decicycles in vc1_decode_p_mb, 125 runs, 3 skips
13926 decicycles in vc1_decode_p_block, 512 runs, 0 skips
18409 decicycles in vc1_decode_intra_block, 512 runs, 0 skips
152801 decicycles in vc1_decode_p_mb, 251 runs, 5 skips
11951 decicycles in vc1_decode_p_block, 1024 runs, 0 skips
15527 decicycles in vc1_decode_intra_block, 1024 runs, 0 skips
118882 decicycles in vc1_decode_p_mb, 505 runs, 7 skips
12140 decicycles in vc1_decode_p_block, 2048 runs, 0 skips
16176 decicycles in vc1_decode_intra_block, 2046 runs, 2 skips
130388 decicycles in vc1_decode_p_mb, 1012 runs, 12 skips
14547 decicycles in vc1_decode_p_block, 4093 runs, 3 skips
15390 decicycles in vc1_decode_intra_block, 4091 runs, 5 skips
116925 decicycles in vc1_decode_p_mb, 2029 runs, 19 skips
13081 decicycles in vc1_decode_intra_block, 8183 runs, 9 skips
98449 decicycles in vc1_decode_p_mb, 4067 runs, 29 skips
12712 decicycles in vc1_decode_p_block, 8183 runs, 9 skips
10512 decicycles in vc1_decode_intra_block, 16370 runs, 14 skips
80419 decicycles in vc1_decode_p_mb, 8146 runs, 46 skips
11303 decicycles in vc1_decode_p_block, 16369 runs, 15 skips
8666 decicycles in vc1_decode_intra_block, 32740 runs, 28 skips
68238 decicycles in vc1_decode_p_mb, 16313 runs, 71 skips
10360 decicycles in vc1_decode_p_block, 32743 runs, 25 skips
8093 decicycles in vc1_decode_intra_block, 65467 runs, 69 skips
66530 decicycles in vc1_decode_p_mb, 32621 runs, 147 skips
after:
92560 decicycles in vc1_decode_i_block_adv, 1 runs, 0 skips
91300 decicycles in vc1_decode_i_block_adv, 2 runs, 0 skips
67220 decicycles in vc1_decode_i_block_adv, 4 runs, 0 skips
49370 decicycles in vc1_decode_i_block_adv, 8 runs, 0 skips
33667 decicycles in vc1_decode_i_block_adv, 16 runs, 0 skips
23667 decicycles in vc1_decode_i_block_adv, 32 runs, 0 skips
18514 decicycles in vc1_decode_i_block_adv, 64 runs, 0 skips
16161 decicycles in vc1_decode_i_block_adv, 128 runs, 0 skips
13475 decicycles in vc1_decode_i_block_adv, 256 runs, 0 skips
12499 decicycles in vc1_decode_i_block_adv, 512 runs, 0 skips
11917 decicycles in vc1_decode_i_block_adv, 1023 runs, 1 skips
11256 decicycles in vc1_decode_i_block_adv, 2046 runs, 2 skips
10976 decicycles in vc1_decode_i_block_adv, 4092 runs, 4 skips
90360 decicycles in vc1_decode_p_block, 1 runs, 0 skips
72720 decicycles in vc1_decode_p_block, 2 runs, 0 skips
60630 decicycles in vc1_decode_p_block, 4 runs, 0 skips
9561080 decicycles in vc1_decode_p_mb, 1 runs, 0 skips
134360 decicycles in vc1_decode_intra_block, 1 runs, 0 skips
91580 decicycles in vc1_decode_intra_block, 2 runs, 0 skips
60960 decicycles in vc1_decode_intra_block, 4 runs, 0 skips
8349620 decicycles in vc1_decode_p_mb, 2 runs, 0 skips
40990 decicycles in vc1_decode_p_block, 8 runs, 0 skips
42555 decicycles in vc1_decode_intra_block, 8 runs, 0 skips
5928030 decicycles in vc1_decode_p_mb, 4 runs, 0 skips
26845 decicycles in vc1_decode_p_block, 16 runs, 0 skips
30737 decicycles in vc1_decode_intra_block, 16 runs, 0 skips
3525450 decicycles in vc1_decode_p_mb, 8 runs, 0 skips
21793 decicycles in vc1_decode_p_block, 32 runs, 0 skips
23512 decicycles in vc1_decode_intra_block, 32 runs, 0 skips
2103475 decicycles in vc1_decode_p_mb, 16 runs, 0 skips
20467 decicycles in vc1_decode_intra_block, 64 runs, 0 skips
16995 decicycles in vc1_decode_p_block, 64 runs, 0 skips
1230400 decicycles in vc1_decode_p_mb, 32 runs, 0 skips
16564 decicycles in vc1_decode_intra_block, 128 runs, 0 skips
13764 decicycles in vc1_decode_p_block, 128 runs, 0 skips
750279 decicycles in vc1_decode_p_mb, 64 runs, 0 skips
11652 decicycles in vc1_decode_p_block, 256 runs, 0 skips
15900 decicycles in vc1_decode_intra_block, 256 runs, 0 skips
456913 decicycles in vc1_decode_p_mb, 128 runs, 0 skips
10833 decicycles in vc1_decode_p_block, 512 runs, 0 skips
14797 decicycles in vc1_decode_intra_block, 512 runs, 0 skips
291080 decicycles in vc1_decode_p_mb, 256 runs, 0 skips
10124 decicycles in vc1_decode_p_block, 1024 runs, 0 skips
13775 decicycles in vc1_decode_intra_block, 1023 runs, 1 skips
190311 decicycles in vc1_decode_p_mb, 510 runs, 2 skips
10360 decicycles in vc1_decode_p_block, 2046 runs, 2 skips
13429 decicycles in vc1_decode_intra_block, 2046 runs, 2 skips
144685 decicycles in vc1_decode_p_mb, 1020 runs, 4 skips
10555 decicycles in vc1_decode_p_block, 4092 runs, 4 skips
11877 decicycles in vc1_decode_intra_block, 4089 runs, 7 skips
105510 decicycles in vc1_decode_p_mb, 2036 runs, 12 skips
9920 decicycles in vc1_decode_intra_block, 8181 runs, 11 skips
78771 decicycles in vc1_decode_p_mb, 4075 runs, 21 skips
9712 decicycles in vc1_decode_p_block, 8185 runs, 7 skips
9013 decicycles in vc1_decode_intra_block, 16368 runs, 16 skips
70765 decicycles in vc1_decode_p_mb, 8154 runs, 38 skips
9780 decicycles in vc1_decode_p_block, 16367 runs, 17 skips
7972 decicycles in vc1_decode_intra_block, 32741 runs, 27 skips
63213 decicycles in vc1_decode_p_mb, 16319 runs, 65 skips
9337 decicycles in vc1_decode_p_block, 32738 runs, 30 skips
7542 decicycles in vc1_decode_intra_block, 65486 runs, 50 skips
60802 decicycles in vc1_decode_p_mb, 32639 runs, 129 skips
Signed-off-by: Zeng Zhaoxiu <zhaoxiu.zeng at gmail.com>
---
libavcodec/vc1_block.c | 1689 ++++++++++++++++++++++++------------------------
1 file changed, 846 insertions(+), 843 deletions(-)
diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c
index aa62ec2..1c0141e 100644
--- a/libavcodec/vc1_block.c
+++ b/libavcodec/vc1_block.c
@@ -40,8 +40,10 @@
#define DC_VLC_BITS 9
// offset tables for interlaced picture MVDATA decoding
-static const int offset_table1[9] = { 0, 1, 2, 4, 8, 16, 32, 64, 128 };
-static const int offset_table2[9] = { 0, 1, 3, 7, 15, 31, 63, 127, 255 };
+static const uint8_t offset_table[2][9] = {
+ { 0, 1, 2, 4, 8, 16, 32, 64, 128 },
+ { 0, 1, 3, 7, 15, 31, 63, 127, 255 },
+};
/***********************************************************************/
/**
@@ -51,7 +53,7 @@ static const int offset_table2[9] = { 0, 1, 3, 7, 15, 31, 63, 127, 255 };
*/
-static void init_block_index(VC1Context *v)
+static inline void init_block_index(VC1Context *v)
{
MpegEncContext *s = &v->s;
ff_init_block_index(s);
@@ -64,12 +66,9 @@ static void init_block_index(VC1Context *v)
/** @} */ //Bitplane group
-static void vc1_put_signed_blocks_clamped(VC1Context *v)
+static void vc1_put_signed_blocks_clamped(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
- int topleft_mb_pos, top_mb_pos;
- int stride_y, fieldtx = 0;
- int v_dist;
/* The put pixels loop is always one MB row behind the decoding loop,
* because we can only put pixels when overlap filtering is done, and
@@ -79,12 +78,16 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
* decoding loop. The reason for this is again, because for filtering
* of the right MB edge, we need the next MB present. */
if (!s->first_slice_line) {
+ int stride_y, fieldtx = 0;
+ int v_dist;
+
if (s->mb_x) {
- topleft_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x - 1;
- if (v->fcm == ILACE_FRAME)
+ if (v->fcm == ILACE_FRAME) {
+ int topleft_mb_pos = mb_pos - s->mb_stride - 1;
fieldtx = v->fieldtx_plane[topleft_mb_pos];
+ }
stride_y = s->linesize << fieldtx;
- v_dist = (16 - fieldtx) >> (fieldtx == 0);
+ v_dist = fieldtx ? 15 : 8;
s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
s->dest[0] - 16 * s->linesize - 16,
stride_y);
@@ -105,9 +108,10 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
s->uvlinesize);
}
if (s->mb_x == s->mb_width - 1) {
- top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x;
- if (v->fcm == ILACE_FRAME)
+ if (v->fcm == ILACE_FRAME) {
+ int top_mb_pos = mb_pos - s->mb_stride;
fieldtx = v->fieldtx_plane[top_mb_pos];
+ }
stride_y = s->linesize << fieldtx;
v_dist = fieldtx ? 15 : 8;
s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
@@ -156,7 +160,6 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
*/
#define GET_MQUANT() \
if (v->dquantfrm) { \
- int edges = 0; \
if (v->dqprofile == DQPROFILE_ALL_MBS) { \
if (v->dqbilevel) { \
mquant = (get_bits1(gb)) ? v->altpq : v->pq; \
@@ -167,21 +170,20 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
else \
mquant = get_bits(gb, 5); \
} \
+ } else { \
+ int edges; \
+ if (v->dqprofile == DQPROFILE_SINGLE_EDGE) \
+ edges = 1 << v->dqsbedge; \
+ else if (v->dqprofile == DQPROFILE_DOUBLE_EDGES) \
+ edges = (3 << v->dqsbedge) % 15; \
+ else /*if (v->dqprofile == DQPROFILE_FOUR_EDGES)*/ \
+ edges = 15; \
+ if (((edges & 1) && !s->mb_x) || \
+ ((edges & 2) && s->first_slice_line) || \
+ ((edges & 4) && s->mb_x == (s->mb_width - 1)) || \
+ ((edges & 8) && s->mb_y == (s->mb_height - 1))) \
+ mquant = v->altpq; \
} \
- if (v->dqprofile == DQPROFILE_SINGLE_EDGE) \
- edges = 1 << v->dqsbedge; \
- else if (v->dqprofile == DQPROFILE_DOUBLE_EDGES) \
- edges = (3 << v->dqsbedge) % 15; \
- else if (v->dqprofile == DQPROFILE_FOUR_EDGES) \
- edges = 15; \
- if ((edges&1) && !s->mb_x) \
- mquant = v->altpq; \
- if ((edges&2) && s->first_slice_line) \
- mquant = v->altpq; \
- if ((edges&4) && s->mb_x == (s->mb_width - 1)) \
- mquant = v->altpq; \
- if ((edges&8) && s->mb_y == (s->mb_height - 1)) \
- mquant = v->altpq; \
if (!mquant || mquant > 31) { \
av_log(v->s.avctx, AV_LOG_ERROR, \
"Overriding invalid mquant %d\n", mquant); \
@@ -199,11 +201,9 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
#define GET_MVDATA(_dmv_x, _dmv_y) \
index = 1 + get_vlc2(gb, ff_vc1_mv_diff_vlc[s->mv_table_index].table, \
VC1_MV_DIFF_VLC_BITS, 2); \
- if (index > 36) { \
- mb_has_coeffs = 1; \
+ mb_has_coeffs = (index >= 37); \
+ if (mb_has_coeffs) \
index -= 37; \
- } else \
- mb_has_coeffs = 0; \
s->mb_intra = 0; \
if (!index) { \
_dmv_x = _dmv_y = 0; \
@@ -216,33 +216,32 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
s->mb_intra = 1; \
} else { \
index1 = index % 6; \
- if (!s->quarter_sample && index1 == 5) val = 1; \
- else val = 0; \
- if (size_table[index1] - val > 0) \
- val = get_bits(gb, size_table[index1] - val); \
- else val = 0; \
- sign = 0 - (val&1); \
- _dmv_x = (sign ^ ((val>>1) + offset_table[index1])) - sign; \
+ _dmv_x = offset_table[1][index1]; \
+ val = size_table[index1] - (!s->quarter_sample && index1 == 5); \
+ if (val > 0) { \
+ val = get_bits(gb, val); \
+ sign = 0 - (val & 1); \
+ _dmv_x = (sign ^ ((val >> 1) + _dmv_x)) - sign; \
+ } \
\
index1 = index / 6; \
- if (!s->quarter_sample && index1 == 5) val = 1; \
- else val = 0; \
- if (size_table[index1] - val > 0) \
- val = get_bits(gb, size_table[index1] - val); \
- else val = 0; \
- sign = 0 - (val & 1); \
- _dmv_y = (sign ^ ((val >> 1) + offset_table[index1])) - sign; \
+ _dmv_y = offset_table[1][index1]; \
+ val = size_table[index1] - (!s->quarter_sample && index1 == 5); \
+ if (val > 0) { \
+ val = get_bits(gb, val); \
+ sign = 0 - (val & 1); \
+ _dmv_y = (sign ^ ((val >> 1) + _dmv_y)) - sign; \
+ } \
}
static av_always_inline void get_mvdata_interlaced(VC1Context *v, int *dmv_x,
int *dmv_y, int *pred_flag)
{
int index, index1;
- int extend_x = 0, extend_y = 0;
+ int extend_x, extend_y;
GetBitContext *gb = &v->s.gb;
int bits, esc;
int val, sign;
- const int* offs_tab;
if (v->numref) {
bits = VC1_2REF_MVDATA_VLC_BITS;
@@ -251,52 +250,32 @@ static av_always_inline void get_mvdata_interlaced(VC1Context *v, int *dmv_x,
bits = VC1_1REF_MVDATA_VLC_BITS;
esc = 71;
}
- switch (v->dmvrange) {
- case 1:
- extend_x = 1;
- break;
- case 2:
- extend_y = 1;
- break;
- case 3:
- extend_x = extend_y = 1;
- break;
- }
+ extend_x = v->dmvrange & 1;
+ extend_y = (v->dmvrange >> 1) & 1;
index = get_vlc2(gb, v->imv_vlc->table, bits, 3);
if (index == esc) {
*dmv_x = get_bits(gb, v->k_x);
*dmv_y = get_bits(gb, v->k_y);
if (v->numref) {
- if (pred_flag) {
+ if (pred_flag)
*pred_flag = *dmv_y & 1;
- *dmv_y = (*dmv_y + *pred_flag) >> 1;
- } else {
- *dmv_y = (*dmv_y + (*dmv_y & 1)) >> 1;
- }
+ *dmv_y = (*dmv_y + (*dmv_y & 1)) >> 1;
}
}
else {
av_assert0(index < esc);
- if (extend_x)
- offs_tab = offset_table2;
- else
- offs_tab = offset_table1;
index1 = (index + 1) % 9;
if (index1 != 0) {
val = get_bits(gb, index1 + extend_x);
- sign = 0 -(val & 1);
- *dmv_x = (sign ^ ((val >> 1) + offs_tab[index1])) - sign;
+ sign = 0 - (val & 1);
+ *dmv_x = (sign ^ ((val >> 1) + offset_table[extend_x][index1])) - sign;
} else
*dmv_x = 0;
- if (extend_y)
- offs_tab = offset_table2;
- else
- offs_tab = offset_table1;
index1 = (index + 1) / 9;
if (index1 > v->numref) {
- val = get_bits(gb, (index1 + (extend_y << v->numref)) >> v->numref);
+ val = get_bits(gb, (index1 >> v->numref) + extend_y);
sign = 0 - (val & 1);
- *dmv_y = (sign ^ ((val >> 1) + offs_tab[index1 >> v->numref])) - sign;
+ *dmv_y = (sign ^ ((val >> 1) + offset_table[extend_y][index1 >> v->numref])) - sign;
} else
*dmv_y = 0;
if (v->numref && pred_flag)
@@ -345,8 +324,10 @@ static inline int vc1_i_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
};
/* find prediction - wmv3_dc_scale always used here in fact */
- if (n < 4) scale = s->y_dc_scale;
- else scale = s->c_dc_scale;
+ if (n < 4)
+ scale = s->y_dc_scale;
+ else
+ scale = s->c_dc_scale;
wrap = s->block_wrap[n];
dc_val = s->dc_val[0] + s->block_index[n];
@@ -399,14 +380,19 @@ static inline int vc1_i_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
*/
static inline int ff_vc1_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
int a_avail, int c_avail,
- int16_t **dc_val_ptr, int *dir_ptr)
+ int16_t **dc_val_ptr, int *dir_ptr, int mb_pos)
{
int a, b, c, wrap, pred;
int16_t *dc_val;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
- int q1, q2 = 0;
+ int q1, q2;
int dqscale_index;
+ /* scale predictors if needed */
+ q1 = s->current_picture.qscale_table[mb_pos];
+ dqscale_index = s->y_dc_scale_table[q1] - 1;
+ if (dqscale_index < 0)
+ return 0;
+
wrap = s->block_wrap[n];
dc_val = s->dc_val[0] + s->block_index[n];
@@ -416,11 +402,7 @@ static inline int ff_vc1_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
c = dc_val[ - 1];
b = dc_val[ - 1 - wrap];
a = dc_val[ - wrap];
- /* scale predictors if needed */
- q1 = s->current_picture.qscale_table[mb_pos];
- dqscale_index = s->y_dc_scale_table[q1] - 1;
- if (dqscale_index < 0)
- return 0;
+
if (c_avail && (n != 1 && n != 3)) {
q2 = s->current_picture.qscale_table[mb_pos - 1];
if (q2 && q2 != q1)
@@ -442,20 +424,12 @@ static inline int ff_vc1_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
b = (b * s->y_dc_scale_table[q2] * ff_vc1_dqscale[dqscale_index] + 0x20000) >> 18;
}
- if (a_avail && c_avail) {
- if (abs(a - b) <= abs(b - c)) {
- pred = c;
- *dir_ptr = 1; // left
- } else {
- pred = a;
- *dir_ptr = 0; // top
- }
+ if (c_avail && (!a_avail || abs(a - b) <= abs(b - c))) {
+ pred = c;
+ *dir_ptr = 1; // left
} else if (a_avail) {
pred = a;
*dir_ptr = 0; // top
- } else if (c_avail) {
- pred = c;
- *dir_ptr = 1; // left
} else {
pred = 0;
*dir_ptr = 1; // left
@@ -514,17 +488,16 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip,
int *value, int codingset)
{
GetBitContext *gb = &v->s.gb;
- int index, escape, run = 0, level = 0, lst = 0;
+ int index, run, level, lst, sign;
index = get_vlc2(gb, ff_vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
if (index != ff_vc1_ac_sizes[codingset] - 1) {
run = vc1_index_decode_table[codingset][index][0];
level = vc1_index_decode_table[codingset][index][1];
lst = index >= vc1_last_decode_table[codingset] || get_bits_left(gb) < 0;
- if (get_bits1(gb))
- level = -level;
+ sign = get_bits1(gb);
} else {
- escape = decode210(gb);
+ int escape = decode210(gb);
if (escape != 2) {
index = get_vlc2(gb, ff_vc1_ac_coeff_table[codingset].table, AC_VLC_BITS, 3);
run = vc1_index_decode_table[codingset][index][0];
@@ -541,10 +514,8 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip,
else
run += vc1_delta_run_table[codingset][level] + 1;
}
- if (get_bits1(gb))
- level = -level;
+ sign = get_bits1(gb);
} else {
- int sign;
lst = get_bits1(gb);
if (v->s.esc3_level_length == 0) {
if (v->pq < 8 || v->dquantfrm) { // table 59
@@ -559,14 +530,12 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip,
run = get_bits(gb, v->s.esc3_run_length);
sign = get_bits1(gb);
level = get_bits(gb, v->s.esc3_level_length);
- if (sign)
- level = -level;
}
}
*last = lst;
*skip = run;
- *value = level;
+ *value = (level ^ -sign) + sign;
}
/** Decode intra block in intra frames - should be faster than decode_intra_block
@@ -585,29 +554,24 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
int i;
int16_t *dc_val;
int16_t *ac_val, *ac_val2;
- int dcdiff;
+ int dcdiff, scale;
/* Get DC differential */
- if (n < 4) {
+ if (n < 4)
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- } else {
+ else
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- }
if (dcdiff < 0) {
av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
return -1;
}
if (dcdiff) {
+ const int m = (v->pq == 1 || v->pq == 2) ? 3 - v->pq : 0;
if (dcdiff == 119 /* ESC index value */) {
- /* TODO: Optimize */
- if (v->pq == 1) dcdiff = get_bits(gb, 10);
- else if (v->pq == 2) dcdiff = get_bits(gb, 9);
- else dcdiff = get_bits(gb, 8);
+ dcdiff = get_bits(gb, 8 + m);
} else {
- if (v->pq == 1)
- dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3;
- else if (v->pq == 2)
- dcdiff = (dcdiff << 1) + get_bits1(gb) - 1;
+ if (m)
+ dcdiff = (dcdiff << m) + get_bits(gb, m) - ((1 << m) - 1);
}
if (get_bits1(gb))
dcdiff = -dcdiff;
@@ -618,27 +582,29 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
*dc_val = dcdiff;
/* Store the quantized DC coeff, used for prediction */
- if (n < 4) {
- block[0] = dcdiff * s->y_dc_scale;
- } else {
- block[0] = dcdiff * s->c_dc_scale;
- }
- /* Skip ? */
- if (!coded) {
- goto not_coded;
- }
+ if (n < 4)
+ scale = s->y_dc_scale;
+ else
+ scale = s->c_dc_scale;
+ block[0] = dcdiff * scale;
- // AC Decoding
- i = 1;
+ ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+ ac_val2 = ac_val;
+ if (dc_pred_dir) // left
+ ac_val -= 16;
+ else // top
+ ac_val -= 16 * s->block_wrap[n];
+
+ scale = v->pq * 2 + v->halfpq;
+
+ //AC Decoding
+ i = !!coded;
- {
+ if (coded) {
int last = 0, skip, value;
const uint8_t *zz_table;
- int scale;
int k;
- scale = v->pq * 2 + v->halfpq;
-
if (v->s.ac_pred) {
if (!dc_pred_dir)
zz_table = v->zz_8x8[2];
@@ -647,13 +613,6 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
} else
zz_table = v->zz_8x8[1];
- ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
- ac_val2 = ac_val;
- if (dc_pred_dir) // left
- ac_val -= 16;
- else // top
- ac_val -= 16 * s->block_wrap[n];
-
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
i += skip;
@@ -664,13 +623,15 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
/* apply AC prediction if needed */
if (s->ac_pred) {
+ int sh;
if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++)
- block[k << v->left_blk_sh] += ac_val[k];
+ sh = v->left_blk_sh;
} else { // top
- for (k = 1; k < 8; k++)
- block[k << v->top_blk_sh] += ac_val[k + 8];
+ sh = v->top_blk_sh;
+ ac_val += 8;
}
+ for (k = 1; k < 8; k++)
+ block[k << sh] += ac_val[k];
}
/* save AC coeffs for further prediction */
for (k = 1; k < 8; k++) {
@@ -686,46 +647,30 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
block[k] += (block[k] < 0) ? -v->pq : v->pq;
}
- if (s->ac_pred) i = 63;
- }
-
-not_coded:
- if (!coded) {
- int k, scale;
- ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
- ac_val2 = ac_val;
+ } else {
+ int k;
- i = 0;
- scale = v->pq * 2 + v->halfpq;
memset(ac_val2, 0, 16 * 2);
- if (dc_pred_dir) { // left
- ac_val -= 16;
- if (s->ac_pred)
- memcpy(ac_val2, ac_val, 8 * 2);
- } else { // top
- ac_val -= 16 * s->block_wrap[n];
- if (s->ac_pred)
- memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
- }
/* apply AC prediction if needed */
if (s->ac_pred) {
+ int sh;
if (dc_pred_dir) { //left
- for (k = 1; k < 8; k++) {
- block[k << v->left_blk_sh] = ac_val[k] * scale;
- if (!v->pquantizer && block[k << v->left_blk_sh])
- block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -v->pq : v->pq;
- }
+ sh = v->left_blk_sh;
} else { // top
- for (k = 1; k < 8; k++) {
- block[k << v->top_blk_sh] = ac_val[k + 8] * scale;
- if (!v->pquantizer && block[k << v->top_blk_sh])
- block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -v->pq : v->pq;
- }
+ sh = v->top_blk_sh;
+ ac_val += 8;
+ ac_val2 += 8;
+ }
+ memcpy(ac_val2, ac_val, 8 * 2);
+ for (k = 1; k < 8; k++) {
+ block[k << sh] = ac_val[k] * scale;
+ if (!v->pquantizer && block[k << sh])
+ block[k << sh] += (block[k << sh] < 0) ? -v->pq : v->pq;
}
- i = 63;
}
}
+ if (s->ac_pred) i = 63;
s->block_last_index[n] = i;
return 0;
@@ -740,7 +685,7 @@ not_coded:
* @param mquant quantizer value for this macroblock
*/
static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
- int coded, int codingset, int mquant)
+ int coded, int codingset, int mquant, int mb_pos)
{
GetBitContext *gb = &v->s.gb;
MpegEncContext *s = &v->s;
@@ -753,72 +698,69 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
int use_pred = s->ac_pred;
int scale;
int q1, q2 = 0;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
/* Get DC differential */
- if (n < 4) {
+ if (n < 4)
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- } else {
+ else
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- }
if (dcdiff < 0) {
av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
return -1;
}
if (dcdiff) {
+ const int m = (mquant == 1 || mquant == 2) ? 3 - mquant : 0;
if (dcdiff == 119 /* ESC index value */) {
- /* TODO: Optimize */
- if (mquant == 1) dcdiff = get_bits(gb, 10);
- else if (mquant == 2) dcdiff = get_bits(gb, 9);
- else dcdiff = get_bits(gb, 8);
+ dcdiff = get_bits(gb, 8 + m);
} else {
- if (mquant == 1)
- dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3;
- else if (mquant == 2)
- dcdiff = (dcdiff << 1) + get_bits1(gb) - 1;
+ if (m)
+ dcdiff = (dcdiff << m) + get_bits(gb, m) - ((1 << m) - 1);
}
if (get_bits1(gb))
dcdiff = -dcdiff;
}
/* Prediction */
- dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, v->a_avail, v->c_avail, &dc_val, &dc_pred_dir);
+ dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, v->a_avail, v->c_avail, &dc_val, &dc_pred_dir, mb_pos);
*dc_val = dcdiff;
/* Store the quantized DC coeff, used for prediction */
- if (n < 4) {
- block[0] = dcdiff * s->y_dc_scale;
- } else {
- block[0] = dcdiff * s->c_dc_scale;
- }
-
- //AC Decoding
- i = 1;
+ if (n < 4)
+ scale = s->y_dc_scale;
+ else
+ scale = s->c_dc_scale;
+ block[0] = dcdiff * scale;
/* check if AC is needed at all */
if (!a_avail && !c_avail)
use_pred = 0;
+
ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
ac_val2 = ac_val;
-
- scale = mquant * 2 + ((mquant == v->pq) ? v->halfpq : 0);
-
if (dc_pred_dir) // left
ac_val -= 16;
else // top
ac_val -= 16 * s->block_wrap[n];
+ scale = mquant * 2 + ((mquant == v->pq) ? v->halfpq : 0);
+
q1 = s->current_picture.qscale_table[mb_pos];
- if ( dc_pred_dir && c_avail && mb_pos)
- q2 = s->current_picture.qscale_table[mb_pos - 1];
- if (!dc_pred_dir && a_avail && mb_pos >= s->mb_stride)
- q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
- if ( dc_pred_dir && n == 1)
- q2 = q1;
- if (!dc_pred_dir && n == 2)
- q2 = q1;
if (n == 3)
q2 = q1;
+ else if (dc_pred_dir) {
+ if (n == 1)
+ q2 = q1;
+ else if (c_avail && mb_pos)
+ q2 = s->current_picture.qscale_table[mb_pos - 1];
+ } else {
+ if (n == 2)
+ q2 = q1;
+ else if (a_avail && mb_pos >= s->mb_stride)
+ q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+ }
+
+ //AC Decoding
+ i = 1;
if (coded) {
int last = 0, skip, value;
@@ -851,28 +793,24 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
/* apply AC prediction if needed */
if (use_pred) {
+ int sh;
+ if (dc_pred_dir) { // left
+ sh = v->left_blk_sh;
+ } else { // top
+ sh = v->top_blk_sh;
+ ac_val += 8;
+ }
/* scale predictors if needed*/
if (q2 && q1 != q2) {
q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-
if (q1 < 1)
return AVERROR_INVALIDDATA;
- if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++)
- block[k << v->left_blk_sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- } else { // top
- for (k = 1; k < 8; k++)
- block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
+ q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+ for (k = 1; k < 8; k++)
+ block[k << sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else {
- if (dc_pred_dir) { //left
- for (k = 1; k < 8; k++)
- block[k << v->left_blk_sh] += ac_val[k];
- } else { //top
- for (k = 1; k < 8; k++)
- block[k << v->top_blk_sh] += ac_val[k + 8];
- }
+ for (k = 1; k < 8; k++)
+ block[k << sh] += ac_val[k];
}
}
/* save AC coeffs for further prediction */
@@ -889,55 +827,38 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
block[k] += (block[k] < 0) ? -mquant : mquant;
}
- if (use_pred) i = 63;
} else { // no AC coeffs
int k;
memset(ac_val2, 0, 16 * 2);
- if (dc_pred_dir) { // left
- if (use_pred) {
- memcpy(ac_val2, ac_val, 8 * 2);
- if (q2 && q1 != q2) {
- q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
- if (q1 < 1)
- return AVERROR_INVALIDDATA;
- for (k = 1; k < 8; k++)
- ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
- }
- } else { // top
- if (use_pred) {
- memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
- if (q2 && q1 != q2) {
- q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
- if (q1 < 1)
- return AVERROR_INVALIDDATA;
- for (k = 1; k < 8; k++)
- ac_val2[k + 8] = (ac_val2[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
- }
- }
/* apply AC prediction if needed */
if (use_pred) {
+ int sh;
if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++) {
- block[k << v->left_blk_sh] = ac_val2[k] * scale;
- if (!v->pquantizer && block[k << v->left_blk_sh])
- block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -mquant : mquant;
- }
+ sh = v->left_blk_sh;
} else { // top
- for (k = 1; k < 8; k++) {
- block[k << v->top_blk_sh] = ac_val2[k + 8] * scale;
- if (!v->pquantizer && block[k << v->top_blk_sh])
- block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -mquant : mquant;
- }
+ sh = v->top_blk_sh;
+ ac_val += 8;
+ ac_val2 += 8;
+ }
+ memcpy(ac_val2, ac_val, 8 * 2);
+ if (q2 && q1 != q2) {
+ q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+ q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+ if (q1 < 1)
+ return AVERROR_INVALIDDATA;
+ for (k = 1; k < 8; k++)
+ ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ }
+ for (k = 1; k < 8; k++) {
+ block[k << sh] = ac_val2[k] * scale;
+ if (!v->pquantizer && block[k << sh])
+ block[k << sh] += (block[k << sh] < 0) ? -mquant : mquant;
}
- i = 63;
}
}
+ if (use_pred) i = 63;
s->block_last_index[n] = i;
return 0;
@@ -952,7 +873,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
* @param codingset set of VLC to decode data
*/
static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
- int coded, int mquant, int codingset)
+ int coded, int mquant, int codingset, int mb_pos)
{
GetBitContext *gb = &v->s.gb;
MpegEncContext *s = &v->s;
@@ -961,7 +882,6 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
int16_t *dc_val = NULL;
int16_t *ac_val, *ac_val2;
int dcdiff;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int a_avail = v->a_avail, c_avail = v->c_avail;
int use_pred = s->ac_pred;
int scale;
@@ -970,125 +890,122 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
s->bdsp.clear_block(block);
/* XXX: Guard against dumb values of mquant */
- mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant);
+ mquant = av_clip(mquant, 0, 31);
/* Set DC scale - y and c use the same */
s->y_dc_scale = s->y_dc_scale_table[mquant];
s->c_dc_scale = s->c_dc_scale_table[mquant];
/* Get DC differential */
- if (n < 4) {
+ if (n < 4)
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- } else {
+ else
dcdiff = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3);
- }
if (dcdiff < 0) {
av_log(s->avctx, AV_LOG_ERROR, "Illegal DC VLC\n");
return -1;
}
if (dcdiff) {
+ const int m = (mquant == 1 || mquant == 2) ? 3 - mquant : 0;
if (dcdiff == 119 /* ESC index value */) {
- /* TODO: Optimize */
- if (mquant == 1) dcdiff = get_bits(gb, 10);
- else if (mquant == 2) dcdiff = get_bits(gb, 9);
- else dcdiff = get_bits(gb, 8);
+ dcdiff = get_bits(gb, 8 + m);
} else {
- if (mquant == 1)
- dcdiff = (dcdiff << 2) + get_bits(gb, 2) - 3;
- else if (mquant == 2)
- dcdiff = (dcdiff << 1) + get_bits1(gb) - 1;
+ if (m)
+ dcdiff = (dcdiff << m) + get_bits(gb, m) - ((1 << m) - 1);
}
if (get_bits1(gb))
dcdiff = -dcdiff;
}
/* Prediction */
- dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, a_avail, c_avail, &dc_val, &dc_pred_dir);
+ dcdiff += ff_vc1_pred_dc(&v->s, v->overlap, mquant, n, a_avail, c_avail, &dc_val, &dc_pred_dir, mb_pos);
*dc_val = dcdiff;
/* Store the quantized DC coeff, used for prediction */
-
- if (n < 4) {
- block[0] = dcdiff * s->y_dc_scale;
- } else {
- block[0] = dcdiff * s->c_dc_scale;
- }
-
- //AC Decoding
- i = 1;
+ if (n < 4)
+ scale = s->y_dc_scale;
+ else
+ scale = s->c_dc_scale;
+ block[0] = dcdiff * scale;
/* check if AC is needed at all and adjust direction if needed */
if (!a_avail) dc_pred_dir = 1;
if (!c_avail) dc_pred_dir = 0;
if (!a_avail && !c_avail) use_pred = 0;
+
ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
ac_val2 = ac_val;
-
- scale = mquant * 2 + v->halfpq;
-
if (dc_pred_dir) //left
ac_val -= 16;
else //top
ac_val -= 16 * s->block_wrap[n];
+ scale = mquant * 2 + v->halfpq;
+
q1 = s->current_picture.qscale_table[mb_pos];
- if (dc_pred_dir && c_avail && mb_pos)
- q2 = s->current_picture.qscale_table[mb_pos - 1];
- if (!dc_pred_dir && a_avail && mb_pos >= s->mb_stride)
- q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
- if ( dc_pred_dir && n == 1)
- q2 = q1;
- if (!dc_pred_dir && n == 2)
+ if (n == 3)
q2 = q1;
- if (n == 3) q2 = q1;
+ else if (dc_pred_dir) {
+ if (n == 1)
+ q2 = q1;
+ else if (c_avail && mb_pos)
+ q2 = s->current_picture.qscale_table[mb_pos - 1];
+ } else {
+ if (n == 2)
+ q2 = q1;
+ else if (a_avail && mb_pos >= s->mb_stride)
+ q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
+ }
+
+ //AC Decoding
+ i = 1;
if (coded) {
int last = 0, skip, value;
+ const uint8_t *zz_table;
int k;
+ if (v->fcm == PROGRESSIVE)
+ zz_table = v->zz_8x8[0];
+ else {
+ if (use_pred && (v->fcm == ILACE_FRAME)) {
+ if (!dc_pred_dir) // top
+ zz_table = v->zz_8x8[2];
+ else // left
+ zz_table = v->zz_8x8[3];
+ } else {
+ zz_table = v->zzi_8x8;
+ }
+ }
+
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
i += skip;
if (i > 63)
break;
- if (v->fcm == PROGRESSIVE)
- block[v->zz_8x8[0][i++]] = value;
- else {
- if (use_pred && (v->fcm == ILACE_FRAME)) {
- if (!dc_pred_dir) // top
- block[v->zz_8x8[2][i++]] = value;
- else // left
- block[v->zz_8x8[3][i++]] = value;
- } else {
- block[v->zzi_8x8[i++]] = value;
- }
- }
+ block[zz_table[i++]] = value;
}
/* apply AC prediction if needed */
if (use_pred) {
+ int sh;
+ if (dc_pred_dir) { // left
+ sh = v->left_blk_sh;
+ } else { //top
+ sh = v->top_blk_sh;
+ ac_val += 8;
+ }
/* scale predictors if needed*/
if (q2 && q1 != q2) {
q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
-
if (q1 < 1)
return AVERROR_INVALIDDATA;
- if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++)
- block[k << v->left_blk_sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- } else { //top
- for (k = 1; k < 8; k++)
- block[k << v->top_blk_sh] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
+ q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+ for (k = 1; k < 8; k++)
+ block[k << sh] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else {
- if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++)
- block[k << v->left_blk_sh] += ac_val[k];
- } else { // top
- for (k = 1; k < 8; k++)
- block[k << v->top_blk_sh] += ac_val[k + 8];
- }
+ for (k = 1; k < 8; k++)
+ block[k << sh] += ac_val[k];
}
}
/* save AC coeffs for further prediction */
@@ -1105,55 +1022,38 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
block[k] += (block[k] < 0) ? -mquant : mquant;
}
- if (use_pred) i = 63;
} else { // no AC coeffs
int k;
memset(ac_val2, 0, 16 * 2);
- if (dc_pred_dir) { // left
- if (use_pred) {
- memcpy(ac_val2, ac_val, 8 * 2);
- if (q2 && q1 != q2) {
- q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
- if (q1 < 1)
- return AVERROR_INVALIDDATA;
- for (k = 1; k < 8; k++)
- ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
- }
- } else { // top
- if (use_pred) {
- memcpy(ac_val2 + 8, ac_val + 8, 8 * 2);
- if (q2 && q1 != q2) {
- q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
- q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
- if (q1 < 1)
- return AVERROR_INVALIDDATA;
- for (k = 1; k < 8; k++)
- ac_val2[k + 8] = (ac_val2[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
- }
- }
- }
/* apply AC prediction if needed */
if (use_pred) {
+ int sh;
if (dc_pred_dir) { // left
- for (k = 1; k < 8; k++) {
- block[k << v->left_blk_sh] = ac_val2[k] * scale;
- if (!v->pquantizer && block[k << v->left_blk_sh])
- block[k << v->left_blk_sh] += (block[k << v->left_blk_sh] < 0) ? -mquant : mquant;
- }
+ sh = v->left_blk_sh;
} else { // top
- for (k = 1; k < 8; k++) {
- block[k << v->top_blk_sh] = ac_val2[k + 8] * scale;
- if (!v->pquantizer && block[k << v->top_blk_sh])
- block[k << v->top_blk_sh] += (block[k << v->top_blk_sh] < 0) ? -mquant : mquant;
- }
+ sh = v->top_blk_sh;
+ ac_val += 8;
+ ac_val2 += 8;
+ }
+ memcpy(ac_val2, ac_val, 8 * 2);
+ if (q2 && q1 != q2) {
+ q1 = q1 * 2 + ((q1 == v->pq) ? v->halfpq : 0) - 1;
+ if (q1 < 1)
+ return AVERROR_INVALIDDATA;
+ q2 = q2 * 2 + ((q2 == v->pq) ? v->halfpq : 0) - 1;
+ for (k = 1; k < 8; k++)
+ ac_val2[k] = (ac_val2[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ }
+ for (k = 1; k < 8; k++) {
+ block[k << sh] = ac_val2[k] * scale;
+ if (!v->pquantizer && block[k << sh])
+ block[k << sh] += (block[k << sh] < 0) ? -mquant : mquant;
}
- i = 63;
}
}
+ if (use_pred) i = 63;
s->block_last_index[n] = i;
return 0;
@@ -1173,6 +1073,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
int scale, off, idx, last, skip, value;
int ttblk = ttmb & 7;
int pat = 0;
+ const uint8_t *zz_table;
s->bdsp.clear_block(block);
@@ -1190,34 +1091,33 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
subblkpat ^= 3; // swap decoded pattern bits
if (ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM)
ttblk = TT_8X4;
- if (ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT)
+ else if (ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT)
ttblk = TT_4X8;
}
- scale = 2 * mquant + ((v->pq == mquant) ? v->halfpq : 0);
-
// convert transforms like 8X4_TOP to generic TT and SUBBLKPAT
- if (ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM) {
+ else if (ttblk == TT_8X4_TOP || ttblk == TT_8X4_BOTTOM) {
subblkpat = 2 - (ttblk == TT_8X4_TOP);
ttblk = TT_8X4;
}
- if (ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT) {
+ else if (ttblk == TT_4X8_RIGHT || ttblk == TT_4X8_LEFT) {
subblkpat = 2 - (ttblk == TT_4X8_LEFT);
ttblk = TT_4X8;
}
+
+ scale = 2 * mquant + ((v->pq == mquant) ? v->halfpq : 0);
+
switch (ttblk) {
case TT_8X8:
pat = 0xF;
i = 0;
last = 0;
+ zz_table = !v->fcm ? v->zz_8x8[0] : v->zzi_8x8;
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
i += skip;
if (i > 63)
break;
- if (!v->fcm)
- idx = v->zz_8x8[0][i++];
- else
- idx = v->zzi_8x8[i++];
+ idx = zz_table[i++];
block[idx] = value * scale;
if (!v->pquantizer)
block[idx] += (block[idx] < 0) ? -mquant : mquant;
@@ -1234,34 +1134,33 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
case TT_4X4:
pat = ~subblkpat & 0xF;
for (j = 0; j < 4; j++) {
- last = subblkpat & (1 << (3 - j));
+ last = subblkpat & (8 >> j);
i = 0;
off = (j & 1) * 4 + (j & 2) * 16;
+ zz_table = !v->fcm ? ff_vc1_simple_progressive_4x4_zz : ff_vc1_adv_interlaced_4x4_zz;
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, v->codingset2);
i += skip;
if (i > 15)
break;
- if (!v->fcm)
- idx = ff_vc1_simple_progressive_4x4_zz[i++];
- else
- idx = ff_vc1_adv_interlaced_4x4_zz[i++];
+ idx = zz_table[i++];
block[idx + off] = value * scale;
if (!v->pquantizer)
block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
}
- if (!(subblkpat & (1 << (3 - j))) && !skip_block) {
+ if (!(subblkpat & (8 >> j)) && !skip_block) {
if (i == 1)
- v->vc1dsp.vc1_inv_trans_4x4_dc(dst + (j & 1) * 4 + (j & 2) * 2 * linesize, linesize, block + off);
+ v->vc1dsp.vc1_inv_trans_4x4_dc(dst + ((j & 1) + (j & 2 ? linesize : 0)) * 4, linesize, block + off);
else
- v->vc1dsp.vc1_inv_trans_4x4(dst + (j & 1) * 4 + (j & 2) * 2 * linesize, linesize, block + off);
+ v->vc1dsp.vc1_inv_trans_4x4(dst + ((j & 1) + (j & 2 ? linesize : 0)) * 4, linesize, block + off);
}
}
break;
case TT_8X4:
pat = ~((subblkpat & 2) * 6 + (subblkpat & 1) * 3) & 0xF;
+ zz_table = !v->fcm ? v->zz_8x4 : ff_vc1_adv_interlaced_8x4_zz;
for (j = 0; j < 2; j++) {
- last = subblkpat & (1 << (1 - j));
+ last = subblkpat & (2 >> j);
i = 0;
off = j * 32;
while (!last) {
@@ -1269,15 +1168,12 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
i += skip;
if (i > 31)
break;
- if (!v->fcm)
- idx = v->zz_8x4[i++] + off;
- else
- idx = ff_vc1_adv_interlaced_8x4_zz[i++] + off;
+ idx = zz_table[i++] + off;
block[idx] = value * scale;
if (!v->pquantizer)
block[idx] += (block[idx] < 0) ? -mquant : mquant;
}
- if (!(subblkpat & (1 << (1 - j))) && !skip_block) {
+ if (!(subblkpat & (2 >> j)) && !skip_block) {
if (i == 1)
v->vc1dsp.vc1_inv_trans_8x4_dc(dst + j * 4 * linesize, linesize, block + off);
else
@@ -1287,8 +1183,9 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
break;
case TT_4X8:
pat = ~(subblkpat * 5) & 0xF;
+ zz_table = !v->fcm ? v->zz_4x8 : ff_vc1_adv_interlaced_4x8_zz;
for (j = 0; j < 2; j++) {
- last = subblkpat & (1 << (1 - j));
+ last = subblkpat & (2 >> j);
i = 0;
off = j * 4;
while (!last) {
@@ -1296,15 +1193,12 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
i += skip;
if (i > 31)
break;
- if (!v->fcm)
- idx = v->zz_4x8[i++] + off;
- else
- idx = ff_vc1_adv_interlaced_4x8_zz[i++] + off;
+ idx = zz_table[i++] + off;
block[idx] = value * scale;
if (!v->pquantizer)
block[idx] += (block[idx] < 0) ? -mquant : mquant;
}
- if (!(subblkpat & (1 << (1 - j))) && !skip_block) {
+ if (!(subblkpat & (2 >> j)) && !skip_block) {
if (i == 1)
v->vc1dsp.vc1_inv_trans_4x8_dc(dst + j * 4, linesize, block + off);
else
@@ -1320,17 +1214,15 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
/** @} */ // Macroblock group
-static const int size_table [6] = { 0, 2, 3, 4, 5, 8 };
-static const int offset_table[6] = { 0, 1, 3, 7, 15, 31 };
+static const uint8_t size_table[6] = { 0, 2, 3, 4, 5, 8 };
/** Decode one P-frame MB
*/
-static int vc1_decode_p_mb(VC1Context *v)
+static int vc1_decode_p_mb(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i, j;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
@@ -1340,7 +1232,7 @@ static int vc1_decode_p_mb(VC1Context *v)
int index, index1; /* LUT indexes */
int val, sign; /* temp values */
int first_block = 1;
- int dst_idx, off;
+ int off;
int skipped, fourmv;
int block_cbp = 0, pat, block_tt = 0, block_intra = 0;
@@ -1355,8 +1247,8 @@ static int vc1_decode_p_mb(VC1Context *v)
else
skipped = v->s.mbskip_table[mb_pos];
- if (!fourmv) { /* 1MV mode */
- if (!skipped) {
+ if (!skipped) {
+ if (!fourmv) { /* 1MV mode */
GET_MVDATA(dmv_x, dmv_y);
if (s->mb_intra) {
@@ -1367,83 +1259,97 @@ static int vc1_decode_p_mb(VC1Context *v)
ff_vc1_pred_mv(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], 0, 0);
/* FIXME Set DC val for inter block ? */
- if (s->mb_intra && !mb_has_coeffs) {
- GET_MQUANT();
- s->ac_pred = get_bits1(gb);
- cbp = 0;
- } else if (mb_has_coeffs) {
+ if (mb_has_coeffs) {
if (s->mb_intra)
s->ac_pred = get_bits1(gb);
cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
GET_MQUANT();
} else {
- mquant = v->pq;
- cbp = 0;
+ if (s->mb_intra) {
+ GET_MQUANT();
+ s->ac_pred = get_bits1(gb);
+ }
+ cbp = 0;
}
s->current_picture.qscale_table[mb_pos] = mquant;
if (!v->ttmbf && !s->mb_intra && mb_has_coeffs)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table,
VC1_TTMB_VLC_BITS, 2);
- if (!s->mb_intra) ff_vc1_mc_1mv(v, 0);
- dst_idx = 0;
+ if (!s->mb_intra)
+ ff_vc1_mc_1mv(v, 0);
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
v->mb_type[0][s->block_index[i]] = s->mb_intra;
if (s->mb_intra) {
/* check if prediction blocks A and C are available */
v->a_avail = v->c_avail = 0;
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- i & 4 ? s->uvlinesize
- : s->linesize);
- if (v->pq >= 9 && v->overlap) {
- if (v->c_avail)
- v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
- if (v->a_avail)
- v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize);
+ if (v->pq >= 9 && v->overlap) {
+ if (v->c_avail)
+ v->vc1dsp.vc1_h_overlap(s->dest[0] + off, s->linesize);
+ if (v->a_avail)
+ v->vc1dsp.vc1_v_overlap(s->dest[0] + off, s->linesize);
+ }
+ } else {
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
+ if (v->pq >= 9 && v->overlap) {
+ if (v->c_avail)
+ v->vc1dsp.vc1_h_overlap(s->dest[i - 3], s->uvlinesize);
+ if (v->a_avail)
+ v->vc1dsp.vc1_v_overlap(s->dest[i - 3], s->uvlinesize);
+ }
}
block_cbp |= 0xF << (i << 2);
block_intra |= 1 << i;
} else if (val) {
- pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
- s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize,
- (i & 4) && (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
+ s->dest[0] + off, s->linesize,
+ 0, &block_tt);
+ } else {
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
+ s->dest[i - 3], s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ }
block_cbp |= pat << (i << 2);
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
first_block = 0;
}
}
- } else { // skipped
- s->mb_intra = 0;
- for (i = 0; i < 6; i++) {
- v->mb_type[0][s->block_index[i]] = 0;
- s->dc_val[0][s->block_index[i]] = 0;
- }
- s->current_picture.mb_type[mb_pos] = MB_TYPE_SKIP;
- s->current_picture.qscale_table[mb_pos] = 0;
- ff_vc1_pred_mv(v, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0], 0, 0);
- ff_vc1_mc_1mv(v, 0);
- }
- } else { // 4MV mode
- if (!skipped /* unskipped MB */) {
+ } else { // 4MV mode
int intra_count = 0, coded_inter = 0;
int is_intra[6], is_coded[6];
/* Get CBPCY */
@@ -1454,7 +1360,6 @@ static int vc1_decode_p_mb(VC1Context *v)
s->mb_intra = 0;
if (i < 4) {
dmv_x = dmv_y = 0;
- s->mb_intra = 0;
mb_has_coeffs = 0;
if (val) {
GET_MVDATA(dmv_x, dmv_y);
@@ -1465,98 +1370,126 @@ static int vc1_decode_p_mb(VC1Context *v)
intra_count += s->mb_intra;
is_intra[i] = s->mb_intra;
is_coded[i] = mb_has_coeffs;
- }
- if (i & 4) {
+ } else {
is_intra[i] = (intra_count >= 3);
is_coded[i] = val;
+ if (i == 4)
+ ff_vc1_mc_4mv_chroma(v, 0);
}
- if (i == 4)
- ff_vc1_mc_4mv_chroma(v, 0);
v->mb_type[0][s->block_index[i]] = is_intra[i];
- if (!coded_inter)
- coded_inter = !is_intra[i] & is_coded[i];
+ coded_inter += !is_intra[i] & is_coded[i];
}
// if there are no coded blocks then don't do anything more
- dst_idx = 0;
if (!intra_count && !coded_inter)
goto end;
GET_MQUANT();
s->current_picture.qscale_table[mb_pos] = mquant;
/* test if block is intra and has pred */
- {
- int intrapred = 0;
- for (i = 0; i < 6; i++)
- if (is_intra[i]) {
- if (((!s->first_slice_line || (i == 2 || i == 3)) && v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
- || ((s->mb_x || (i == 1 || i == 3)) && v->mb_type[0][s->block_index[i] - 1])) {
- intrapred = 1;
- break;
- }
- }
- if (intrapred)
- s->ac_pred = get_bits1(gb);
- else
- s->ac_pred = 0;
- }
+ for (i = 0; i < 6; i++)
+ if (is_intra[i]) {
+ if (((!s->first_slice_line || (i == 2 || i == 3)) &&
+ v->mb_type[0][s->block_index[i] - s->block_wrap[i]]) ||
+ ((s->mb_x || (i == 1 || i == 3)) &&
+ v->mb_type[0][s->block_index[i] - 1]))
+ break;
+ }
+ if (i < 6)
+ s->ac_pred = get_bits1(gb);
+ else
+ s->ac_pred = 0;
if (!v->ttmbf && coded_inter)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
for (i = 0; i < 6; i++) {
- dst_idx += i >> 2;
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
s->mb_intra = is_intra[i];
if (is_intra[i]) {
/* check if prediction blocks A and C are available */
v->a_avail = v->c_avail = 0;
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
- vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize
- : s->linesize);
- if (v->pq >= 9 && v->overlap) {
- if (v->c_avail)
- v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
- if (v->a_avail)
- v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize);
+ if (v->pq >= 9 && v->overlap) {
+ if (v->c_avail)
+ v->vc1dsp.vc1_h_overlap(s->dest[0] + off, s->linesize);
+ if (v->a_avail)
+ v->vc1dsp.vc1_v_overlap(s->dest[0] + off, s->linesize);
+ }
+ } else {
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
+ if (v->pq >= 9 && v->overlap) {
+ if (v->c_avail)
+ v->vc1dsp.vc1_h_overlap(s->dest[i - 3], s->uvlinesize);
+ if (v->a_avail)
+ v->vc1dsp.vc1_v_overlap(s->dest[i - 3], s->uvlinesize);
+ }
}
block_cbp |= 0xF << (i << 2);
block_intra |= 1 << i;
} else if (is_coded[i]) {
- pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : s->linesize,
- (i & 4) && (s->flags & CODEC_FLAG_GRAY),
- &block_tt);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ s->linesize,
+ 0,
+ &block_tt);
+ } else {
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY),
+ &block_tt);
+ }
block_cbp |= pat << (i << 2);
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
first_block = 0;
}
}
- } else { // skipped MB
- s->mb_intra = 0;
- s->current_picture.qscale_table[mb_pos] = 0;
- for (i = 0; i < 6; i++) {
- v->mb_type[0][s->block_index[i]] = 0;
- s->dc_val[0][s->block_index[i]] = 0;
- }
+ }
+ } else { // skipped
+ s->mb_intra = 0;
+ for (i = 0; i < 6; i++) {
+ v->mb_type[0][s->block_index[i]] = 0;
+ s->dc_val[0][s->block_index[i]] = 0;
+ }
+ s->current_picture.qscale_table[mb_pos] = 0;
+ if (!fourmv) { /* 1MV mode */
+ s->current_picture.mb_type[mb_pos] = MB_TYPE_SKIP;
+ ff_vc1_pred_mv(v, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0], 0, 0);
+ ff_vc1_mc_1mv(v, 0);
+ } else { // 4MV mode
for (i = 0; i < 4; i++) {
ff_vc1_pred_mv(v, i, 0, 0, 0, v->range_x, v->range_y, v->mb_type[0], 0, 0);
ff_vc1_mc_4mv_luma(v, i, 0, 0);
}
ff_vc1_mc_4mv_chroma(v, 0);
- s->current_picture.qscale_table[mb_pos] = 0;
}
}
end:
@@ -1569,12 +1502,11 @@ end:
/* Decode one macroblock in an interlaced frame p picture */
-static int vc1_decode_p_mb_intfr(VC1Context *v)
+static int vc1_decode_p_mb_intfr(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
@@ -1583,11 +1515,11 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
int dmv_x, dmv_y; /* Differential MV components */
int val; /* temp value */
int first_block = 1;
- int dst_idx, off;
+ int off;
int skipped, fourmv = 0, twomv = 0;
int block_cbp = 0, pat, block_tt = 0;
int idx_mbmode = 0, mvbp;
- int stride_y, fieldtx;
+ int fieldtx;
mquant = v->pq; /* Lossy initialization */
@@ -1648,32 +1580,42 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
/* Set DC scale - y and c use the same (not sure if necessary here) */
s->y_dc_scale = s->y_dc_scale_table[mquant];
s->c_dc_scale = s->c_dc_scale_table[mquant];
- dst_idx = 0;
for (i = 0; i < 6; i++) {
v->a_avail = v->c_avail = 0;
v->mb_type[0][s->block_index[i]] = 1;
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (i < 4) {
- stride_y = s->linesize << fieldtx;
- off = (fieldtx) ? ((i & 1) * 8) + ((i & 2) >> 1) * s->linesize : (i & 1) * 8 + 4 * (i & 2) * s->linesize;
+ off = fieldtx ? ((i & 1) * 8 + (i & 2 ? s->linesize : 0))
+ : ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize << fieldtx);
} else {
- stride_y = s->uvlinesize;
- off = 0;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
}
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- stride_y);
//TODO: loop filter
}
@@ -1683,46 +1625,36 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
cbp = 1 + get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
if (ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][0] == MV_PMODE_INTFR_2MV_FIELD) {
v->twomvbp = get_vlc2(gb, v->twomvbp_vlc->table, VC1_2MV_BLOCK_PATTERN_VLC_BITS, 1);
- } else {
- if ((ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][0] == MV_PMODE_INTFR_4MV)
- || (ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][0] == MV_PMODE_INTFR_4MV_FIELD)) {
- v->fourmvbp = get_vlc2(gb, v->fourmvbp_vlc->table, VC1_4MV_BLOCK_PATTERN_VLC_BITS, 1);
- }
+ } else if (ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][0] == MV_PMODE_INTFR_4MV ||
+ ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][0] == MV_PMODE_INTFR_4MV_FIELD) {
+ v->fourmvbp = get_vlc2(gb, v->fourmvbp_vlc->table, VC1_4MV_BLOCK_PATTERN_VLC_BITS, 1);
}
s->mb_intra = v->is_intra[s->mb_x] = 0;
for (i = 0; i < 6; i++)
v->mb_type[0][s->block_index[i]] = 0;
fieldtx = v->fieldtx_plane[mb_pos] = ff_vc1_mbmode_intfrp[v->fourmvswitch][idx_mbmode][1];
/* for all motion vector read MVDATA and motion compensate each block */
- dst_idx = 0;
if (fourmv) {
mvbp = v->fourmvbp;
- for (i = 0; i < 6; i++) {
- if (i < 4) {
- dmv_x = dmv_y = 0;
- val = ((mvbp >> (3 - i)) & 1);
- if (val) {
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- }
- ff_vc1_pred_mv_intfr(v, i, dmv_x, dmv_y, 0, v->range_x, v->range_y, v->mb_type[0], 0);
- ff_vc1_mc_4mv_luma(v, i, 0, 0);
- } else if (i == 4) {
- ff_vc1_mc_4mv_chroma4(v, 0, 0, 0);
- }
+ for (i = 0; i < 4; i++) {
+ dmv_x = dmv_y = 0;
+ if (mvbp & (8 >> i))
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ ff_vc1_pred_mv_intfr(v, i, dmv_x, dmv_y, 0, v->range_x, v->range_y, v->mb_type[0], 0);
+ ff_vc1_mc_4mv_luma(v, i, 0, 0);
}
+ ff_vc1_mc_4mv_chroma4(v, 0, 0, 0);
} else if (twomv) {
mvbp = v->twomvbp;
dmv_x = dmv_y = 0;
- if (mvbp & 2) {
+ if (mvbp & 2)
get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- }
ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], 0);
ff_vc1_mc_4mv_luma(v, 0, 0, 0);
ff_vc1_mc_4mv_luma(v, 1, 0, 0);
dmv_x = dmv_y = 0;
- if (mvbp & 1) {
+ if (mvbp & 1)
get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- }
ff_vc1_pred_mv_intfr(v, 2, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], 0);
ff_vc1_mc_4mv_luma(v, 2, 0, 0);
ff_vc1_mc_4mv_luma(v, 3, 0, 0);
@@ -1743,17 +1675,22 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
- val = ((cbp >> (5 - i)) & 1);
- if (!fieldtx)
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- else
- off = (i & 4) ? 0 : ((i & 1) * 8 + ((i > 1) * s->linesize));
- if (val) {
- pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
- (i & 4) && (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ if (cbp & (32 >> i)) {
+ if (i < 4) {
+ if (!fieldtx)
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ else
+ off = ((i & 1) * 8 + (i & 2 ? s->linesize : 0));
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ (s->linesize << fieldtx),
+ 0, &block_tt);
+ } else {
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ }
block_cbp |= pat << (i << 2);
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
@@ -1781,12 +1718,11 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
return 0;
}
-static int vc1_decode_p_mb_intfi(VC1Context *v)
+static int vc1_decode_p_mb_intfi(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
@@ -1795,7 +1731,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
int dmv_x, dmv_y; /* Differential MV components */
int val; /* temp values */
int first_block = 1;
- int dst_idx, off;
+ int off;
int pred_flag = 0;
int block_cbp = 0, pat, block_tt = 0;
int idx_mbmode = 0;
@@ -1818,56 +1754,65 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
mb_has_coeffs = idx_mbmode & 1;
if (mb_has_coeffs)
cbp = 1 + get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_ICBPCY_VLC_BITS, 2);
- dst_idx = 0;
for (i = 0; i < 6; i++) {
v->a_avail = v->c_avail = 0;
v->mb_type[0][s->block_index[i]] = 1;
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
-
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize
- : s->linesize);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize);
+ } else {
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
+ }
// TODO: loop filter
}
} else {
s->mb_intra = v->is_intra[s->mb_x] = 0;
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_16x16;
- for (i = 0; i < 6; i++) v->mb_type[0][s->block_index[i]] = 0;
+ for (i = 0; i < 6; i++)
+ v->mb_type[0][s->block_index[i]] = 0;
if (idx_mbmode <= 5) { // 1-MV
dmv_x = dmv_y = pred_flag = 0;
- if (idx_mbmode & 1) {
+ if (idx_mbmode & 1)
get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag);
- }
ff_vc1_pred_mv(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], pred_flag, 0);
ff_vc1_mc_1mv(v, 0);
mb_has_coeffs = !(idx_mbmode & 2);
} else { // 4-MV
v->fourmvbp = get_vlc2(gb, v->fourmvbp_vlc->table, VC1_4MV_BLOCK_PATTERN_VLC_BITS, 1);
- for (i = 0; i < 6; i++) {
- if (i < 4) {
- dmv_x = dmv_y = pred_flag = 0;
- val = ((v->fourmvbp >> (3 - i)) & 1);
- if (val) {
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag);
- }
- ff_vc1_pred_mv(v, i, dmv_x, dmv_y, 0, v->range_x, v->range_y, v->mb_type[0], pred_flag, 0);
- ff_vc1_mc_4mv_luma(v, i, 0, 0);
- } else if (i == 4)
- ff_vc1_mc_4mv_chroma(v, 0);
+ for (i = 0; i < 4; i++) {
+ dmv_x = dmv_y = pred_flag = 0;
+ if (v->fourmvbp & (8 >> i))
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag);
+ ff_vc1_pred_mv(v, i, dmv_x, dmv_y, 0, v->range_x, v->range_y, v->mb_type[0], pred_flag, 0);
+ ff_vc1_mc_4mv_luma(v, i, 0, 0);
}
+ ff_vc1_mc_4mv_chroma(v, 0);
mb_has_coeffs = idx_mbmode & 1;
}
if (mb_has_coeffs)
@@ -1879,20 +1824,26 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
if (!v->ttmbf && cbp) {
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
}
- dst_idx = 0;
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
- val = ((cbp >> (5 - i)) & 1);
- off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
- if (val) {
- pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : s->linesize,
- (i & 4) && (s->flags & CODEC_FLAG_GRAY),
- &block_tt);
+ if (cbp & (32 >> i)) {
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ s->linesize,
+ 0,
+ &block_tt);
+ } else {
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY),
+ &block_tt);
+ }
block_cbp |= pat << (i << 2);
- if (!v->ttmbf && ttmb < 8) ttmb = -1;
+ if (!v->ttmbf && ttmb < 8)
+ ttmb = -1;
first_block = 0;
}
}
@@ -1904,12 +1855,11 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
/** Decode one B-frame MB (in Main profile)
*/
-static void vc1_decode_b_mb(VC1Context *v)
+static void vc1_decode_b_mb(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i, j;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
@@ -1917,7 +1867,7 @@ static void vc1_decode_b_mb(VC1Context *v)
int index, index1; /* LUT indexes */
int val, sign; /* temp values */
int first_block = 1;
- int dst_idx, off;
+ int off;
int skipped, direct;
int dmv_x[2], dmv_y[2];
int bmvtype = BMV_TYPE_BACKWARD;
@@ -1982,74 +1932,93 @@ static void vc1_decode_b_mb(VC1Context *v)
dmv_x[0] = dmv_y[0] = dmv_x[1] = dmv_y[1] = 0;
ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
- } else {
- if (!mb_has_coeffs && !s->mb_intra) {
+ } else if (!mb_has_coeffs) {
+ if (!s->mb_intra) {
/* no coded blocks - effectively skipped */
ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
return;
- }
- if (s->mb_intra && !mb_has_coeffs) {
+ } else {
GET_MQUANT();
s->current_picture.qscale_table[mb_pos] = mquant;
s->ac_pred = get_bits1(gb);
cbp = 0;
ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
- } else {
- if (bmvtype == BMV_TYPE_INTERPOLATED) {
- GET_MVDATA(dmv_x[0], dmv_y[0]);
- if (!mb_has_coeffs) {
- /* interpolated skipped block */
- ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
- vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
- return;
- }
- }
- ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
- if (!s->mb_intra) {
+ }
+ } else {
+ if (bmvtype == BMV_TYPE_INTERPOLATED) {
+ GET_MVDATA(dmv_x[0], dmv_y[0]);
+ if (!mb_has_coeffs) {
+ /* interpolated skipped block */
+ ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+ return;
}
- if (s->mb_intra)
- s->ac_pred = get_bits1(gb);
- cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
- GET_MQUANT();
- s->current_picture.qscale_table[mb_pos] = mquant;
- if (!v->ttmbf && !s->mb_intra && mb_has_coeffs)
- ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
}
+ ff_vc1_pred_b_mv(v, dmv_x, dmv_y, direct, bmvtype);
+ if (!s->mb_intra)
+ vc1_b_mc(v, dmv_x, dmv_y, direct, bmvtype);
+ else
+ s->ac_pred = get_bits1(gb);
+ cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
+ GET_MQUANT();
+ s->current_picture.qscale_table[mb_pos] = mquant;
+ if (!v->ttmbf && !s->mb_intra && mb_has_coeffs)
+ ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
}
- dst_idx = 0;
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
v->mb_type[0][s->block_index[i]] = s->mb_intra;
if (s->mb_intra) {
/* check if prediction blocks A and C are available */
v->a_avail = v->c_avail = 0;
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
-
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- i & 4 ? s->uvlinesize
- : s->linesize);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize);
+ } else {
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
+ }
} else if (val) {
- vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : s->linesize,
- (i & 4) && (s->flags & CODEC_FLAG_GRAY), NULL);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ s->linesize,
+ 0, NULL);
+ } else {
+ vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY), NULL);
+ }
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
first_block = 0;
@@ -2059,19 +2028,18 @@ static void vc1_decode_b_mb(VC1Context *v)
/** Decode one B-frame MB (in interlaced field B picture)
*/
-static void vc1_decode_b_mb_intfi(VC1Context *v)
+static void vc1_decode_b_mb_intfi(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i, j;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
int mb_has_coeffs = 0; /* last_flag */
int val; /* temp value */
int first_block = 1;
- int dst_idx, off;
+ int off;
int fwd;
int dmv_x[2], dmv_y[2], pred_flag[2];
int bmvtype = BMV_TYPE_BACKWARD;
@@ -2096,37 +2064,51 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
mb_has_coeffs = idx_mbmode & 1;
if (mb_has_coeffs)
cbp = 1 + get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_ICBPCY_VLC_BITS, 2);
- dst_idx = 0;
for (i = 0; i < 6; i++) {
v->a_avail = v->c_avail = 0;
v->mb_type[0][s->block_index[i]] = 1;
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
-
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if ((i>3) && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[i][j] <<= 1;
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize
- : s->linesize);
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize);
+ } else {
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
+ }
// TODO: yet to perform loop filter
}
} else {
s->mb_intra = v->is_intra[s->mb_x] = 0;
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_16x16;
- for (i = 0; i < 6; i++) v->mb_type[0][s->block_index[i]] = 0;
+ for (i = 0; i < 6; i++)
+ v->mb_type[0][s->block_index[i]] = 0;
if (v->fmb_is_raw)
fwd = v->forward_mb_plane[mb_pos] = get_bits1(gb);
else
@@ -2174,21 +2156,18 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
bmvtype = BMV_TYPE_FORWARD;
v->bmvtype = bmvtype;
v->fourmvbp = get_vlc2(gb, v->fourmvbp_vlc->table, VC1_4MV_BLOCK_PATTERN_VLC_BITS, 1);
- for (i = 0; i < 6; i++) {
- if (i < 4) {
- dmv_x[0] = dmv_y[0] = pred_flag[0] = 0;
- dmv_x[1] = dmv_y[1] = pred_flag[1] = 0;
- val = ((v->fourmvbp >> (3 - i)) & 1);
- if (val) {
- get_mvdata_interlaced(v, &dmv_x[bmvtype == BMV_TYPE_BACKWARD],
- &dmv_y[bmvtype == BMV_TYPE_BACKWARD],
- &pred_flag[bmvtype == BMV_TYPE_BACKWARD]);
- }
- ff_vc1_pred_b_mv_intfi(v, i, dmv_x, dmv_y, 0, pred_flag);
- ff_vc1_mc_4mv_luma(v, i, bmvtype == BMV_TYPE_BACKWARD, 0);
- } else if (i == 4)
- ff_vc1_mc_4mv_chroma(v, bmvtype == BMV_TYPE_BACKWARD);
+ for (i = 0; i < 4; i++) {
+ dmv_x[0] = dmv_y[0] = pred_flag[0] = 0;
+ dmv_x[1] = dmv_y[1] = pred_flag[1] = 0;
+ if (v->fourmvbp & (8 >> i)) {
+ get_mvdata_interlaced(v, &dmv_x[bmvtype == BMV_TYPE_BACKWARD],
+ &dmv_y[bmvtype == BMV_TYPE_BACKWARD],
+ &pred_flag[bmvtype == BMV_TYPE_BACKWARD]);
+ }
+ ff_vc1_pred_b_mv_intfi(v, i, dmv_x, dmv_y, 0, pred_flag);
+ ff_vc1_mc_4mv_luma(v, i, bmvtype == BMV_TYPE_BACKWARD, 0);
}
+ ff_vc1_mc_4mv_chroma(v, bmvtype == BMV_TYPE_BACKWARD);
mb_has_coeffs = idx_mbmode & 1;
}
if (mb_has_coeffs)
@@ -2200,17 +2179,21 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
if (!v->ttmbf && cbp) {
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
}
- dst_idx = 0;
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
- val = ((cbp >> (5 - i)) & 1);
- off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
- if (val) {
- vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : s->linesize,
- (i & 4) && (s->flags & CODEC_FLAG_GRAY), NULL);
+ if (cbp & (32 >> i)) {
+ if (i < 4) {
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ s->linesize,
+ 0, NULL);
+ } else {
+ vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY), NULL);
+ }
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
first_block = 0;
@@ -2221,12 +2204,11 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
/** Decode one B-frame MB (in interlaced frame B picture)
*/
-static int vc1_decode_b_mb_intfr(VC1Context *v)
+static int vc1_decode_b_mb_intfr(VC1Context *v, int mb_pos)
{
MpegEncContext *s = &v->s;
GetBitContext *gb = &s->gb;
int i, j;
- int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
int cbp = 0; /* cbp decoding stuff */
int mqdiff, mquant; /* MB quantization */
int ttmb = v->ttfrm; /* MB Transform type */
@@ -2235,11 +2217,11 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
int dmv_x, dmv_y; /* Differential MV components */
int val; /* temp value */
int first_block = 1;
- int dst_idx, off;
+ int off;
int skipped, direct, twomv = 0;
int block_cbp = 0, pat, block_tt = 0;
int idx_mbmode = 0, mvbp;
- int stride_y, fieldtx;
+ int fieldtx;
int bmvtype = BMV_TYPE_BACKWARD;
int dir, dir2;
@@ -2321,33 +2303,42 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
/* Set DC scale - y and c use the same (not sure if necessary here) */
s->y_dc_scale = s->y_dc_scale_table[mquant];
s->c_dc_scale = s->c_dc_scale_table[mquant];
- dst_idx = 0;
for (i = 0; i < 6; i++) {
v->a_avail = v->c_avail = 0;
v->mb_type[0][s->block_index[i]] = 1;
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
val = ((cbp >> (5 - i)) & 1);
- if (i == 2 || i == 3 || !s->first_slice_line)
- v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
- if (i == 1 || i == 3 || s->mb_x)
- v->c_avail = v->mb_type[0][s->block_index[i] - 1];
-
- vc1_decode_intra_block(v, s->block[i], i, val, mquant,
- (i & 4) ? v->codingset2 : v->codingset);
- if (i > 3 && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ if (i < 4) {
+ if (!s->first_slice_line || (i & 2))
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x || (i & 1))
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset, mb_pos);
+ } else {
+ if (!s->first_slice_line)
+ v->a_avail = v->mb_type[0][s->block_index[i] - s->block_wrap[i]];
+ if (s->mb_x)
+ v->c_avail = v->mb_type[0][s->block_index[i] - 1];
+
+ vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+ v->codingset2, mb_pos);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
if (i < 4) {
- stride_y = s->linesize << fieldtx;
- off = (fieldtx) ? ((i & 1) * 8) + ((i & 2) >> 1) * s->linesize : (i & 1) * 8 + 4 * (i & 2) * s->linesize;
+ off = fieldtx ? ((i & 1) * 8 + (i & 2 ? s->linesize : 0))
+ : ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[0] + off,
+ s->linesize << fieldtx);
} else {
- stride_y = s->uvlinesize;
- off = 0;
+ s->idsp.put_signed_pixels_clamped(s->block[i],
+ s->dest[i - 3],
+ s->uvlinesize);
}
- s->idsp.put_signed_pixels_clamped(s->block[i],
- s->dest[dst_idx] + off,
- stride_y);
}
} else {
s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -2386,102 +2377,100 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
v->mb_type[0][s->block_index[i]] = 0;
fieldtx = v->fieldtx_plane[mb_pos] = ff_vc1_mbmode_intfrp[0][idx_mbmode][1];
/* for all motion vector read MVDATA and motion compensate each block */
- dst_idx = 0;
- if (direct) {
- if (twomv) {
+ if (twomv) {
+ if (direct) {
for (i = 0; i < 4; i++) {
ff_vc1_mc_4mv_luma(v, i, 0, 0);
ff_vc1_mc_4mv_luma(v, i, 1, 1);
}
ff_vc1_mc_4mv_chroma4(v, 0, 0, 0);
ff_vc1_mc_4mv_chroma4(v, 1, 1, 1);
+ } else if (bmvtype == BMV_TYPE_INTERPOLATED) {
+ mvbp = v->fourmvbp;
+ for (i = 0; i < 4; i++) {
+ dir = i & 1;
+ dmv_x = dmv_y = 0;
+ if (mvbp & (8 >> i))
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ j = i & 2;
+ ff_vc1_pred_mv_intfr(v, j, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir);
+ ff_vc1_mc_4mv_luma(v, j, dir, dir);
+ ff_vc1_mc_4mv_luma(v, j+1, dir, dir);
+ }
+
+ ff_vc1_mc_4mv_chroma4(v, 0, 0, 0);
+ ff_vc1_mc_4mv_chroma4(v, 1, 1, 1);
} else {
- ff_vc1_mc_1mv(v, 0);
- ff_vc1_interp_mc(v);
- }
- } else if (twomv && bmvtype == BMV_TYPE_INTERPOLATED) {
- mvbp = v->fourmvbp;
- for (i = 0; i < 4; i++) {
- dir = i==1 || i==3;
+ dir = bmvtype == BMV_TYPE_BACKWARD;
+ dir2 = dir ^ mvsw;
+ mvbp = v->twomvbp;
dmv_x = dmv_y = 0;
- val = ((mvbp >> (3 - i)) & 1);
- if (val)
+ if (mvbp & 2)
get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- j = i > 1 ? 2 : 0;
- ff_vc1_pred_mv_intfr(v, j, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir);
- ff_vc1_mc_4mv_luma(v, j, dir, dir);
- ff_vc1_mc_4mv_luma(v, j+1, dir, dir);
- }
+ ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir);
- ff_vc1_mc_4mv_chroma4(v, 0, 0, 0);
- ff_vc1_mc_4mv_chroma4(v, 1, 1, 1);
- } else if (bmvtype == BMV_TYPE_INTERPOLATED) {
- mvbp = v->twomvbp;
- dmv_x = dmv_y = 0;
- if (mvbp & 2)
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ dmv_x = dmv_y = 0;
+ if (mvbp & 1)
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ ff_vc1_pred_mv_intfr(v, 2, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir2);
- ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], 0);
- ff_vc1_mc_1mv(v, 0);
+ if (mvsw) {
+ for (i = 0; i < 2; i++) {
+ s->mv[dir][i+2][0] = s->mv[dir][i][0] = s->current_picture.motion_val[dir][s->block_index[i+2]][0] = s->current_picture.motion_val[dir][s->block_index[i]][0];
+ s->mv[dir][i+2][1] = s->mv[dir][i][1] = s->current_picture.motion_val[dir][s->block_index[i+2]][1] = s->current_picture.motion_val[dir][s->block_index[i]][1];
+ s->mv[dir2][i+2][0] = s->mv[dir2][i][0] = s->current_picture.motion_val[dir2][s->block_index[i]][0] = s->current_picture.motion_val[dir2][s->block_index[i+2]][0];
+ s->mv[dir2][i+2][1] = s->mv[dir2][i][1] = s->current_picture.motion_val[dir2][s->block_index[i]][1] = s->current_picture.motion_val[dir2][s->block_index[i+2]][1];
+ }
+ } else {
+ ff_vc1_pred_mv_intfr(v, 0, 0, 0, 2, v->range_x, v->range_y, v->mb_type[0], !dir);
+ ff_vc1_pred_mv_intfr(v, 2, 0, 0, 2, v->range_x, v->range_y, v->mb_type[0], !dir);
+ }
- dmv_x = dmv_y = 0;
- if (mvbp & 1)
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ ff_vc1_mc_4mv_luma(v, 0, dir, 0);
+ ff_vc1_mc_4mv_luma(v, 1, dir, 0);
+ ff_vc1_mc_4mv_luma(v, 2, dir2, 0);
+ ff_vc1_mc_4mv_luma(v, 3, dir2, 0);
+ ff_vc1_mc_4mv_chroma4(v, dir, dir2, 0);
+ }
+ } else {
+ if (direct) {
+ ff_vc1_mc_1mv(v, 0);
+ ff_vc1_interp_mc(v);
+ } else if (bmvtype == BMV_TYPE_INTERPOLATED) {
+ mvbp = v->twomvbp;
+ dmv_x = dmv_y = 0;
+ if (mvbp & 2)
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], 1);
- ff_vc1_interp_mc(v);
- } else if (twomv) {
- dir = bmvtype == BMV_TYPE_BACKWARD;
- dir2 = dir;
- if (mvsw)
- dir2 = !dir;
- mvbp = v->twomvbp;
- dmv_x = dmv_y = 0;
- if (mvbp & 2)
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir);
+ ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], 0);
+ ff_vc1_mc_1mv(v, 0);
- dmv_x = dmv_y = 0;
- if (mvbp & 1)
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- ff_vc1_pred_mv_intfr(v, 2, dmv_x, dmv_y, 2, v->range_x, v->range_y, v->mb_type[0], dir2);
+ dmv_x = dmv_y = 0;
+ if (mvbp & 1)
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- if (mvsw) {
- for (i = 0; i < 2; i++) {
- s->mv[dir][i+2][0] = s->mv[dir][i][0] = s->current_picture.motion_val[dir][s->block_index[i+2]][0] = s->current_picture.motion_val[dir][s->block_index[i]][0];
- s->mv[dir][i+2][1] = s->mv[dir][i][1] = s->current_picture.motion_val[dir][s->block_index[i+2]][1] = s->current_picture.motion_val[dir][s->block_index[i]][1];
- s->mv[dir2][i+2][0] = s->mv[dir2][i][0] = s->current_picture.motion_val[dir2][s->block_index[i]][0] = s->current_picture.motion_val[dir2][s->block_index[i+2]][0];
- s->mv[dir2][i+2][1] = s->mv[dir2][i][1] = s->current_picture.motion_val[dir2][s->block_index[i]][1] = s->current_picture.motion_val[dir2][s->block_index[i+2]][1];
- }
+ ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], 1);
+ ff_vc1_interp_mc(v);
} else {
- ff_vc1_pred_mv_intfr(v, 0, 0, 0, 2, v->range_x, v->range_y, v->mb_type[0], !dir);
- ff_vc1_pred_mv_intfr(v, 2, 0, 0, 2, v->range_x, v->range_y, v->mb_type[0], !dir);
- }
-
- ff_vc1_mc_4mv_luma(v, 0, dir, 0);
- ff_vc1_mc_4mv_luma(v, 1, dir, 0);
- ff_vc1_mc_4mv_luma(v, 2, dir2, 0);
- ff_vc1_mc_4mv_luma(v, 3, dir2, 0);
- ff_vc1_mc_4mv_chroma4(v, dir, dir2, 0);
- } else {
- dir = bmvtype == BMV_TYPE_BACKWARD;
+ dir = bmvtype == BMV_TYPE_BACKWARD;
- mvbp = ff_vc1_mbmode_intfrp[0][idx_mbmode][2];
- dmv_x = dmv_y = 0;
- if (mvbp)
- get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
+ mvbp = ff_vc1_mbmode_intfrp[0][idx_mbmode][2];
+ dmv_x = dmv_y = 0;
+ if (mvbp)
+ get_mvdata_interlaced(v, &dmv_x, &dmv_y, 0);
- ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], dir);
- v->blk_mv_type[s->block_index[0]] = 1;
- v->blk_mv_type[s->block_index[1]] = 1;
- v->blk_mv_type[s->block_index[2]] = 1;
- v->blk_mv_type[s->block_index[3]] = 1;
- ff_vc1_pred_mv_intfr(v, 0, 0, 0, 2, v->range_x, v->range_y, 0, !dir);
- for (i = 0; i < 2; i++) {
- s->mv[!dir][i+2][0] = s->mv[!dir][i][0] = s->current_picture.motion_val[!dir][s->block_index[i+2]][0] = s->current_picture.motion_val[!dir][s->block_index[i]][0];
- s->mv[!dir][i+2][1] = s->mv[!dir][i][1] = s->current_picture.motion_val[!dir][s->block_index[i+2]][1] = s->current_picture.motion_val[!dir][s->block_index[i]][1];
+ ff_vc1_pred_mv_intfr(v, 0, dmv_x, dmv_y, 1, v->range_x, v->range_y, v->mb_type[0], dir);
+ v->blk_mv_type[s->block_index[0]] = 1;
+ v->blk_mv_type[s->block_index[1]] = 1;
+ v->blk_mv_type[s->block_index[2]] = 1;
+ v->blk_mv_type[s->block_index[3]] = 1;
+ ff_vc1_pred_mv_intfr(v, 0, 0, 0, 2, v->range_x, v->range_y, 0, !dir);
+ for (i = 0; i < 2; i++) {
+ s->mv[!dir][i+2][0] = s->mv[!dir][i][0] = s->current_picture.motion_val[!dir][s->block_index[i+2]][0] = s->current_picture.motion_val[!dir][s->block_index[i]][0];
+ s->mv[!dir][i+2][1] = s->mv[!dir][i][1] = s->current_picture.motion_val[!dir][s->block_index[i+2]][1] = s->current_picture.motion_val[!dir][s->block_index[i]][1];
+ }
+ ff_vc1_mc_1mv(v, dir);
}
- ff_vc1_mc_1mv(v, dir);
}
if (cbp)
@@ -2491,17 +2480,22 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
for (i = 0; i < 6; i++) {
s->dc_val[0][s->block_index[i]] = 0;
- dst_idx += i >> 2;
- val = ((cbp >> (5 - i)) & 1);
- if (!fieldtx)
- off = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
- else
- off = (i & 4) ? 0 : ((i & 1) * 8 + ((i > 1) * s->linesize));
- if (val) {
- pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
- first_block, s->dest[dst_idx] + off,
- (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
- (i & 4) && (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ if (cbp & (32 >> i)) {
+ if (i < 4) {
+ if (!fieldtx)
+ off = ((i & 1) + (i & 2 ? s->linesize : 0)) * 8;
+ else
+ off = ((i & 1) * 8 + (i & 2 ? s->linesize : 0));
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[0] + off,
+ (s->linesize << fieldtx),
+ 0, &block_tt);
+ } else {
+ pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+ first_block, s->dest[i - 3],
+ s->uvlinesize,
+ (s->flags & CODEC_FLAG_GRAY), &block_tt);
+ }
block_cbp |= pat << (i << 2);
if (!v->ttmbf && ttmb < 8)
ttmb = -1;
@@ -2530,9 +2524,7 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
dir = bmvtype == BMV_TYPE_BACKWARD;
ff_vc1_pred_mv_intfr(v, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0], dir);
if (mvsw) {
- int dir2 = dir;
- if (mvsw)
- dir2 = !dir;
+ int dir2 = !dir;
for (i = 0; i < 2; i++) {
s->mv[dir][i+2][0] = s->mv[dir][i][0] = s->current_picture.motion_val[dir][s->block_index[i+2]][0] = s->current_picture.motion_val[dir][s->block_index[i]][0];
s->mv[dir][i+2][1] = s->mv[dir][i][1] = s->current_picture.motion_val[dir][s->block_index[i+2]][1] = s->current_picture.motion_val[dir][s->block_index[i]][1];
@@ -2574,7 +2566,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
MpegEncContext *s = &v->s;
int cbp, val;
uint8_t *coded_val;
- int mb_pos;
+ int mb_index = 0;
/* select codingmode used for VLC tables selection */
switch (v->y_ac_table_index) {
@@ -2606,7 +2598,6 @@ static void vc1_decode_i_blocks(VC1Context *v)
s->c_dc_scale = s->c_dc_scale_table[v->pq];
//do frame decode
- s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
for (s->mb_y = 0; s->mb_y < s->end_mb_y; s->mb_y++) {
@@ -2622,9 +2613,8 @@ static void vc1_decode_i_blocks(VC1Context *v)
dst[4] = s->dest[1];
dst[5] = s->dest[2];
s->bdsp.clear_blocks(s->block[0]);
- mb_pos = s->mb_x + s->mb_y * s->mb_width;
- s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
- s->current_picture.qscale_table[mb_pos] = v->pq;
+ s->current_picture.mb_type[mb_index + s->mb_x] = MB_TYPE_INTRA;
+ s->current_picture.qscale_table[mb_index + s->mb_x] = v->pq;
s->current_picture.motion_val[1][s->block_index[0]][0] = 0;
s->current_picture.motion_val[1][s->block_index[0]][1] = 0;
@@ -2639,28 +2629,28 @@ static void vc1_decode_i_blocks(VC1Context *v)
int pred = vc1_coded_block_pred(&v->s, k, &coded_val);
val = val ^ pred;
*coded_val = val;
- }
- cbp |= val << (5 - k);
-
- vc1_decode_i_block(v, s->block[k], k, val, (k < 4) ? v->codingset : v->codingset2);
- if (k > 3 && (s->flags & CODEC_FLAG_GRAY))
- continue;
+ vc1_decode_i_block(v, s->block[k], k, val, v->codingset);
+ } else {
+ vc1_decode_i_block(v, s->block[k], k, val, v->codingset2);
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
if (v->pq >= 9 && v->overlap) {
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[k][j] <<= 1;
s->idsp.put_signed_pixels_clamped(s->block[k], dst[k],
- k & 4 ? s->uvlinesize
- : s->linesize);
+ k < 4 ? s->linesize
+ : s->uvlinesize);
} else {
if (v->rangeredfrm)
for (j = 0; j < 64; j++)
s->block[k][j] = (s->block[k][j] - 64) << 1;
s->idsp.put_pixels_clamped(s->block[k], dst[k],
- k & 4 ? s->uvlinesize
- : s->linesize);
+ k < 4 ? s->linesize
+ : s->uvlinesize);
}
}
@@ -2702,6 +2692,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
s->first_slice_line = 0;
+ mb_index += s->mb_width;
}
if (v->s.loop_filter)
ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
@@ -2750,24 +2741,19 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
}
// do frame decode
- s->mb_x = s->mb_y = 0;
s->mb_intra = 1;
s->first_slice_line = 1;
- s->mb_y = s->start_mb_y;
- if (s->start_mb_y) {
- s->mb_x = 0;
- init_block_index(v);
- memset(&s->coded_block[s->block_index[0] - s->b8_stride], 0,
- (1 + s->b8_stride) * sizeof(*s->coded_block));
- }
- for (; s->mb_y < s->end_mb_y; s->mb_y++) {
+ mb_pos = s->start_mb_y * s->mb_stride;
+ for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
init_block_index(v);
- for (;s->mb_x < s->mb_width; s->mb_x++) {
+ if (s->mb_y == s->start_mb_y && s->start_mb_y)
+ memset(&s->coded_block[s->block_index[0] - s->b8_stride], 0,
+ (1 + s->b8_stride) * sizeof(*s->coded_block));
+ for (; s->mb_x < s->mb_width; s->mb_x++) {
int16_t (*block)[64] = v->block[v->cur_blk_idx];
ff_update_block_index(s);
s->bdsp.clear_blocks(block[0]);
- mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA;
s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
@@ -2798,22 +2784,28 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
int pred = vc1_coded_block_pred(&v->s, k, &coded_val);
val = val ^ pred;
*coded_val = val;
- }
- cbp |= val << (5 - k);
- v->a_avail = !s->first_slice_line || (k == 2 || k == 3);
- v->c_avail = !!s->mb_x || (k == 1 || k == 3);
+ v->a_avail = !s->first_slice_line || (k & 2);
+ v->c_avail = s->mb_x || (k & 1);
- vc1_decode_i_block_adv(v, block[k], k, val,
- (k < 4) ? v->codingset : v->codingset2, mquant);
+ vc1_decode_i_block_adv(v, block[k], k, val,
+ v->codingset, mquant, mb_pos);
+ } else {
+ v->a_avail = !s->first_slice_line;
+ v->c_avail = !!s->mb_x;
+
+ vc1_decode_i_block_adv(v, block[k], k, val,
+ v->codingset2, mquant, mb_pos);
+
+ if (s->flags & CODEC_FLAG_GRAY)
+ continue;
+ }
- if (k > 3 && (s->flags & CODEC_FLAG_GRAY))
- continue;
v->vc1dsp.vc1_inv_trans_8x8(block[k]);
}
ff_vc1_smooth_overlap_filter_iblk(v);
- vc1_put_signed_blocks_clamped(v);
+ vc1_put_signed_blocks_clamped(v, mb_pos);
if (v->s.loop_filter)
ff_vc1_loop_filter_iblk_delayed(v, v->pq);
@@ -2824,26 +2816,28 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
get_bits_count(&s->gb), v->bits);
return;
}
+ mb_pos++;
}
if (!v->s.loop_filter)
ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
else if (s->mb_y)
- ff_mpeg_draw_horiz_band(s, (s->mb_y-1) * 16, 16);
+ ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
s->first_slice_line = 0;
+ mb_pos += s->mb_stride - s->mb_width;
}
/* raw bottom MB row */
s->mb_x = 0;
init_block_index(v);
-
- for (;s->mb_x < s->mb_width; s->mb_x++) {
+ for (; s->mb_x < s->mb_width; s->mb_x++) {
ff_update_block_index(s);
- vc1_put_signed_blocks_clamped(v);
+ vc1_put_signed_blocks_clamped(v, mb_pos);
if (v->s.loop_filter)
ff_vc1_loop_filter_iblk_delayed(v, v->pq);
+ mb_pos++;
}
if (v->s.loop_filter)
- ff_mpeg_draw_horiz_band(s, (s->end_mb_y-1)*16, 16);
+ ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
ff_er_add_slice(&s->er, 0, s->start_mb_y << v->field_mode, s->mb_width - 1,
(s->end_mb_y << v->field_mode) - 1, ER_MB_END);
}
@@ -2852,6 +2846,7 @@ static void vc1_decode_p_blocks(VC1Context *v)
{
MpegEncContext *s = &v->s;
int apply_loop_filter;
+ int mb_pos;
/* select codingmode used for VLC tables selection */
switch (v->c_ac_table_index) {
@@ -2881,7 +2876,8 @@ static void vc1_decode_p_blocks(VC1Context *v)
apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY) &&
v->fcm == PROGRESSIVE;
s->first_slice_line = 1;
- memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride);
+ memset(v->cbp_base, 0, sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+ mb_pos = s->start_mb_y * s->mb_stride;
for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
init_block_index(v);
@@ -2889,10 +2885,11 @@ static void vc1_decode_p_blocks(VC1Context *v)
ff_update_block_index(s);
if (v->fcm == ILACE_FIELD)
- vc1_decode_p_mb_intfi(v);
+ vc1_decode_p_mb_intfi(v, mb_pos);
else if (v->fcm == ILACE_FRAME)
- vc1_decode_p_mb_intfr(v);
- else vc1_decode_p_mb(v);
+ vc1_decode_p_mb_intfr(v, mb_pos);
+ else
+ vc1_decode_p_mb(v, mb_pos);
if (s->mb_y != s->start_mb_y && apply_loop_filter)
ff_vc1_apply_p_loop_filter(v);
if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
@@ -2902,13 +2899,16 @@ static void vc1_decode_p_blocks(VC1Context *v)
get_bits_count(&s->gb), v->bits, s->mb_x, s->mb_y);
return;
}
+ mb_pos++;
}
memmove(v->cbp_base, v->cbp, sizeof(v->cbp_base[0]) * s->mb_stride);
memmove(v->ttblk_base, v->ttblk, sizeof(v->ttblk_base[0]) * s->mb_stride);
memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
memmove(v->luma_mv_base, v->luma_mv, sizeof(v->luma_mv_base[0]) * s->mb_stride);
- if (s->mb_y != s->start_mb_y) ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
+ if (s->mb_y != s->start_mb_y)
+ ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
s->first_slice_line = 0;
+ mb_pos += s->mb_stride - s->mb_width;
}
if (apply_loop_filter) {
s->mb_x = 0;
@@ -2927,6 +2927,7 @@ static void vc1_decode_p_blocks(VC1Context *v)
static void vc1_decode_b_blocks(VC1Context *v)
{
MpegEncContext *s = &v->s;
+ int mb_pos;
/* select codingmode used for VLC tables selection */
switch (v->c_ac_table_index) {
@@ -2954,6 +2955,7 @@ static void vc1_decode_b_blocks(VC1Context *v)
}
s->first_slice_line = 1;
+ mb_pos = s->start_mb_y * s->mb_stride;
for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
s->mb_x = 0;
init_block_index(v);
@@ -2961,11 +2963,11 @@ static void vc1_decode_b_blocks(VC1Context *v)
ff_update_block_index(s);
if (v->fcm == ILACE_FIELD)
- vc1_decode_b_mb_intfi(v);
+ vc1_decode_b_mb_intfi(v, mb_pos);
else if (v->fcm == ILACE_FRAME)
- vc1_decode_b_mb_intfr(v);
+ vc1_decode_b_mb_intfr(v, mb_pos);
else
- vc1_decode_b_mb(v);
+ vc1_decode_b_mb(v, mb_pos);
if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
// TODO: may need modification to handle slice coding
ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2975,12 +2977,14 @@ static void vc1_decode_b_blocks(VC1Context *v)
}
if (v->s.loop_filter)
ff_vc1_loop_filter_iblk(v, v->pq);
+ mb_pos++;
}
if (!v->s.loop_filter)
ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
else if (s->mb_y)
ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
s->first_slice_line = 0;
+ mb_pos += s->mb_stride - s->mb_width;
}
if (v->s.loop_filter)
ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
@@ -3012,7 +3016,6 @@ static void vc1_decode_skip_blocks(VC1Context *v)
void ff_vc1_decode_blocks(VC1Context *v)
{
-
v->s.esc3_level_length = 0;
if (v->x8_type) {
ff_intrax8_decode_picture(&v->x8, 2*v->pq + v->halfpq, v->pq * !v->pquantizer);
--
2.1.0
More information about the ffmpeg-devel
mailing list