[FFmpeg-cvslog] H264: replace pixel_size by pixel_shift

Sun Apr 10 22:42:38 CEST 2011

ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Sun Apr 10 16:12:56 2011 +0200| [e7077f5e7b509c4fec62620d136a80b676428bb1] | committer: Michael Niedermayer

H264: replace pixel_size by pixel_shift
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e7077f5e7b509c4fec62620d136a80b676428bb1
---

 libavcodec/h264.c            |  132 +++++++++++++++++++++---------------------
 libavcodec/h264.h            |    2 +-
 libavcodec/h264_cabac.c      |   12 ++--
 libavcodec/h264_cavlc.c      |   12 ++--
 libavcodec/h264_loopfilter.c |    6 +-
 5 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index c9f2ad9..1fa333c 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -459,7 +459,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
     const int luma_xy= (mx&3) + ((my&3)<<2);
-    uint8_t * src_y = pic->data[0] + (mx>>2)*h->pixel_size + (my>>2)*h->mb_linesize;
+    uint8_t * src_y = pic->data[0] + ((mx>>2)<<h->pixel_shift) + (my>>2)*h->mb_linesize;
     uint8_t * src_cb, * src_cr;
     int extra_width= h->emu_edge_width;
     int extra_height= h->emu_edge_height;
@@ -476,8 +476,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
        || full_my < 0-extra_height
        || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2*h->pixel_size - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
-            src_y= s->edge_emu_buffer + 2*h->pixel_size + 2*h->mb_linesize;
+        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<h->pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+            src_y= s->edge_emu_buffer + (2<<h->pixel_shift) + 2*h->mb_linesize;
         emu=1;
     }
 
@@ -493,8 +493,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
     }
-    src_cb= pic->data[1] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
-    src_cr= pic->data[2] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
+    src_cb= pic->data[1] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
+    src_cr= pic->data[2] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
 
     if(emu){
         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
@@ -519,9 +519,9 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
 
-    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
-    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
-    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -552,9 +552,9 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
                            int list0, int list1){
     MpegEncContext * const s = &h->s;
 
-    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
-    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
-    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
+    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
+    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
+    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*(s->mb_y >> MB_FIELD);
 
@@ -562,7 +562,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
         uint8_t *tmp_cb = s->obmc_scratchpad;
-        uint8_t *tmp_cr = s->obmc_scratchpad + 8*h->pixel_size;
+        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<h->pixel_shift);
         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
         int refn0 = h->ref_cache[0][ scan8[n] ];
         int refn1 = h->ref_cache[1][ scan8[n] ];
@@ -637,9 +637,9 @@ static inline void prefetch_motion(H264Context *h, int list){
         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
         uint8_t **src= h->ref_list[list][refn].data;
-        int off= mx*h->pixel_size + (my + (s->mb_x&3)*4)*h->mb_linesize + 64*h->pixel_size;
+        int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
         s->dsp.prefetch(src[0]+off, s->linesize, 4);
-        off= (mx>>1)*h->pixel_size + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64*h->pixel_size;
+        off= (((mx>>1)+64)<<h->pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
     }
 }
@@ -664,11 +664,11 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                 weight_op, weight_avg,
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
     }else if(IS_16X8(mb_type)){
-        mc_part(h, 0, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 0,
+        mc_part(h, 0, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
-        mc_part(h, 8, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 4,
+        mc_part(h, 8, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 4,
                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                 &weight_op[1], &weight_avg[1],
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
@@ -698,11 +698,11 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                     &weight_op[3], &weight_avg[3],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
             }else if(IS_SUB_8X4(sub_mb_type)){
-                mc_part(h, n  , 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset,
+                mc_part(h, n  , 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
-                mc_part(h, n+2, 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
+                mc_part(h, n+2, 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                     &weight_op[4], &weight_avg[4],
                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
@@ -1006,7 +1006,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
     ff_h264_decode_init_vlc();
 
     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
-    h->pixel_size = 1;
+    h->pixel_shift = 0;
 
     h->thread_context[0] = h;
     h->outputed_poc = h->next_outputed_poc = INT_MIN;
@@ -1170,14 +1170,14 @@ int ff_h264_frame_start(H264Context *h){
     assert(s->linesize && s->uvlinesize);
 
     for(i=0; i<16; i++){
-        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
     for(i=0; i<4; i++){
         h->block_offset[16+i]=
-        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
         h->block_offset[24+16+i]=
-        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
@@ -1377,10 +1377,10 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
             if(!MB_MBAFF){
                 top_border = h->top_borders[0][s->mb_x];
                 AV_COPY128(top_border, src_y + 15*linesize);
-                if (h->pixel_size == 2)
+                if (h->pixel_shift)
                     AV_COPY128(top_border+16, src_y+15*linesize+16);
                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-                    if (h->pixel_size == 2) {
+                    if (h->pixel_shift) {
                         AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                         AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                     } else {
@@ -1399,11 +1399,11 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     // There are two lines saved, the line above the the top macroblock of a pair,
     // and the line above the bottom macroblock
     AV_COPY128(top_border, src_y + 16*linesize);
-    if (h->pixel_size == 2)
+    if (h->pixel_shift)
         AV_COPY128(top_border+16, src_y+16*linesize+16);
 
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
-        if (h->pixel_size == 2) {
+        if (h->pixel_shift) {
             AV_COPY128(top_border+32, src_cb+8*uvlinesize);
             AV_COPY128(top_border+48, src_cr+8*uvlinesize);
         } else {
@@ -1438,15 +1438,15 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c
         deblock_top =  (s->mb_y > !!MB_FIELD);
     }
 
-    src_y  -=   linesize + h->pixel_size;
-    src_cb -= uvlinesize + h->pixel_size;
-    src_cr -= uvlinesize + h->pixel_size;
+    src_y  -=   linesize + 1 + h->pixel_shift;
+    src_cb -= uvlinesize + 1 + h->pixel_shift;
+    src_cr -= uvlinesize + 1 + h->pixel_shift;
 
     top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
     top_border    = h->top_borders[top_idx][s->mb_x];
 
 #define XCHG(a,b,xchg)\
-    if (h->pixel_size == 2) {\
+    if (h->pixel_shift) {\
         if (xchg) {\
             AV_SWAP64(b+0,a+0);\
             AV_SWAP64(b+8,a+8);\
@@ -1459,35 +1459,35 @@ else      AV_COPY64(b,a);
 
     if(deblock_top){
         if(deblock_left){
-            XCHG(top_border_m1+8*h->pixel_size, src_y -7*h->pixel_size, 1);
+            XCHG(top_border_m1+(8<<h->pixel_shift), src_y -(7<<h->pixel_shift), 1);
         }
-        XCHG(top_border+0*h->pixel_size, src_y +1*h->pixel_size, xchg);
-        XCHG(top_border+8*h->pixel_size, src_y +9*h->pixel_size, 1);
+        XCHG(top_border+(0<<h->pixel_shift), src_y +(1<<h->pixel_shift), xchg);
+        XCHG(top_border+(8<<h->pixel_shift), src_y +(9<<h->pixel_shift), 1);
         if(s->mb_x+1 < s->mb_width){
-            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17*h->pixel_size, 1);
+            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<h->pixel_shift), 1);
         }
     }
     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
         if(deblock_top){
             if(deblock_left){
-                XCHG(top_border_m1+16*h->pixel_size, src_cb -7*h->pixel_size, 1);
-                XCHG(top_border_m1+24*h->pixel_size, src_cr -7*h->pixel_size, 1);
+                XCHG(top_border_m1+(16<<h->pixel_shift), src_cb -(7<<h->pixel_shift), 1);
+                XCHG(top_border_m1+(24<<h->pixel_shift), src_cr -(7<<h->pixel_shift), 1);
             }
-            XCHG(top_border+16*h->pixel_size, src_cb+h->pixel_size, 1);
-            XCHG(top_border+24*h->pixel_size, src_cr+h->pixel_size, 1);
+            XCHG(top_border+(16<<h->pixel_shift), src_cb+1+h->pixel_shift, 1);
+            XCHG(top_border+(24<<h->pixel_shift), src_cr+1+h->pixel_shift, 1);
         }
     }
 }
 
 static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) {
-    if (h->pixel_size == 1)
+    if (!h->pixel_shift)
         return mb[index];
     else
         return ((int32_t*)mb)[index];
 }
 
 static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) {
-    if (h->pixel_size == 1)
+    if (!h->pixel_shift)
         mb[index] = value;
     else
         ((int32_t*)mb)[index] = value;
@@ -1509,12 +1509,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
 
-    dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
-    dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
-    dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
+    dest_y  = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize  ) * 16;
+    dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
+    dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
 
-    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64*h->pixel_size, s->linesize, 4);
-    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64*h->pixel_size, dest_cr - dest_cb, 2);
+    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<h->pixel_shift), s->linesize, 4);
+    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<h->pixel_shift), dest_cr - dest_cb, 2);
 
     h->list_counts[mb_xy]= h->list_count;
 
@@ -1551,7 +1551,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     }
 
     if (!simple && IS_INTRA_PCM(mb_type)) {
-        if (h->pixel_size == 2) {
+        if (h->pixel_shift) {
             const int bit_depth = h->sps.bit_depth_luma;
             int j;
             GetBitContext gb;
@@ -1605,16 +1605,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             uint8_t * const ptr= dest_y + block_offset[i];
                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
+                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                             }else{
                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                             (h->topright_samples_available<<i)&0x4000, linesize);
                                 if(nnz){
                                     if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
-                                        idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
+                                        idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                                     else
-                                        idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
+                                        idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                                 }
                             }
                         }
@@ -1631,7 +1631,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
 
                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
-                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
+                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                             }else{
                                 uint8_t *topright;
                                 int nnz, tr;
@@ -1640,7 +1640,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                                     const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                     assert(mb_y || linesize <= block_offset[i]);
                                     if(!topright_avail){
-                                        if (h->pixel_size == 2) {
+                                        if (h->pixel_shift) {
                                             tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                             topright= (uint8_t*) &tr_high;
                                         } else {
@@ -1648,7 +1648,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                                         topright= (uint8_t*) &tr;
                                         }
                                     }else
-                                        topright= ptr + 4*h->pixel_size - linesize;
+                                        topright= ptr + (4<<h->pixel_shift) - linesize;
                                 }else
                                     topright= NULL;
 
@@ -1657,9 +1657,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                                 if(nnz){
                                     if(is_h264){
                                         if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
-                                            idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
+                                            idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                                         else
-                                            idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
+                                            idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
                                     }
 #if CONFIG_SVQ3_DECODER
                                     else
@@ -1708,7 +1708,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                         }else{
                             for(i=0; i<16; i++){
                                 if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
-                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
+                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
                             }
                         }
                     }else{
@@ -1720,7 +1720,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                         for(i=0; i<16; i+=di){
                             if(h->non_zero_count_cache[ scan8[i] ]){
-                                idct_add(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
+                                idct_add(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
                             }
                         }
                     }else{
@@ -1748,21 +1748,21 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             uint8_t *dest[2] = {dest_cb, dest_cr};
             if(transform_bypass){
                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16*h->pixel_size, uvlinesize);
-                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16*h->pixel_size, uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<h->pixel_shift), uvlinesize);
+                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<h->pixel_shift), uvlinesize);
                 }else{
                     idct_add = s->dsp.add_pixels4;
                     for(i=16; i<16+8; i++){
                         if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
-                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16*h->pixel_size, uvlinesize);
+                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<h->pixel_shift), uvlinesize);
                     }
                 }
             }else{
                 if(is_h264){
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*h->pixel_size       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<h->pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16+4*16)*h->pixel_size, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<h->pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
@@ -2986,9 +2986,9 @@ static void loop_filter(H264Context *h){
 
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
-                dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
-                dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
-                dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
+                dest_y  = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize  ) * 16;
+                dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
+                dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
                     //FIXME simplify above
 
                 if (MB_FIELD) {
@@ -3482,7 +3482,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
-                    h->pixel_size = (h->sps.bit_depth_luma+7)/8;
+                    h->pixel_shift = h->sps.bit_depth_luma/9;
 
                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index d3ae90c..c9a2f00 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -266,7 +266,7 @@ typedef struct MMCO{
 typedef struct H264Context{
     MpegEncContext s;
     H264DSPContext h264dsp;
-    int pixel_size;
+    int pixel_shift;
     int chroma_qp[2]; //QPc
 
     int qp_thresh;      ///< QP threshold to skip loopfilter
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 2b4b4fc..b9c1c62 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1143,7 +1143,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
             } \
         }
 
-        if (h->pixel_size == 2) {
+        if (h->pixel_shift) {
             STORE_BLOCK(int32_t)
         } else {
             STORE_BLOCK(int16_t)
@@ -1670,7 +1670,7 @@ decode_intra_mb:
                 qmul = h->dequant4_coeff[0][s->qscale];
                 for( i = 0; i < 16; i++ ) {
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    decode_cabac_residual_nondc(h, h->mb + 16*i*h->pixel_size, 1, i, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + (16*i<<h->pixel_shift), 1, i, scan + 1, qmul, 15);
                 }
             } else {
                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
@@ -1680,7 +1680,7 @@ decode_intra_mb:
             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
                 if( cbp & (1<<i8x8) ) {
                     if( IS_8x8DCT(mb_type) ) {
-                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8*h->pixel_size, 5, 4*i8x8,
+                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8<<h->pixel_shift), 5, 4*i8x8,
                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
                     } else {
                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
@@ -1688,7 +1688,7 @@ decode_intra_mb:
                             const int index = 4*i8x8 + i4x4;
                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
 //START_TIMER
-                            decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 2, index, scan, qmul, 16);
+                            decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 2, index, scan, qmul, 16);
 //STOP_TIMER("decode_residual")
                         }
                     }
@@ -1703,7 +1703,7 @@ decode_intra_mb:
             int c;
             for( c = 0; c < 2; c++ ) {
                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
-                decode_cabac_residual_dc(h, h->mb + (256 + 16*4*c)*h->pixel_size, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
+                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c)<<h->pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
             }
         }
 
@@ -1714,7 +1714,7 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     const int index = 16 + 4 * c + i;
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 4, index, scan + 1, qmul, 15);
+                    decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 4, index, scan + 1, qmul, 15);
                 }
             }
         } else {
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 41bafee..92911c1 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -522,7 +522,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
         } \
     }
 
-    if (h->pixel_size == 2) {
+    if (h->pixel_shift) {
         STORE_BLOCK(int32_t)
     } else {
         STORE_BLOCK(int16_t)
@@ -961,7 +961,7 @@ decode_intra_mb:
                 for(i8x8=0; i8x8<4; i8x8++){
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index*h->pixel_size, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index<<h->pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                             return -1;
                         }
                     }
@@ -973,7 +973,7 @@ decode_intra_mb:
             for(i8x8=0; i8x8<4; i8x8++){
                 if(cbp & (1<<i8x8)){
                     if(IS_8x8DCT(mb_type)){
-                        DCTELEM *buf = &h->mb[64*i8x8*h->pixel_size];
+                        DCTELEM *buf = &h->mb[64*i8x8<<h->pixel_shift];
                         uint8_t *nnz;
                         for(i4x4=0; i4x4<4; i4x4++){
                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
@@ -986,7 +986,7 @@ decode_intra_mb:
                         for(i4x4=0; i4x4<4; i4x4++){
                             const int index= i4x4 + 4*i8x8;
 
-                            if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
+                            if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
                                 return -1;
                             }
                         }
@@ -1000,7 +1000,7 @@ decode_intra_mb:
 
         if(cbp&0x30){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + (256 + 16*4*chroma_idx)*h->pixel_size, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
+                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx)<<h->pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
                     return -1;
                 }
         }
@@ -1010,7 +1010,7 @@ decode_intra_mb:
                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                 for(i4x4=0; i4x4<4; i4x4++){
                     const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan + 1, qmul, 15) < 0){
+                    if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan + 1, qmul, 15) < 0){
                         return -1;
                     }
                 }
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 8899c26..325fd3c 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -650,10 +650,10 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
         //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
         if( dir == 0 ) {
-            filter_mb_edgev( &img_y[4*edge*h->pixel_size], linesize, bS, qp, h );
+            filter_mb_edgev( &img_y[4*edge<<h->pixel_shift], linesize, bS, qp, h );
             if( (edge&1) == 0 ) {
-                filter_mb_edgecv( &img_cb[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[0], h);
-                filter_mb_edgecv( &img_cr[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[1], h);
+                filter_mb_edgecv( &img_cb[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
+                filter_mb_edgecv( &img_cr[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
             }
         } else {
             filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );