[FFmpeg-devel] [PATCH] Rearrange MpegEncContext to simplify access from asm
Mans Rullgard
mans at mansr.com
Sat Jan 29 18:25:13 CET 2011
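This moves the MpegEncContext fields needed by the ARM asm near the top of
the struct, before any structs or other members which complicate the offset
calculation.
Changes to structs such as PutBitContext, which now follow these fields, will
no longer require updating the offsets; a single set of offsets replaces the
separate EABI and Apple ones; and the asm code is slightly simpler due to the
smaller offsets.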
Signed-off-by: Mans Rullgard <mans at mansr.com>
---
libavcodec/arm/asm-offsets.h | 21 ++++++---------------
libavcodec/arm/mpegvideo_neon.S | 13 +++++--------
libavcodec/mpegvideo.h | 29 +++++++++++++++++------------
3 files changed, 28 insertions(+), 35 deletions(-)
diff --git a/libavcodec/arm/asm-offsets.h b/libavcodec/arm/asm-offsets.h
index 174b5aa..7f73534 100644
--- a/libavcodec/arm/asm-offsets.h
+++ b/libavcodec/arm/asm-offsets.h
@@ -29,20 +29,11 @@
#endif
/* MpegEncContext */
-#if defined(__ARM_EABI__) || defined(__eabi__)
-#define Y_DC_SCALE 0xab4
-#define C_DC_SCALE 0xab8
-#define AC_PRED 0xae0
-#define BLOCK_LAST_INDEX 0x21c0
-#define INTER_SCANTAB_RASTER_END 0x23c0
-#define H263_AIC 0x2670
-#elif defined(__APPLE__)
-#define Y_DC_SCALE 0xa70
-#define C_DC_SCALE 0xa74
-#define AC_PRED 0xa9c
-#define BLOCK_LAST_INDEX 0x217c
-#define INTER_SCANTAB_RASTER_END 0x237c
-#define H263_AIC 0x2620
-#endif
+#define Y_DC_SCALE 0xb4
+#define C_DC_SCALE 0xb8
+#define AC_PRED 0xbc
+#define BLOCK_LAST_INDEX 0xc0
+#define H263_AIC 0xf0
+#define INTER_SCANTAB_RASTER_END 0x138
#endif
diff --git a/libavcodec/arm/mpegvideo_neon.S b/libavcodec/arm/mpegvideo_neon.S
index b25ead4..b695fb7 100644
--- a/libavcodec/arm/mpegvideo_neon.S
+++ b/libavcodec/arm/mpegvideo_neon.S
@@ -22,10 +22,9 @@
#include "asm-offsets.h"
function ff_dct_unquantize_h263_inter_neon, export=1
- add r0, r0, #0x2200
- add r12, r0, #BLOCK_LAST_INDEX-0x2200
+ add r12, r0, #BLOCK_LAST_INDEX
ldr r12, [r12, r2, lsl #2]
- add r0, r0, #INTER_SCANTAB_RASTER_END-0x2200
+ add r0, r0, #INTER_SCANTAB_RASTER_END
ldrb r12, [r0, r12]
sub r2, r3, #1
lsl r0, r3, #1
@@ -75,17 +74,15 @@ endfunc
function ff_dct_unquantize_h263_intra_neon, export=1
push {r4-r6,lr}
+ add r12, r0, #BLOCK_LAST_INDEX
ldr r6, [r0, #AC_PRED]
- add r5, r0, #0x2700
+ add lr, r0, #INTER_SCANTAB_RASTER_END
cmp r6, #0
movne r12, #63
bne 1f
- add lr, r0, #0x2200
- add r12, lr, #BLOCK_LAST_INDEX-0x2200
- add lr, lr, #INTER_SCANTAB_RASTER_END-0x2200
ldr r12, [r12, r2, lsl #2]
ldrb r12, [lr, r12]
-1: ldr r5, [r5, #H263_AIC-0x2700]
+1: ldr r5, [r0, #H263_AIC]
ldrsh r4, [r1]
cmp r5, #0
mov r5, r1
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 8cd20b7..53ab0f4 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -224,9 +224,6 @@ typedef struct MpegEncContext {
int stream_codec_tag; ///< internal stream_codec_tag upper case converted from avctx stream_codec_tag
/* the following fields are managed internally by the encoder */
- /** bit output */
- PutBitContext pb;
-
/* sequence parameters */
int context_initialized;
int input_picture_number; ///< used to set pic->display_picture_number, should not be used for/by anything else
@@ -247,6 +244,23 @@ typedef struct MpegEncContext {
Picture **input_picture; ///< next pictures on display order for encoding
Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding
+ int y_dc_scale, c_dc_scale;
+ int ac_pred;
+ int block_last_index[12]; ///< last non zero coefficient in block
+ int h263_aic; ///< Advanded INTRA Coding (AIC)
+
+ /* scantables */
+ ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage
+ ScanTable intra_scantable;
+ ScanTable intra_h_scantable;
+ ScanTable intra_v_scantable;
+
+ /* WARNING: changes above this line require updates to hardcoded
+ * offsets used in asm. */
+
+ /** bit output */
+ PutBitContext pb;
+
int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
struct MpegEncContext *thread_context[MAX_THREADS];
@@ -283,7 +297,6 @@ typedef struct MpegEncContext {
int16_t *dc_val_base;
int16_t *dc_val[3]; ///< used for mpeg4 DC prediction, all 3 arrays must be continuous
int16_t dc_cache[4*5];
- int y_dc_scale, c_dc_scale;
const uint8_t *y_dc_scale_table; ///< qscale -> y_dc_scale table
const uint8_t *c_dc_scale_table; ///< qscale -> c_dc_scale table
const uint8_t *chroma_qscale_table; ///< qscale -> chroma_qscale (h263)
@@ -291,7 +304,6 @@ typedef struct MpegEncContext {
uint8_t *coded_block; ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
int16_t (*ac_val_base)[16];
int16_t (*ac_val[3])[16]; ///< used for for mpeg4 AC prediction, all 3 arrays must be continuous
- int ac_pred;
uint8_t *prev_pict_types; ///< previous picture types in bitstream order, used for mb skip
#define PREV_PICT_TYPES_BUFFER_SIZE 256
int mb_skipped; ///< MUST BE SET only during DECODING
@@ -434,12 +446,6 @@ typedef struct MpegEncContext {
/** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
uint16_t (*q_intra_matrix16)[2][64];
uint16_t (*q_inter_matrix16)[2][64];
- int block_last_index[12]; ///< last non zero coefficient in block
- /* scantables */
- ScanTable intra_scantable;
- ScanTable intra_h_scantable;
- ScanTable intra_v_scantable;
- ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage
/* noise reduction */
int (*dct_error_sum)[64];
@@ -495,7 +501,6 @@ typedef struct MpegEncContext {
/* H.263+ specific */
int umvplus; ///< == H263+ && unrestricted_mv
- int h263_aic; ///< Advanded INTRA Coding (AIC)
int h263_aic_dir; ///< AIC direction: 0 = left, 1 = top
int h263_slice_structured;
int alt_inter_vlc; ///< alternative inter vlc
--
1.7.3.5
More information about the ffmpeg-devel mailing list