[FFmpeg-devel] [PATCH] Remove REP_RET usage throughout x86 asm files
Rostislav Pehlivanov
atomnuker at gmail.com
Sun Nov 12 22:59:56 EET 2017
No longer needed, as AUTO_REP_RET deals with it on normal RETs.
Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
---
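Background for reviewers: x86inc.asm wraps every branch mnemonic so
that it records the address immediately after the jump in
last_branch_adr, and RET itself ends in AUTO_REP_RET, which re-adds
the rep prefix whenever the return directly follows a branch. (The
rep prefix matters because, per the comment removed below, a plain
ret is slow on AMD cpus up to K10 when it immediately follows a
branch or a branch target.) Abridged excerpt from x86inc.asm,
followed by a hypothetical loop tail showing why the conversion is
byte-identical at the usual call sites:

    %macro AUTO_REP_RET 0
        %if notcpuflag(ssse3)
            times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr
        %endif
        ret
    %endmacro

    ; hypothetical loop tail, as converted by this patch:
    .loop:
        mova [dstq+lenq], m0
        add  lenq, mmsize
        jl   .loop  ; the wrapped jl updates last_branch_adr
        RET         ; expands to AUTO_REP_RET -> "rep ret" pre-SSSE3

REP_RET only differed from this by forcing rep ret unconditionally on
pre-SSSE3 targets without an epilogue, so dropping it changes nothing
at sites where the return already follows the loop branch.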
libavcodec/x86/aacpsdsp.asm | 10 ++++----
libavcodec/x86/ac3dsp.asm | 10 ++++----
libavcodec/x86/alacdsp.asm | 4 +--
libavcodec/x86/audiodsp.asm | 2 +-
libavcodec/x86/dirac_dwt.asm | 14 +++++------
libavcodec/x86/fft.asm | 8 +++---
libavcodec/x86/flacdsp.asm | 8 +++---
libavcodec/x86/h264_chromamc.asm | 18 +++++++-------
libavcodec/x86/h264_chromamc_10bit.asm | 10 ++++----
libavcodec/x86/h264_deblock_10bit.asm | 10 ++++----
libavcodec/x86/h264_idct.asm | 22 ++++++++---------
libavcodec/x86/h264_idct_10bit.asm | 8 +++---
libavcodec/x86/h264_intrapred.asm | 30 +++++++++++-----------
libavcodec/x86/h264_intrapred_10bit.asm | 16 ++++++------
libavcodec/x86/h264_qpel_10bit.asm | 2 +-
libavcodec/x86/h264_qpel_8bit.asm | 26 +++++++++----------
libavcodec/x86/h264_weight.asm | 16 ++++++------
libavcodec/x86/h264_weight_10bit.asm | 12 ++++-----
libavcodec/x86/hevc_sao.asm | 2 +-
libavcodec/x86/hevc_sao_10bit.asm | 2 +-
libavcodec/x86/hpeldsp.asm | 22 ++++++++---------
libavcodec/x86/hpeldsp_vp3.asm | 4 +--
libavcodec/x86/huffyuvdsp.asm | 2 +-
libavcodec/x86/jpeg2000dsp.asm | 4 +--
libavcodec/x86/lossless_videodsp.asm | 2 +-
libavcodec/x86/lossless_videoencdsp.asm | 2 +-
libavcodec/x86/mdct15.asm | 2 +-
libavcodec/x86/me_cmp.asm | 2 +-
libavcodec/x86/pixblockdsp.asm | 2 +-
libavcodec/x86/pngdsp.asm | 2 +-
libavcodec/x86/qpel.asm | 6 ++---
libavcodec/x86/qpeldsp.asm | 12 ++++-----
libavcodec/x86/rv34dsp.asm | 2 +-
libavcodec/x86/rv40dsp.asm | 10 ++++----
libavcodec/x86/sbrdsp.asm | 12 ++++-----
libavcodec/x86/takdsp.asm | 8 +++---
libavcodec/x86/utvideodsp.asm | 4 +--
libavcodec/x86/v210.asm | 2 +-
libavcodec/x86/vc1dsp_mc.asm | 2 +-
libavcodec/x86/videodsp.asm | 2 +-
libavcodec/x86/vp8dsp.asm | 30 +++++++++++-----------
libavcodec/x86/vp8dsp_loopfilter.asm | 6 ++---
libavfilter/x86/af_afir.asm | 2 +-
libavfilter/x86/af_volume.asm | 6 ++---
libavfilter/x86/avf_showcqt.asm | 4 +--
libavfilter/x86/vf_blend.asm | 2 +-
libavfilter/x86/vf_gradfun.asm | 6 ++---
libavfilter/x86/vf_hqdn3d.asm | 2 +-
libavfilter/x86/vf_interlace.asm | 6 ++---
libavfilter/x86/vf_maskedmerge.asm | 2 +-
libavfilter/x86/vf_stereo3d.asm | 2 +-
libavfilter/x86/vf_w3fdif.asm | 10 ++++----
libavresample/x86/audio_convert.asm | 44 ++++++++++++++++-----------------
libavresample/x86/audio_mix.asm | 10 ++++----
libavresample/x86/dither.asm | 6 ++---
libavutil/x86/float_dsp.asm | 18 +++++++-------
libavutil/x86/lls.asm | 4 +--
libavutil/x86/x86inc.asm | 16 ------------
libswresample/x86/audio_convert.asm | 12 ++++-----
libswresample/x86/rematrix.asm | 8 +++---
libswscale/x86/input.asm | 14 +++++------
libswscale/x86/output.asm | 10 ++++----
libswscale/x86/scale.asm | 2 +-
tests/checkasm/x86/checkasm.asm | 2 +-
64 files changed, 271 insertions(+), 287 deletions(-)
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index 4acd087c85..73c7c09514 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -49,7 +49,7 @@ align 16
add dstq, mmsize
add nq, mmsize*2
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -83,7 +83,7 @@ align 16
add src2q, mmsize
add nq, mmsize*2
jl .loop
- REP_RET
+ RET
;***********************************************************************
;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
@@ -116,7 +116,7 @@ align 16
movhps [rq+nq], m2
add nq, 8
jl .loop
- REP_RET
+ RET
;***************************************************************************
;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
@@ -164,7 +164,7 @@ align 16
movhps [rq+nq], m2
add nq, 8
jl .loop
- REP_RET
+ RET
;**********************************************************
;void ps_hybrid_analysis_ileave_sse(float out[2][38][64],
@@ -478,7 +478,7 @@ align 16
add outq, strideq
add nq, 64
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 675ade3101..304c6cfd8c 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -64,7 +64,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
sub expnq, mmsize
jg .nextexp
.end:
- REP_RET
+ RET
%endmacro
%define LOOP_ALIGN
@@ -187,7 +187,7 @@ cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
sub lend, mmsize*32/%2
ja .loop
.end:
- REP_RET
+ RET
%endmacro
;-----------------------------------------------------------------------------
@@ -318,7 +318,7 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
sub lenq, 16
%endif
ja .loop
- REP_RET
+ RET
;------------------------------------------------------------------------------
; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
@@ -412,7 +412,7 @@ cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
add lenq, 4
jl .loop
- REP_RET
+ RET
%endmacro
%if HAVE_SSE2_EXTERNAL
@@ -534,7 +534,7 @@ cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
add offsetd, mmsize
sub offset2d, mmsize
jae .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/alacdsp.asm b/libavcodec/x86/alacdsp.asm
index bb2069f785..1cfd302de2 100644
--- a/libavcodec/x86/alacdsp.asm
+++ b/libavcodec/x86/alacdsp.asm
@@ -100,7 +100,7 @@ align 16
add lenq, mmsize*2
jl .loop
- REP_RET
+ RET
%if ARCH_X86_64
cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
@@ -130,4 +130,4 @@ align 16
add lenq, mmsize*2
jl .loop
- REP_RET
+ RET
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index de395e5fa8..c3ef8dd7da 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -114,7 +114,7 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
add dstq, mmsize*4*(%2+%3)
sub lend, mmsize*(%2+%3)
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
diff --git a/libavcodec/x86/dirac_dwt.asm b/libavcodec/x86/dirac_dwt.asm
index 22a5c2bbbb..875419aa87 100644
--- a/libavcodec/x86/dirac_dwt.asm
+++ b/libavcodec/x86/dirac_dwt.asm
@@ -75,7 +75,7 @@ cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
mova [b1q+2*widthq], m0
jg .loop
- REP_RET
+ RET
; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; int width)
@@ -93,7 +93,7 @@ cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
paddw m0, [b1q+2*widthq]
mova [b1q+2*widthq], m0
jg .loop
- REP_RET
+ RET
; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -110,7 +110,7 @@ cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
mova [b2q+2*widthq], m1
jg .loop
- REP_RET
+ RET
; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
; IDWTELEM *b3, IDWTELEM *b4, int width)
@@ -139,7 +139,7 @@ cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
psubw m5, m1
mova [b2q+2*widthq], m5
jg .loop
- REP_RET
+ RET
; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
@@ -159,7 +159,7 @@ cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
paddw m2, m0
mova [b1q+2*widthq], m2
jg .loop
- REP_RET
+ RET
%endmacro
; extend the left and right edges of the tmp array by %1 and %2 respectively
@@ -225,7 +225,7 @@ cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
cmp xq, w2q
jl .highpass_loop
.end:
- REP_RET
+ RET
%endmacro
@@ -290,7 +290,7 @@ cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
cmp xd, w2d
jl .highpass_loop
.end:
- REP_RET
+ RET
%if ARCH_X86_64 == 0
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 53cfd64b3a..a119ebf930 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -566,7 +566,7 @@ cglobal fft_calc, 2,5,8
mov r0, r1
mov r1, r3
FFT_DISPATCH _interleave %+ SUFFIX, r1
- REP_RET
+ RET
%endif
@@ -610,7 +610,7 @@ cglobal fft_calc, 2,5,8
femms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -654,7 +654,7 @@ cglobal fft_permute, 2,7,1
movaps [r1 + r2 + 16], xmm1
add r2, 32
jl .loopcopy
- REP_RET
+ RET
%macro IMDCT_CALC_FUNC 0
cglobal imdct_calc, 3,5,3
@@ -704,7 +704,7 @@ cglobal imdct_calc, 3,5,3
femms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 7138611526..07ea783c56 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -75,7 +75,7 @@ ALIGN 16
movd [decodedq+4], m1
jg .loop_sample
.ret:
- REP_RET
+ RET
%endmacro
%if HAVE_XOP_EXTERNAL
@@ -120,7 +120,7 @@ align 16
mova [outq + lenq], m%2
add lenq, 16
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -164,7 +164,7 @@ align 16
add outq, mmsize*2
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -289,7 +289,7 @@ align 16
add outq, mmsize*REPCOUNT
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index b5a78b537d..8f183d0a09 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -112,7 +112,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed
mv0_pixels_mc8
- REP_RET
+ RET
.at_least_one_non_zero:
%ifidn %2, rv40
@@ -192,7 +192,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
add r1, r2
dec r3d
jne .next1drow
- REP_RET
+ RET
.both_non_zero: ; general case, bilinear
movd m4, r4d ; x
@@ -365,7 +365,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
add r0, r2
sub r3d, 2
jnz .next2rows
- REP_RET
+ RET
%endmacro
%macro chroma_mc2_mmx_func 2
@@ -407,7 +407,7 @@ cglobal %1_%2_chroma_mc2, 6, 7, 0
add r0, r2
sub r3d, 1
jnz .nextrow
- REP_RET
+ RET
%endmacro
%define rnd_1d_h264 pw_4
@@ -460,7 +460,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed
mv0_pixels_mc8
- REP_RET
+ RET
.at_least_one_non_zero:
test r5d, r5d
@@ -521,7 +521,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
sub r3d, 2
lea r0, [r0+r2*2]
jg .next2rows
- REP_RET
+ RET
.my_is_zero:
mov r5d, r4d
@@ -558,7 +558,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
lea r0, [r0+r2*2]
lea r1, [r1+r2*2]
jg .next2xrows
- REP_RET
+ RET
.mx_is_zero:
mov r4d, r5d
@@ -595,7 +595,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7, 8
sub r3d, 2
lea r0, [r0+r2*2]
jg .next2yrows
- REP_RET
+ RET
%endmacro
%macro chroma_mc4_ssse3_func 2
@@ -645,7 +645,7 @@ cglobal %1_%2_chroma_mc4, 6, 7, 0
sub r3d, 2
lea r0, [r0+r2*2]
jg .next2rows
- REP_RET
+ RET
%endmacro
%define CHROMAMC_AVG NOTHING
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index 34bc41969b..a43713bcda 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -67,7 +67,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
jne .at_least_one_non_zero
; mx == 0 AND my == 0 - no filter needed
MV0_PIXELS_MC8
- REP_RET
+ RET
.at_least_one_non_zero:
mov r6d, 2
@@ -102,7 +102,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
add r1, r2
dec r3d
jne .next1drow
- REP_RET
+ RET
.xy_interpolation: ; general case, bilinear
movd m4, r4m ; x
@@ -144,7 +144,7 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
add r0, r2
dec r3d
jne .next2drow
- REP_RET
+ RET
%endmacro
;-----------------------------------------------------------------------------
@@ -194,7 +194,7 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
MC4_OP m6, m0
sub r3d, 2
jnz .next2rows
- REP_RET
+ RET
%endmacro
;-----------------------------------------------------------------------------
@@ -234,7 +234,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
add r0, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
%macro NOTHING 2-3
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 1af3257a67..1d7e67a78c 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -372,7 +372,7 @@ cglobal deblock_v_luma_10, 5,5,15
add r4, 2
dec r3
jg .loop
- REP_RET
+ RET
cglobal deblock_h_luma_10, 5,7,15
shl r2d, 2
@@ -411,7 +411,7 @@ cglobal deblock_h_luma_10, 5,7,15
lea r5, [r5+r1*8]
dec r6
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -648,7 +648,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
add r4, mmsize
dec r6
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
@@ -958,7 +958,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
add r4, mmsize/4
dec r6
jg .loop
- REP_RET
+ RET
%else
RET
%endif
@@ -987,7 +987,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
add r4, mmsize
dec r5
jg .loop
- REP_RET
+ RET
%else
RET
%endif
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 8804638091..d892095ec0 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -380,7 +380,7 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
; void ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
@@ -452,7 +452,7 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
.no_dc:
mov r6d, dword [r1+r5*4]
add r6, r0
@@ -462,7 +462,7 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
INIT_MMX mmx
; void ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset,
@@ -488,7 +488,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
INIT_MMX mmxext
; void ff_h264_idct_add16intra_8_mmxext(uint8_t *dst, const int *block_offset,
@@ -512,7 +512,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
.try_dc:
movsx r6, word [r2]
test r6, r6
@@ -534,7 +534,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
add r2, 32
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
; void ff_h264_idct8_add4_8_mmxext(uint8_t *dst, const int *block_offset,
; int16_t *block, int stride,
@@ -637,7 +637,7 @@ INIT_MMX cpuname
add r2, 128
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
.no_dc:
INIT_XMM cpuname
mov dst2d, dword [r1+r5*4]
@@ -651,7 +651,7 @@ INIT_XMM cpuname
add r2, 128
cmp r5, 16
jl .nextblock
- REP_RET
+ RET
INIT_MMX mmx
h264_idct_add8_mmx_plane:
@@ -883,7 +883,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
add16_sse2_cycle 5, 0x24
add16_sse2_cycle 6, 0x1e
add16_sse2_cycle 7, 0x26
-REP_RET
+RET
%macro add16intra_sse2_cycle 2
movzx r0, word [r4+%2]
@@ -930,7 +930,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
add16intra_sse2_cycle 5, 0x24
add16intra_sse2_cycle 6, 0x1e
add16intra_sse2_cycle 7, 0x26
-REP_RET
+RET
%macro add8_sse2_cycle 2
movzx r0, word [r4+%2]
@@ -985,7 +985,7 @@ cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
%endif
add8_sse2_cycle 2, 0x5c
add8_sse2_cycle 3, 0x64
-REP_RET
+RET
;void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul)
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 9fd05abb2b..b990db7121 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -155,7 +155,7 @@ cglobal h264_idct_add16_10, 5,6
ADD16_OP 13, 7+3*8
ADD16_OP 14, 6+4*8
ADD16_OP 15, 7+4*8
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -292,7 +292,7 @@ cglobal h264_idct_add16intra_10,5,7,8
ADD16_OP_INTRA 10, 4+4*8
ADD16_OP_INTRA 12, 6+3*8
ADD16_OP_INTRA 14, 6+4*8
- REP_RET
+ RET
AC 8
AC 10
AC 12
@@ -335,7 +335,7 @@ cglobal h264_idct_add8_10,5,8,7
%endif
ADD16_OP_INTRA 32, 4+11*8
ADD16_OP_INTRA 34, 4+12*8
- REP_RET
+ RET
AC 16
AC 18
AC 32
@@ -384,7 +384,7 @@ cglobal h264_idct_add8_422_10, 5, 8, 7
ADD16_OP_INTRA 34, 4+12*8
ADD16_OP_INTRA 40, 4+13*8 ; i+4
ADD16_OP_INTRA 42, 4+14*8 ; i+4
-REP_RET
+RET
AC 16
AC 18
AC 24 ; i+4
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index f3aa3172f0..bc299ef7b4 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -66,7 +66,7 @@ cglobal pred16x16_vertical_8, 2,3
lea r0, [r0+r1*2]
dec r2
jg .loop
- REP_RET
+ RET
INIT_XMM sse
cglobal pred16x16_vertical_8, 2,3
@@ -82,7 +82,7 @@ cglobal pred16x16_vertical_8, 2,3
lea r0, [r0+r1*2]
dec r2
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_pred16x16_horizontal_8(uint8_t *src, ptrdiff_t stride)
@@ -115,7 +115,7 @@ cglobal pred16x16_horizontal_8, 2,3
lea r0, [r0+r1*2]
dec r2
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -177,7 +177,7 @@ cglobal pred16x16_dc_8, 2,7
lea r4, [r4+r1*2]
dec r3d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -224,7 +224,7 @@ cglobal pred16x16_tm_vp8_8, 2,5
add r0, r1
dec r4d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -266,7 +266,7 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
lea r0, [r0+r1*2]
dec r5d
jg .loop
- REP_RET
+ RET
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
@@ -302,7 +302,7 @@ cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
lea dstq, [dstq+strideq*4]
dec iterationd
jg .loop
- REP_RET
+ RET
%endif
;-----------------------------------------------------------------------------
@@ -566,7 +566,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7
lea r0, [r0+r2*2]
dec r4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -748,7 +748,7 @@ ALIGN 16
lea r0, [r0+r2*2]
dec r4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -795,7 +795,7 @@ cglobal pred8x8_horizontal_8, 2,3
lea r0, [r0+r1*2]
dec r2
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -935,7 +935,7 @@ cglobal pred8x8_dc_rv40_8, 2,7
lea r4, [r4+r1*2]
dec r3d
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
@@ -973,7 +973,7 @@ cglobal pred8x8_tm_vp8_8, 2,6
lea r0, [r0+r1*2]
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -1008,7 +1008,7 @@ cglobal pred8x8_tm_vp8_8, 2,6,4
lea r0, [r0+r1*2]
dec r5d
jg .loop
- REP_RET
+ RET
INIT_XMM ssse3
cglobal pred8x8_tm_vp8_8, 2,3,6
@@ -1035,7 +1035,7 @@ cglobal pred8x8_tm_vp8_8, 2,3,6
lea r0, [r0+r1*2]
dec r2d
jg .loop
- REP_RET
+ RET
; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
@@ -2509,7 +2509,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
lea r0, [r0+r2*2]
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 629e0a72e3..d368e246a7 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -327,7 +327,7 @@ cglobal pred8x8_horizontal_10, 2, 3
lea r0, [r0+r1*2]
dec r2d
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_predict_8x8_dc_10(pixel *src, ptrdiff_t stride)
@@ -503,7 +503,7 @@ cglobal pred8x8_plane_10, 2, 7, 7
add r0, r1
dec r2d
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
@@ -1030,7 +1030,7 @@ cglobal pred16x16_vertical_10, 2, 3
lea r0, [r0+r1*2]
dec r2d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -1054,7 +1054,7 @@ cglobal pred16x16_horizontal_10, 2, 3
lea r0, [r0+r1*2]
dec r2d
jg .vloop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -1100,7 +1100,7 @@ cglobal pred16x16_dc_10, 2, 6
lea r5, [r5+r1*2]
dec r3d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -1132,7 +1132,7 @@ cglobal pred16x16_top_dc_10, 2, 3
lea r0, [r0+r1*2]
dec r2d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -1169,7 +1169,7 @@ cglobal pred16x16_left_dc_10, 2, 6
lea r5, [r5+r1*2]
dec r3d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -1190,7 +1190,7 @@ cglobal pred16x16_128_dc_10, 2,3
lea r0, [r0+r1*2]
dec r2d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index 872268300a..9969798daf 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -211,7 +211,7 @@ cglobal %1_h264_qpel16_mc00_10, 3,4
lea r1, [r1+r2*2]
dec r3d
jg .loop
- REP_RET
+ RET
%endmacro
%define OP_MOV mova
diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm
index 2d287ba443..510ede9ea3 100644
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -89,7 +89,7 @@ cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
add r1, r3
dec r4d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -149,7 +149,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
add r1, r3
dec r4d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -192,7 +192,7 @@ cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
add r0, r2
dec r4d
jne .loop
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
@@ -239,7 +239,7 @@ cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
add r2, r4
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -303,7 +303,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
add r2, r4
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -350,7 +350,7 @@ cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Strid
add r2, r4
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
@@ -458,7 +458,7 @@ cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride,
FILT_V %1
FILT_V %1
.end:
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -535,7 +535,7 @@ cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
add r1, r2
dec r3d
jnz .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -578,7 +578,7 @@ cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
FILT_HV 14*48
FILT_HV 15*48
.end:
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -627,7 +627,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass_op, 5,5 ; dst, tmp, dstStride, unused, h
add r0, r2
dec r4d
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -718,7 +718,7 @@ cglobal %1_h264_qpel8or16_hv2_lowpass, 5,5,8 ; dst, tmp, dstStride, tmpStride, s
dec r4d
jne .op16
.done:
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
@@ -784,7 +784,7 @@ cglobal %1_pixels8_l2_shift5, 6, 6 ; dst, src16, src8, dstStride, src8Stride, h
lea r0, [r0+2*r3]
sub r5d, 2
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -853,7 +853,7 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2S
add r2, r4
dec r5d
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 0975d74fcf..9e0d505748 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -81,7 +81,7 @@ cglobal h264_weight_16, 6, 6, 0
add r0, r1
dec r2d
jnz .nextrow
- REP_RET
+ RET
%macro WEIGHT_FUNC_MM 2
cglobal h264_weight_%1, 6, 6, %2
@@ -92,7 +92,7 @@ cglobal h264_weight_%1, 6, 6, %2
add r0, r1
dec r2d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -117,7 +117,7 @@ cglobal h264_weight_%1, 6, 6, %2
add r0, r3
dec r2d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -215,7 +215,7 @@ cglobal h264_biweight_16, 7, 8, 0
add r1, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
%macro BIWEIGHT_FUNC_MM 2
cglobal h264_biweight_%1, 7, 8, %2
@@ -230,7 +230,7 @@ cglobal h264_biweight_%1, 7, 8, %2
add r1, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -259,7 +259,7 @@ cglobal h264_biweight_%1, 7, 8, %2
add r1, r4
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -294,7 +294,7 @@ cglobal h264_biweight_16, 7, 8, 8
add r1, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
INIT_XMM ssse3
cglobal h264_biweight_8, 7, 8, 8
@@ -317,4 +317,4 @@ cglobal h264_biweight_8, 7, 8, 8
add r1, r4
dec r3d
jnz .nextrow
- REP_RET
+ RET
diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm
index f924e55854..356871bc62 100644
--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -101,7 +101,7 @@ cglobal h264_weight_16_10
add r0, r1
dec r2d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -120,7 +120,7 @@ cglobal h264_weight_8_10
add r0, r1
dec r2d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -142,7 +142,7 @@ cglobal h264_weight_4_10
add r0, r3
dec r2d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -234,7 +234,7 @@ cglobal h264_biweight_16_10
add r1, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -253,7 +253,7 @@ cglobal h264_biweight_8_10
add r1, r2
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -275,7 +275,7 @@ cglobal h264_biweight_4_10
add r1, r4
dec r3d
jnz .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 888a28afa7..1ed861e0ae 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -166,7 +166,7 @@ INIT_YMM cpuname
add srcq, srcstrideq ; src += srcstride
dec heightd ; cmp height
jnz .loop ; height loop
- REP_RET
+ RET
%endmacro
diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm
index f81e2d5033..6148b66c9b 100644
--- a/libavcodec/x86/hevc_sao_10bit.asm
+++ b/libavcodec/x86/hevc_sao_10bit.asm
@@ -145,7 +145,7 @@ align 16
add srcq, srcstrideq
dec heightd
jg .loop
- REP_RET
+ RET
%endmacro
%macro HEVC_SAO_BAND_FILTER_FUNCS 0
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index ce5d7a4e28..10141290cb 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -78,7 +78,7 @@ cglobal put_pixels8_x2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -122,7 +122,7 @@ cglobal put_pixels16_x2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -166,7 +166,7 @@ cglobal put_no_rnd_pixels8_x2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -204,7 +204,7 @@ cglobal put_pixels8_y2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -244,7 +244,7 @@ cglobal put_no_rnd_pixels8_y2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -276,7 +276,7 @@ cglobal avg_pixels8, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX 3dnow
@@ -332,7 +332,7 @@ cglobal avg_pixels8_x2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmx
@@ -379,7 +379,7 @@ cglobal avg_pixels8_y2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -428,7 +428,7 @@ cglobal avg_approx_pixels8_xy2, 4,5
add r0, r4
sub r3d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -512,7 +512,7 @@ cglobal %1_pixels8_xy2, 4,5
add r4, r2
sub r3d, 2
jnz .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -580,7 +580,7 @@ cglobal %1_pixels8_xy2, 4,5
add r4, r2
sub r3d, 2
jnz .loop
- REP_RET
+ RET
%endmacro
INIT_MMX ssse3
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
index cba96d06cb..6118ac44e0 100644
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ b/libavcodec/x86/hpeldsp_vp3.asm
@@ -60,7 +60,7 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
lea r0, [r0+r2*4]
sub r3d, 4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -102,7 +102,7 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
lea r0, [r0+r2*4]
sub r3d, 4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 0d8cae354a..3c70e3a010 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -126,7 +126,7 @@ cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
jl .loop
movd m0, [dstq-4]
movd [leftq], m0
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
index 56b5fbd606..61a4822099 100644
--- a/libavcodec/x86/jpeg2000dsp.asm
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -99,7 +99,7 @@ align 16
movaps [src1q+csizeq], m5
add csizeq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -133,7 +133,7 @@ align 16
mova [src0q+csizeq], m2
add csizeq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index ba4d4f0153..7fe7f4255e 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -214,7 +214,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
inc wq
jl .3
.end:
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 4d79eee36b..480be67fe1 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -108,7 +108,7 @@ cglobal diff_bytes, 4,5,2, dst, src1, src2, w
inc wq
jl .loop_gpr_%1%2
.end_%1%2:
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
diff --git a/libavcodec/x86/mdct15.asm b/libavcodec/x86/mdct15.asm
index 0309112538..22aa938d24 100644
--- a/libavcodec/x86/mdct15.asm
+++ b/libavcodec/x86/mdct15.asm
@@ -212,7 +212,7 @@ cglobal mdct15_postreindex, 5, 7, 8 + cpuflag(avx2)*2, out, in, exp, lut, len8,
cmp offset_nq, offset_pq
jle .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse3
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index ad06d485ab..100225bdf3 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -463,7 +463,7 @@ cglobal hf_noise%1, 3,3,0, pix1, lsize, h
psrlq m6, 32
paddd m0, m6
movd eax, m0 ; eax = result of hf_noise8;
- REP_RET ; return eax;
+ RET ; return eax;
%endmacro
INIT_MMX mmx
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 440fe29bcc..3f028d6d3f 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -47,7 +47,7 @@ cglobal get_pixels, 3,4
lea r1, [r1+r2*2]
add r3, 32
js .loop
- REP_RET
+ RET
INIT_XMM sse2
cglobal get_pixels, 3, 4, 5
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 50e4255dec..b4973a2ee9 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -78,7 +78,7 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
.end_s:
cmp iq, wq
jl .loop_s
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm
index 4e72d5084f..481251314a 100644
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -81,7 +81,7 @@ cglobal %1_pixels4_l2, 6,6
add r2, 16
sub r5d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -125,7 +125,7 @@ cglobal %1_pixels8_l2, 6,6
add r2, 32
sub r5d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -171,7 +171,7 @@ cglobal %1_pixels16_l2, 6,6
add r2, 32
sub r5d, 2
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm
index 282faed14f..cf8ac764fd 100644
--- a/libavcodec/x86/qpeldsp.asm
+++ b/libavcodec/x86/qpeldsp.asm
@@ -92,7 +92,7 @@ cglobal put_no_rnd_pixels8_l2, 6,6
add r2, 32
sub r5d, 4
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -161,7 +161,7 @@ cglobal put_no_rnd_pixels16_l2, 6,6
add r2, 32
sub r5d, 2
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -276,7 +276,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
add r0, r2
dec r4d
jne .loop
- REP_RET
+ RET
%endmacro
%macro PUT_OP 2-3
@@ -359,7 +359,7 @@ cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
add r0, r2
dec r4d
jne .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -468,7 +468,7 @@ cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
add r0, r1
dec r4d
jne .loopv
- REP_RET
+ RET
%endmacro
%macro PUT_OPH 2-3
@@ -545,7 +545,7 @@ cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
add r0, r1
dec r4d
jne .loopv
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 692b4acfcd..2324b62636 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -54,7 +54,7 @@ cglobal rv34_idct_%1, 1, 2, 0
movq [r0+ 8], m0
movq [r0+16], m0
movq [r0+24], m0
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index bcad1aee80..7523cc978d 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -170,7 +170,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height,
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%endmacro
%macro FILTER_H 1
@@ -227,7 +227,7 @@ cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, heigh
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
@@ -294,7 +294,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
%ifdef PIC
@@ -327,7 +327,7 @@ cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
@@ -478,7 +478,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
.loop:
MAIN_LOOP %2, RND
jnz .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 62bbe512ec..b492947671 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -208,7 +208,7 @@ cglobal sbr_sum64x5, 1,2,4,z
add zq, 32
cmp zq, r1q
jne .loop
- REP_RET
+ RET
INIT_XMM sse
cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
@@ -227,7 +227,7 @@ cglobal sbr_qmf_post_shuffle, 2,3,4,W,z
add zq, 16
cmp zq, r2q
jl .loop
- REP_RET
+ RET
INIT_XMM sse
cglobal sbr_neg_odd_64, 1,2,4,z
@@ -248,7 +248,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
add zq, 64
cmp zq, r1q
jne .loop
- REP_RET
+ RET
; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
%macro SBR_QMF_DEINT_BFLY 0
@@ -283,7 +283,7 @@ cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
add vrevq, 2*mmsize
sub cq, 2*mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -320,7 +320,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
jge .loop
movq m2, [zq]
movq [r2q], m2
- REP_RET
+ RET
%ifdef PIC
%define NREGS 1
@@ -446,7 +446,7 @@ cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
sub vq, mmsize
add cq, mmsize
jl .loop
- REP_RET
+ RET
%macro SBR_AUTOCORRELATE 0
cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
index 5f3ded3ea2..be8e1ab553 100644
--- a/libavcodec/x86/takdsp.asm
+++ b/libavcodec/x86/takdsp.asm
@@ -43,7 +43,7 @@ cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
mova [p2q+lengthq+mmsize*1], m1
add lengthq, mmsize*2
jl .loop
- REP_RET
+ RET
cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
shl lengthd, 2
@@ -60,7 +60,7 @@ cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
mova [p1q+lengthq+mmsize*1], m1
add lengthq, mmsize*2
jl .loop
- REP_RET
+ RET
cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
shl lengthd, 2
@@ -87,7 +87,7 @@ cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
mova [p2q+lengthq+mmsize], m4
add lengthq, mmsize*2
jl .loop
- REP_RET
+ RET
INIT_XMM sse4
cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
@@ -113,4 +113,4 @@ cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
mova [p1q+lengthq], m1
add lengthq, mmsize
jl .loop
- REP_RET
+ RET
diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
index e44c1ea471..2a446bfba5 100644
--- a/libavcodec/x86/utvideodsp.asm
+++ b/libavcodec/x86/utvideodsp.asm
@@ -67,7 +67,7 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
add src_bq, linesize_bq
sub hd, 1
jg .nextrow
- REP_RET
+ RET
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
shl wd, 1
@@ -108,4 +108,4 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
add src_bq, linesize_bq
sub hd, 1
jg .nextrow
- REP_RET
+ RET
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
index c24c765e5b..949a5b3167 100644
--- a/libavcodec/x86/v210.asm
+++ b/libavcodec/x86/v210.asm
@@ -70,7 +70,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 7
add r4, 6
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM ssse3
diff --git a/libavcodec/x86/vc1dsp_mc.asm b/libavcodec/x86/vc1dsp_mc.asm
index 0e6d87dd8b..c1b3ed1bc3 100644
--- a/libavcodec/x86/vc1dsp_mc.asm
+++ b/libavcodec/x86/vc1dsp_mc.asm
@@ -139,7 +139,7 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
add dstq, 8
dec i
jnz .loop
- REP_RET
+ RET
%undef rnd
%undef shift
%undef stride_neg2
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index e237860700..96da851874 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -457,7 +457,7 @@ cglobal prefetch, 3, 3, 0, buf, stride, h
add bufq, strideq
dec hd
jg .loop
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 75de5690a1..68ad5c839f 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -200,7 +200,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
@@ -230,7 +230,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
@@ -268,7 +268,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
lea myd, [myq*3]
@@ -314,7 +314,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX ssse3
@@ -368,7 +368,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
; 4x4 block, H-only 6-tap filter
INIT_MMX mmxext
@@ -426,7 +426,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
INIT_XMM sse2
cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
@@ -474,7 +474,7 @@ cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, h
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
INIT_XMM sse2
cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
@@ -537,7 +537,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%macro FILTER_V 1
; 4x4 block, V-only 4-tap filter
@@ -590,7 +590,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
; 4x4 block, V-only 6-tap filter
@@ -655,7 +655,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
add srcq, srcstrideq
dec heightd ; next row
jg .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -738,7 +738,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
lea srcq, [srcq+srcstrideq*2]
sub heightd, 2
jg .nextrow
- REP_RET
+ RET
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
@@ -815,7 +815,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
lea srcq, [srcq+srcstrideq*2]
sub heightd, 2
jg .nextrow
- REP_RET
+ RET
%endmacro
INIT_MMX mmxext
@@ -838,7 +838,7 @@ cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
lea dstq, [dstq+dststrideq*2]
sub heightd, 2
jg .nextrow
- REP_RET
+ RET
%if ARCH_X86_32
INIT_MMX mmx
@@ -856,7 +856,7 @@ cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height
lea dstq, [dstq+dststrideq*2]
sub heightd, 2
jg .nextrow
- REP_RET
+ RET
%endif
INIT_XMM sse
@@ -870,7 +870,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
lea dstq, [dstq+dststrideq*2]
sub heightd, 2
jg .nextrow
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
diff --git a/libavcodec/x86/vp8dsp_loopfilter.asm b/libavcodec/x86/vp8dsp_loopfilter.asm
index caeb405267..95ce9153ce 100644
--- a/libavcodec/x86/vp8dsp_loopfilter.asm
+++ b/libavcodec/x86/vp8dsp_loopfilter.asm
@@ -404,7 +404,7 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
%endif
dec cntrq
jg .next8px
- REP_RET
+ RET
%else ; sse2
RET
%endif
@@ -888,7 +888,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, stack_size, dst, stride, flimE, f
dec cntrq
jg .next8px
%endif
- REP_RET
+ RET
%else ; mmsize == 16
RET
%endif
@@ -1547,7 +1547,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, stack_size, dst1, stride, flimE,
dec cntrq
jg .next8px
%endif
- REP_RET
+ RET
%else ; mmsize == 16
RET
%endif
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 849d85e70f..fc6f748850 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -57,4 +57,4 @@ ALIGN 16
movaps [sumq + lenq+mmsize], m3
add lenq, mmsize*2
jl .loop
- REP_RET
+ RET
diff --git a/libavfilter/x86/af_volume.asm b/libavfilter/x86/af_volume.asm
index 723ab1f8fb..35a00784a2 100644
--- a/libavfilter/x86/af_volume.asm
+++ b/libavfilter/x86/af_volume.asm
@@ -56,7 +56,7 @@ cglobal scale_samples_s16, 4,4,4, dst, src, len, volume
mova [dstq+lenq], m3
sub lenq, mmsize
jge .loop
- REP_RET
+ RET
;------------------------------------------------------------------------------
; void ff_scale_samples_s32(uint8_t *dst, const uint8_t *src, int len,
@@ -93,7 +93,7 @@ cglobal scale_samples_s32, 4,4,4, dst, src, len, volume
%endif
sub lenq, mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -137,4 +137,4 @@ cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
mova [dstq+lenq], m0
sub lenq, mmsize
jge .loop
- REP_RET
+ RET
diff --git a/libavfilter/x86/avf_showcqt.asm b/libavfilter/x86/avf_showcqt.asm
index 63e58408cd..16af0de9b0 100644
--- a/libavfilter/x86/avf_showcqt.asm
+++ b/libavfilter/x86/avf_showcqt.asm
@@ -127,7 +127,7 @@ cglobal showcqt_cqt_calc, 5, 10, 12, dst, src, coeffs, len, fft_len, x, coeffs_v
lea dstq, [dstq + 16]
lea coeffsq, [coeffsq + 2*Coeffs.sizeof]
jnz .loop_k
- REP_RET
+ RET
align 16
.check_loop_a:
cmp xd, [coeffsq + Coeffs.len]
@@ -170,7 +170,7 @@ cglobal showcqt_cqt_calc, 4, 7, 8, dst, src, coeffs, len, x, coeffs_val, i
lea dstq, [dstq + 8]
lea coeffsq, [coeffsq + Coeffs.sizeof]
jnz .loop_k
- REP_RET
+ RET
%endif ; ARCH_X86_64
%endmacro ; DECLARE_CQT_CALC
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 4916aaf251..a976103a29 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -56,7 +56,7 @@ cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end
add dstq, dst_linesizeq
sub endd, 1
jg .nextrow
-REP_RET
+RET
%endmacro
%macro BLEND_SIMPLE 2
diff --git a/libavfilter/x86/vf_gradfun.asm b/libavfilter/x86/vf_gradfun.asm
index 3581f89fe8..d106d52100 100644
--- a/libavfilter/x86/vf_gradfun.asm
+++ b/libavfilter/x86/vf_gradfun.asm
@@ -64,7 +64,7 @@ cglobal gradfun_filter_line, 6, 6
add r0, 4
jl .loop
.end:
- REP_RET
+ RET
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
@@ -78,7 +78,7 @@ cglobal gradfun_filter_line, 6, 6, 8
FILTER_LINE m4
add r0, 8
jl .loop
- REP_RET
+ RET
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
@@ -102,7 +102,7 @@ cglobal gradfun_blur_line_%1, 6, 6, 8
mova [r3+r0], m0
add r0, 16
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavfilter/x86/vf_hqdn3d.asm b/libavfilter/x86/vf_hqdn3d.asm
index e3b1bdca53..2c0ca45571 100644
--- a/libavfilter/x86/vf_hqdn3d.asm
+++ b/libavfilter/x86/vf_hqdn3d.asm
@@ -97,7 +97,7 @@ ALIGN 16
inc xq
jl .loop
je .loop2
- REP_RET
+ RET
%endmacro ; HQDN3D_ROW
HQDN3D_ROW 8
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 7c0065d4d9..8e0ea6c22e 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -57,7 +57,7 @@ SECTION .text
add hq, 2*mmsize
jl .loop
-REP_RET
+RET
%endmacro
%macro LOWPASS_LINE 0
@@ -129,7 +129,7 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
add srcq, mmsize
sub hd, mmsize
jg .loop
-REP_RET
+RET
cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
movd m7, DWORD clip_maxm
@@ -189,7 +189,7 @@ cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
add srcq, 2*mmsize
sub hd, mmsize
jg .loop
-REP_RET
+RET
%endmacro
INIT_XMM sse2
diff --git a/libavfilter/x86/vf_maskedmerge.asm b/libavfilter/x86/vf_maskedmerge.asm
index 7e61935b97..74b8d099dd 100644
--- a/libavfilter/x86/vf_maskedmerge.asm
+++ b/libavfilter/x86/vf_maskedmerge.asm
@@ -78,4 +78,4 @@ cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
add dstq, dlinesizeq
sub hd, 1
jg .nextrow
-REP_RET
+RET
diff --git a/libavfilter/x86/vf_stereo3d.asm b/libavfilter/x86/vf_stereo3d.asm
index a057e495f1..b6a293b18e 100644
--- a/libavfilter/x86/vf_stereo3d.asm
+++ b/libavfilter/x86/vf_stereo3d.asm
@@ -213,4 +213,4 @@ cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt
add rsrcq, r_linesizeq
sub heightd, 1
jg .nextrow
-REP_RET
+RET
diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
index 52628c38d7..3010469f97 100644
--- a/libavfilter/x86/vf_w3fdif.asm
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -38,7 +38,7 @@ cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize
add work_pixelq, mmsize*2
sub linesized, mmsize/2
jg .loop
-REP_RET
+RET
cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset
movd m1, [coefq]
@@ -63,7 +63,7 @@ cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize,
add offsetq, mmsize/2
sub linesized, mmsize/2
jg .loop
-REP_RET
+RET
cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
movq m0, [coefq]
@@ -99,7 +99,7 @@ cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
add offsetq, mmsize/2
sub linesized, mmsize/2
jg .loop
-REP_RET
+RET
%if ARCH_X86_64
cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
@@ -179,7 +179,7 @@ cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0,
add offsetq, mmsize/2
sub linesized, mmsize/2
jg .loop
-REP_RET
+RET
%if ARCH_X86_64
@@ -254,6 +254,6 @@ cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_ad
add offsetq, mmsize/2
sub linesized, mmsize/2
jg .loop
-REP_RET
+RET
%endif
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index c6a5015282..b508ef8cf4 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -58,7 +58,7 @@ cglobal conv_s16_to_s32, 3,3,3, dst, src, len
mova [dstq+2*lenq+mmsize], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
;------------------------------------------------------------------------------
; void ff_conv_s16_to_flt(float *dst, const int16_t *src, int len);
@@ -83,7 +83,7 @@ cglobal conv_s16_to_flt, 3,3,3, dst, src, len
mova [dstq+2*lenq+mmsize], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -120,7 +120,7 @@ cglobal conv_s32_to_s16, 3,3,4, dst, src, len
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -150,7 +150,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
mova [dstq+lenq+mmsize], m2
add lenq, mmsize*2
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -190,7 +190,7 @@ cglobal conv_flt_to_s16, 3,3,5, dst, src, len
mova [dstq+lenq+mmsize], m2
add lenq, mmsize*2
jl .loop
- REP_RET
+ RET
;------------------------------------------------------------------------------
; void ff_conv_flt_to_s32(int32_t *dst, const float *src, int len);
@@ -223,7 +223,7 @@ cglobal conv_flt_to_s32, 3,3,6, dst, src, len
mova [dstq+lenq+3*mmsize], m3
add lenq, mmsize*4
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -260,7 +260,7 @@ cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1
mova [dstq+2*lenq+3*mmsize], m3
add lenq, 2*mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -381,7 +381,7 @@ cglobal conv_s16p_to_s16_6ch, 2,7,7, dst, src0, src1, src2, src3, src4, src5
sub lend, mmsize/2
%endif
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -432,7 +432,7 @@ cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
mova [dstq+4*lenq+3*mmsize], m3
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -534,7 +534,7 @@ cglobal conv_s16p_to_flt_6ch, 2,7,8, dst, src, src1, src2, src3, src4, src5
add dstq, mmsize*6
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -580,7 +580,7 @@ cglobal conv_fltp_to_s16_2ch, 3,4,3, dst, src0, len, src1
mova [dstq+lenq], m0
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -689,7 +689,7 @@ cglobal conv_fltp_to_s16_6ch, 2,7,7, dst, src, src1, src2, src3, src4, src5
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -729,7 +729,7 @@ cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1
mova [dstq+2*lenq+3*mmsize], m3
add lenq, 2*mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -807,7 +807,7 @@ cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -857,7 +857,7 @@ cglobal conv_s16_to_s16p_2ch, 3,4,4, dst0, src, len, dst1
mova [dst1q+lenq], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -916,7 +916,7 @@ cglobal conv_s16_to_s16p_6ch, 2,7,5, dst, src, dst1, dst2, dst3, dst4, dst5
add dstq, mmsize/2
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -956,7 +956,7 @@ cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1
mova [dst1q+lenq], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -1033,7 +1033,7 @@ cglobal conv_s16_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
add dstq, mmsize
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -1083,7 +1083,7 @@ cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
mova [dst1q+lenq], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -1153,7 +1153,7 @@ cglobal conv_flt_to_s16p_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
add dstq, mmsize/2
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -1187,7 +1187,7 @@ cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1
mova [dst1q+lenq], m1
add lenq, mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -1250,7 +1250,7 @@ cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
add dstq, mmsize
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index fe27d6a6c9..ee9a426be4 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -50,7 +50,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
add srcq, mmsize*2
sub lend, mmsize*2/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -96,7 +96,7 @@ cglobal mix_2_to_1_s16p_flt, 3,4,6, src, matrix, len, src1
add srcq, mmsize
sub lend, mmsize/2
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -143,7 +143,7 @@ cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1
add srcq, mmsize
sub lend, mmsize/2
jg .loop
- REP_RET
+ RET
;-----------------------------------------------------------------------------
; void ff_mix_1_to_2_fltp_flt(float **src, float **matrix, int len,
@@ -169,7 +169,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
add src0q, mmsize
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -214,7 +214,7 @@ cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1
add src0q, mmsize
sub lend, mmsize/2
jg .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavresample/x86/dither.asm b/libavresample/x86/dither.asm
index d677c7179a..f9079f0393 100644
--- a/libavresample/x86/dither.asm
+++ b/libavresample/x86/dither.asm
@@ -53,7 +53,7 @@ cglobal quantize, 4,4,3, dst, src, dither, len
mova [dstq+lenq], m0
add lenq, mmsize
jl .loop
- REP_RET
+ RET
;------------------------------------------------------------------------------
; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
@@ -75,7 +75,7 @@ cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
mova [dstq+lenq+mmsize], m2
add lenq, 2*mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -108,7 +108,7 @@ cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
mova [dstq+lenq+mmsize], m2
add lenq, 2*mmsize
jl .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 06d2d2cfd1..16bb5c16a5 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -48,7 +48,7 @@ ALIGN 16
sub lenq, 64
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -108,7 +108,7 @@ cglobal vector_fmac_scalar, 4,4,5, dst, src, mul, len
%endif ; mmsize
sub lenq, 64
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -145,7 +145,7 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
mova [dstq+lenq], m1
sub lenq, mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -200,7 +200,7 @@ cglobal vector_dmac_scalar, 4,4,5, dst, src, mul, len
movaps [dstq+lenq+3*mmsize], m4
sub lenq, mmsize*4
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -247,7 +247,7 @@ cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
movaps [dstq+lenq+mmsize], m2
sub lenq, 2*mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse2
@@ -307,7 +307,7 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
%if mmsize == 8
femms
%endif
- REP_RET
+ RET
%endmacro
INIT_MMX 3dnowext
@@ -342,7 +342,7 @@ ALIGN 16
sub lenq, 2*mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -391,7 +391,7 @@ ALIGN 16
add src1q, 2*mmsize
sub lenq, 2*mmsize
jge .loop
- REP_RET
+ RET
%endmacro
INIT_XMM sse
@@ -448,4 +448,4 @@ cglobal butterflies_float, 3,3,3, src0, src1, len
mova [src0q + lenq], m0
add lenq, mmsize
jl .loop
- REP_RET
+ RET
diff --git a/libavutil/x86/lls.asm b/libavutil/x86/lls.asm
index 317fba6fca..95505dabeb 100644
--- a/libavutil/x86/lls.asm
+++ b/libavutil/x86/lls.asm
@@ -123,7 +123,7 @@ cglobal update_lls, 2,5,8, ctx, var, i, j, covar2
test id, id
jle .loop2x1
.ret:
- REP_RET
+ RET
%macro UPDATE_LLS 0
cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
@@ -240,7 +240,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
cmp id, countd
jle .loop2x1
.ret:
- REP_RET
+ RET
%endmacro ; UPDATE_LLS
%if HAVE_AVX_EXTERNAL
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 6a054a3e09..71425201ef 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -144,9 +144,6 @@
; RET:
; Pops anything that was pushed by PROLOGUE, and returns.
-; REP_RET:
-; Use this instead of RET if it's a branch target.
-
; registers:
; rN and rNq are the native-size register holding function argument N
; rNd, rNw, rNb are dword, word, and byte size
@@ -634,19 +631,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%endif
-; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
-; a branch or a branch target. So switch to a 2-byte form of ret in that case.
-; We can automatically detect "follows a branch", but not a branch target.
-; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
-%macro REP_RET 0
- %if has_epilogue || cpuflag(ssse3)
- RET
- %else
- rep ret
- %endif
- annotate_function_size
-%endmacro
-
%define last_branch_adr $$
%macro AUTO_REP_RET 0
%if notcpuflag(ssse3)
diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index d441636d3c..969228cfeb 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -85,7 +85,7 @@ pack_2ch_%2_to_%1_u_int %+ SUFFIX:
add lenq, 2*mmsize/(2<<%4)
%endif
jl .next
- REP_RET
+ RET
%endmacro
%macro UNPACK_2CH 5-7
@@ -157,7 +157,7 @@ unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
add lenq, mmsize/(1<<%4)
%endif
jl .next
- REP_RET
+ RET
%endmacro
%macro CONV 5-7
@@ -198,7 +198,7 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -301,7 +301,7 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX:
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -375,7 +375,7 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
add dstq, mmsize
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
@@ -525,7 +525,7 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX:
%endif
sub lend, mmsize/4
jg .loop
- REP_RET
+ RET
%endmacro
%macro INT16_TO_INT32_N 6
diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm
index 7984b9a729..c49ccc7db6 100644
--- a/libswresample/x86/rematrix.asm
+++ b/libswresample/x86/rematrix.asm
@@ -68,7 +68,7 @@ mix_2_1_float_u_int %+ SUFFIX:
mov%1 [outq + lenq + mmsize], m2
add lenq, mmsize*2
jl .next
- REP_RET
+ RET
%endmacro
%macro MIX1_FLT 1
@@ -100,7 +100,7 @@ mix_1_1_float_u_int %+ SUFFIX:
mov%1 [outq + lenq + mmsize], m1
add lenq, mmsize*2
jl .next
- REP_RET
+ RET
%endmacro
%macro MIX1_INT16 1
@@ -152,7 +152,7 @@ mix_1_1_int16_u_int %+ SUFFIX:
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
@@ -218,7 +218,7 @@ mix_2_1_int16_u_int %+ SUFFIX:
emms
RET
%else
- REP_RET
+ RET
%endif
%endmacro
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index af9afcaa53..6c8c6ae883 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -190,7 +190,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
mova [dstq+wq], m0
add wq, mmsize
jl .loop
- REP_RET
+ RET
%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
%endmacro
@@ -305,7 +305,7 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%endif ; mmsize == 8/16
add wq, mmsize
jl .loop
- REP_RET
+ RET
%endif ; ARCH_X86_64 && %0 == 3
%endmacro
@@ -391,7 +391,7 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
add wq, 2
jl .loop2
.end:
- REP_RET
+ RET
%endif ; %0 == 3
%endmacro
@@ -493,7 +493,7 @@ cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
add wq, 2
jl .loop2
.end:
- REP_RET
+ RET
%endif ; ARCH_X86_64 && %0 == 3
%endmacro
@@ -550,7 +550,7 @@ RGB32_FUNCS 8, 12
mova [dstq+wq], m0
add wq, mmsize
jl .loop_%1
- REP_RET
+ RET
%endmacro
; %1 = nr. of XMM registers
@@ -620,7 +620,7 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
%endif ; mmsize == 8/16
add wq, mmsize / 2
jl .loop_%1
- REP_RET
+ RET
%endmacro
; %1 = nr. of XMM registers
@@ -678,7 +678,7 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%endif ; nv12/21
add wq, mmsize
jl .loop_%1
- REP_RET
+ RET
%endmacro
; %1 = nr. of XMM registers
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index db3e9934f8..22392a093e 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -241,7 +241,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
test dstq, 15
jnz .unaligned
yuv2planeX_mainloop %1, a
- REP_RET
+ RET
.unaligned:
yuv2planeX_mainloop %1, u
%endif ; mmsize == 8/16
@@ -251,10 +251,10 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
ADD rsp, pad
RET
%else ; x86-64
- REP_RET
+ RET
%endif ; x86-32/64
%else ; %1 == 9/10/16
- REP_RET
+ RET
%endif ; %1 == 8/9/10/16
%endmacro
@@ -390,11 +390,11 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
test dstq, 15
jnz .unaligned
yuv2plane1_mainloop %1, a
- REP_RET
+ RET
.unaligned:
yuv2plane1_mainloop %1, u
%endif ; mmsize == 8/16
- REP_RET
+ RET
%endmacro
%if ARCH_X86_32
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 83cabff722..cd76132474 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -378,7 +378,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
add wq, 2
%endif ; %3 ==/!= X
jl .loop
- REP_RET
+ RET
%endmacro
; SCALE_FUNCS source_width, intermediate_nbits, n_xmm
diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm
index 683aae80e3..ab11bcba64 100644
--- a/tests/checkasm/x86/checkasm.asm
+++ b/tests/checkasm/x86/checkasm.asm
@@ -234,7 +234,7 @@ cglobal checked_call%1, 1,7
.emms_ok:
%endif
add esp, max_args*4
- REP_RET
+ RET
%endmacro
%endif ; ARCH_X86_64
--
2.15.0.448.gf294e3d99a