[FFmpeg-devel] [PATCH 1/2] checkasm: add sbrdsp tests

James Almer jamrial at gmail.com
Tue Jul 4 21:15:56 EEST 2017


On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>> <michael at niedermayer.cc> wrote:
>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>
>>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
>>>>>>
>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0
>>>>>>
>>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
>>>>>>
>>>>>> Another case of a 32-bit int being used as part of a 64-bit operation.
>>>>>
>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some reason,
>>>>> but based on what you said I assume the attached patch should fix it.
>>>>
>>>> no crash occurs here with this, so it seems fixed
>>>
>>> Should I push the patchset or wait a little bit longer?
>>
>> Patchset applied.
> 
> It seems there's still some issue in this:
> 
> checkasm: using random seed 3655967467
> MMX:
>  - audiodsp.audiodsp             [OK]
>  - blockdsp.blockdsp             [OK]
>  - h264dsp.idct                  [OK]
>  - h264pred.pred4x4              [OK]
>  - h264pred.pred8x8              [OK]
>  - h264pred.pred16x16            [OK]
>  - pixblockdsp.get_pixels        [OK]
>  - pixblockdsp.diff_pixels       [OK]
>  - vp8dsp.idct                   [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.itxfm                  [OK]
>  - vp9dsp.mc                     [OK]
> MMXEXT:
>  - audiodsp.audiodsp             [OK]
>  - h264dsp.idct                  [OK]
>  - h264pred.pred4x4              [OK]
>  - h264pred.pred8x8              [OK]
>  - h264pred.pred16x16            [OK]
>  - h264pred.pred8x8l             [OK]
>  - h264qpel.put                  [OK]
>  - h264qpel.avg                  [OK]
>  - hevc_add_res.add_residual     [OK]
>  - hevc_idct.idct_dc             [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.itxfm                  [OK]
>  - vp9dsp.loopfilter             [OK]
>  - vp9dsp.mc                     [OK]
> SSE:
>  - aacpsdsp.add_squares          [OK]
>  - aacpsdsp.mul_pair_single      [OK]
>  - aacpsdsp.hybrid_analysis      [OK]
>  - sbrdsp.sum64x5                [OK]
>  - sbrdsp.sum_square             [OK]
>  - sbrdsp.neg_odd_64             [OK]
>  - sbrdsp.qmf_post_shuffle       [OK]
>  - sbrdsp.qmf_deint_neg          [OK]
>  - sbrdsp.qmf_deint_bfly         [OK]
>  - sbrdsp.autocorrelate          [OK]
>  - sbrdsp.hf_gen                 [OK]
>  - sbrdsp.hf_g_filt              [OK]
>  - audiodsp.audiodsp             [OK]
>  - blockdsp.blockdsp             [OK]
>  - fmtconvert.fmtconvert         [OK]
>  - h264pred.pred16x16            [OK]
>  - vp8dsp.idct                   [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.mc                     [OK]
>  - float_dsp.vector_fmul         [OK]
>  - float_dsp.vector_fmac         [OK]
>  - float_dsp.butterflies_float   [OK]
>  - float_dsp.scalarproduct_float [OK]
> SSE2:
>  - sbrdsp.qmf_pre_shuffle        [OK]
>  - sbrdsp.qmf_deint_bfly         [OK]
> 
> Program received signal SIGSEGV, Segmentation fault.
> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> 418         movu       m7, [Yq + 2*count + mmsize]
> (gdb) bt
> Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
> #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> #1  0x000000000043659b in checkasm_checked_call () at tests/checkasm/x86/checkasm.asm:77
> #2  0xdeadbeefdeadbeef in ?? ()
> #3  0xdeadbeefdeadbeef in ?? ()
> #4  0xdeadbeefdeadbeef in ?? ()
> #5  0xdeadbeefdeadbeef in ?? ()
> #6  0xdeadbeefdeadbeef in ?? ()
> #7  0xdeadbeefdeadbeef in ?? ()
> #8  0xdeadbeefdeadbeef in ?? ()
> #9  0xdeadbeefdeadbeef in ?? ()
> #10 0xdeadbeefdeadbeef in ?? ()
> #11 0xdeadbeefdeadbeef in ?? ()
> #12 0xdeadbeefdeadbeef in ?? ()
> #13 0xdeadbeefdeadbeef in ?? ()
> #14 0xdeadbeefdeadbeef in ?? ()
> #15 0xdeadbeefdeadbeef in ?? ()
> #16 0xdeadbeefdeadbeef in ?? ()
> #17 0xdeadbeefdeadbeef in ?? ()
> #18 0xdeadbeefdeadbeef in ?? ()
> #19 0x00007fffffffd870 in ?? ()
> #20 0x00007fffffffcc70 in ?? ()
> #21 0x00007fffffffce70 in ?? ()
> #22 0x0000000000000000 in ?? ()
> (gdb) info all-registers
> rax            0x0      0
> rbx            0xed56bb2dcb3c7736       -1344681633365854410
> rcx            0x8e8    2280
> rdx            0x7ab77bbbffffd070       8842672440749314160
> rsi            0x7ab77bbbffffce70       8842672440749313648
> rdi            0xf56e7777ffffdc70       -761539929699263376
> rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
> rsp            0x7fffffffcae8   0x7fffffffcae8
> r8             0xdeadbeef00000000       -2401053092612145152
> r9             0x85490444000009c0       -8842531703260968512
> r10            0x684bf0 6835184
> r11            0x1      1
> r12            0x4a75479abd64e097       5365273261009854615
> r13            0x249214109d5d1c88       2635190793557318792
> r14            0xb64a9c9e5d318408       -5311260606547786744
> r15            0xdf9a54b303f1d3a3       -2334460328996121693
> rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
> eflags         0x10206  [ PF IF RF ]
> cs             0x33     51
> ss             0x2b     43
> ds             0x0      0
> es             0x0      0
> fs             0x0      0
> gs             0x0      0
> st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
> st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
> st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
> st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
> st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
> st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
> st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
> st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
> 

Does the attached patch fix it?
-------------- next part --------------
From 14c4b77569af06ae181e521330aef6290f29fca1 Mon Sep 17 00:00:00 2001
From: James Almer <jamrial at gmail.com>
Date: Tue, 4 Jul 2017 15:05:47 -0300
Subject: [PATCH] x86/sbrdsp: zero extend m_max in apply_noise_main

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/sbrdsp.asm | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c716184b14..62bbe512ec 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
 apply_noise_main:
 %if ARCH_X86_64 == 0 || WIN64
     mov       kxd, m_maxm
-%define count kxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, count
 %else
-%define count m_maxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
 %endif
     movsxdifnidn    noiseq, noised
     dec    noiseq
-    shl    count, 2
+    shl    countd, 2
 %ifdef PIC
     lea NOISE_TABLE, [sbr_noise_table]
 %endif
-    lea        Yq, [Yq + 2*count]
-    add      s_mq, count
-    add   q_filtq, count
+    lea        Yq, [Yq + 2*countq]
+    add      s_mq, countq
+    add   q_filtq, countq
     shl    noiseq, 3
     pxor       m5, m5
-    neg    count
+    neg    countq
 .loop:
-    mova       m1, [q_filtq + count]
+    mova       m1, [q_filtq + countq]
     movu       m3, [noiseq + NOISE_TABLE + 1*mmsize]
     movu       m4, [noiseq + NOISE_TABLE + 2*mmsize]
     add    noiseq, 2*mmsize
@@ -404,7 +404,7 @@ apply_noise_main:
     punpckldq  m1, m1
     mulps      m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
     mulps      m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
-    mova       m3, [s_mq + count]
+    mova       m3, [s_mq + countq]
     ; TODO: replace by a vpermd in AVX2
     punpckhdq  m4, m3, m3
     punpckldq  m3, m3
@@ -414,15 +414,15 @@ apply_noise_main:
     mulps      m4, m0 ; s_m[m] * phi_sign
     pand       m1, m6
     pand       m2, m7
-    movu       m6, [Yq + 2*count]
-    movu       m7, [Yq + 2*count + mmsize]
+    movu       m6, [Yq + 2*countq]
+    movu       m7, [Yq + 2*countq + mmsize]
     addps      m3, m1
     addps      m4, m2
     addps      m6, m3
     addps      m7, m4
-    movu    [Yq + 2*count], m6
-    movu    [Yq + 2*count + mmsize], m7
-    add    count, mmsize
+    movu    [Yq + 2*countq], m6
+    movu    [Yq + 2*countq + mmsize], m7
+    add    countq, mmsize
     jl      .loop
     RET
 
-- 
2.13.0



More information about the ffmpeg-devel mailing list