[FFmpeg-devel] [PATCH] avfilter/vf_noise: move inline assembly to own file

Paul B Mahol onemda at gmail.com
Tue Nov 12 15:21:41 CET 2013


On 11/12/13, Michael Niedermayer <michaelni at gmx.at> wrote:
> On Tue, Nov 12, 2013 at 10:34:14AM +0000, Paul B Mahol wrote:
>> On 10/21/13, Paul B Mahol <onemda at gmail.com> wrote:
>> > On 9/12/13, Paul B Mahol <onemda at gmail.com> wrote:
>> >> Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> >> ---
>> >>  libavfilter/vf_noise.c     | 166
>> >> +++------------------------------------------
>> >>  libavfilter/vf_noise.h     |  64 +++++++++++++++++
>> >>  libavfilter/x86/Makefile   |   1 +
>> >>  libavfilter/x86/vf_noise.c | 145
>> >> +++++++++++++++++++++++++++++++++++++++
>> >>  4 files changed, 219 insertions(+), 157 deletions(-)
>> >>  create mode 100644 libavfilter/vf_noise.h
>> >>  create mode 100644 libavfilter/x86/vf_noise.c
>> >>
>> >
>> > ping
>> >
>>
>> ping
>
> --- a/HEAD^:libavfilter/vf_noise.c
> +++ b/HEAD:libavfilter/x86/vf_noise.c
>
> -}
> -
> -static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
> -                              int8_t *noise, int len, int shift)
> -{
> -#if HAVE_MMXEXT_INLINE
> -    x86_reg mmx_len= len&(~7);
> -    noise+=shift;
> -
> -    __asm__ volatile(
> -            "mov %3, %%"REG_a"                \n\t"
> -            "pcmpeqb %%mm7, %%mm7             \n\t"
> -            "psllw $15, %%mm7                 \n\t"
> -            "packsswb %%mm7, %%mm7            \n\t"
> -            ASMALIGN(4)
> -            "1:                               \n\t"
> -            "movq (%0, %%"REG_a"), %%mm0      \n\t"
> -            "movq (%1, %%"REG_a"), %%mm1      \n\t"
> -            "pxor %%mm7, %%mm0                \n\t"
> -            "paddsb %%mm1, %%mm0              \n\t"
> -            "pxor %%mm7, %%mm0                \n\t"
> -            "movntq %%mm0, (%2, %%"REG_a")    \n\t"
> -            "add $8, %%"REG_a"                \n\t"
> -            " js 1b                           \n\t"
> -            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len),
> "g" (-mmx_len)
> -            : "%"REG_a
> -            );
> -    if (mmx_len != len)
> -        line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len,
> 0);
> -#endif
> +        ff_noise_filter_line_c(dst+mmx_len, src+mmx_len, noise+mmx_len,
> len-mmx_len, 0);
>  }
>
> -static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src,
> +static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
>                             int len, int8_t **shift)
>  {
> -    int i;
> -    int8_t *src2 = (int8_t*)src;
> -
> -    for (i = 0; i < len; i++) {
> -        const int n = shift[0][i] + shift[1][i] + shift[2][i];
> -        dst[i] = src2[i] + ((n * src2[i]) >> 7);
> -    }
> -}
> -
> -static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
> -                                      int len, int8_t **shift)
> -{
> -#if HAVE_MMX_INLINE
>      x86_reg mmx_len= len&(~7);
>
>      __asm__ volatile(
> @@ -325,164 +93,53 @@ static inline void line_noise_avg_mmx(uint8_t *dst,
> const uint8_t *src,
>
>      if (mmx_len != len){
>          int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len,
> shift[2]+mmx_len};
> -        line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2);
> -    }
> -#endif
> -}
> -
> -static void noise(uint8_t *dst, const uint8_t *src,
> -                  int dst_linesize, int src_linesize,
> -                  int width, int start, int end, NoiseContext *n, int
> comp)
> -{
> -    FilterParams *p = &n->param[comp];
> -    int8_t *noise = p->noise;
> -    const int flags = p->flags;
> -    AVLFG *lfg = &p->lfg;
> -    int shift, y;
> -
> -    if (!noise) {
> -        if (dst != src)
> -            av_image_copy_plane(dst, dst_linesize, src, src_linesize,
> width, end - start);
> -        return;
> -    }
> -
> -    for (y = start; y < end; y++) {
> -        const int ix = y & (MAX_RES - 1);
> -        if (flags & NOISE_TEMPORAL)
> -            shift = av_lfg_get(lfg) & (MAX_SHIFT - 1);
> -        else
> -            shift = n->rand_shift[ix];
> -
> -        if (flags & NOISE_AVERAGED) {
> -            n->line_noise_avg(dst, src, width, p->prev_shift[ix]);
> -            p->prev_shift[ix][shift & 3] = noise + shift;
> -        } else {
> -            n->line_noise(dst, src, noise, width, shift);
> -        }
> -        dst += dst_linesize;
> -        src += src_linesize;
> -    }
> -}
> -
> -static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int
> nb_jobs)
> -{
> -    NoiseContext *s = ctx->priv;
> -    ThreadData *td = arg;
> -    int plane;
> -
> -    for (plane = 0; plane < s->nb_planes; plane++) {
> -        const int height = s->height[plane];
> -        const int start  = (height *  jobnr   ) / nb_jobs;
> -        const int end    = (height * (jobnr+1)) / nb_jobs;
> -        noise(td->out->data[plane] + start * td->out->linesize[plane],
> -              td->in->data[plane]  + start * td->in->linesize[plane],
> -              td->out->linesize[plane], td->in->linesize[plane],
> -              s->bytewidth[plane], start, end, s, plane);
> +        ff_noise_filter_line_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len,
> shift2);
>      }
> -    return 0;
>  }
> +#endif
>
> -static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
> +#if HAVE_MMXEXT_INLINE
> +static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
> +                              int8_t *noise, int len, int shift)
>  {
> -    AVFilterContext *ctx = inlink->dst;
> -    AVFilterLink *outlink = ctx->outputs[0];
> -    NoiseContext *n = ctx->priv;
> -    ThreadData td;
> -    AVFrame *out;
> -
> -    if (av_frame_is_writable(inpicref)) {
> -        out = inpicref;
> -    } else {
> -        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> -        if (!out) {
> -            av_frame_free(&inpicref);
> -            return AVERROR(ENOMEM);
> -        }
> -        av_frame_copy_props(out, inpicref);
> -    }
> -
> -    td.in = inpicref; td.out = out;
> -    ctx->internal->execute(ctx, filter_slice, &td, NULL,
> FFMIN(n->height[0], ctx->graph->nb_threads));
> -    emms_c();
> +    x86_reg mmx_len= len&(~7);
> +    noise+=shift;
>
> -    if (inpicref != out)
> -        av_frame_free(&inpicref);
> -    return ff_filter_frame(outlink, out);
> +    __asm__ volatile(
> +            "mov %3, %%"REG_a"                \n\t"
> +            "pcmpeqb %%mm7, %%mm7             \n\t"
> +            "psllw $15, %%mm7                 \n\t"
> +            "packsswb %%mm7, %%mm7            \n\t"
> +            ASMALIGN(4)
> +            "1:                               \n\t"
> +            "movq (%0, %%"REG_a"), %%mm0      \n\t"
> +            "movq (%1, %%"REG_a"), %%mm1      \n\t"
> +            "pxor %%mm7, %%mm0                \n\t"
> +            "paddsb %%mm1, %%mm0              \n\t"
> +            "pxor %%mm7, %%mm0                \n\t"
> +            "movntq %%mm0, (%2, %%"REG_a")    \n\t"
> +            "add $8, %%"REG_a"                \n\t"
> +            " js 1b                           \n\t"
> +            :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len),
> "g" (-mmx_len)
> +            : "%"REG_a
> +            );
> +    if (mmx_len != len)
> +        ff_noise_filter_line_c(dst+mmx_len, src+mmx_len, noise+mmx_len,
> len-mmx_len, 0);
>  }
> +#endif
>
> why is this reordered ?
> it makes review more difficult

I reordered nothing intentionally.

>
>
> -    if (HAVE_MMX_INLINE &&
> -        cpu_flags & AV_CPU_FLAG_MMX) {
> -        n->line_noise = line_noise_mmx;
> -        n->line_noise_avg = line_noise_avg_mmx;
> -    }
> -    if (HAVE_MMXEXT_INLINE &&
> -        cpu_flags & AV_CPU_FLAG_MMXEXT)
> -        n->line_noise = line_noise_mmxext;
> -
> -    return 0;
> [...]
> +    if (cpu_flags & AV_CPU_FLAG_MMX) {
> +        s->line_noise     = line_noise_mmx;
> +        s->line_noise_avg = line_noise_avg_mmx;
>  }
> [...]
> +#if HAVE_MMXEXT_INLINE
> +    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
> +        s->line_noise = line_noise_mmxext;
> +#endif
> +#endif
>
> why is if() changed to #if ?

Will fix it; I made a mistake by looking at the spp code.

The idea is to convert the inline asm to yasm in the next patch.

>
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Republics decline into democracies and democracies degenerate into
> despotisms. -- Aristotle
>


More information about the ffmpeg-devel mailing list