[FFmpeg-devel] [PATCH] avfilter: add sharpen_npp video filter

Timo Rothenpieler timo at rothenpieler.org
Tue Sep 28 22:58:03 EEST 2021


> From 0df6297bd3664beb05c813c5fc62852e61616fa9 Mon Sep 17 00:00:00 2001
> From: Roman Arzumanyan <rarzumanyan at nvidia.com>
> Date: Mon, 6 Sep 2021 14:26:27 +0300
> Subject: [PATCH] sharpen_npp video filter added

Same as with the other patch: the commit subject should match the mail
subject, i.e. "avfilter: add sharpen_npp video filter".

> ---
>  configure                    |   5 +-
>  libavfilter/Makefile         |   1 +
>  libavfilter/allfilters.c     |   1 +
>  libavfilter/vf_sharpen_npp.c | 530 +++++++++++++++++++++++++++++++++++

Missing entry in filter docs.
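
A rough sketch of what the doc/filters.texi entry could look like (the
wording is mine; the option description is taken from the AVOption table
further down, so adjust as needed):

@section sharpen_npp
Use the NVIDIA Performance Primitives (libnpp) to sharpen video on CUDA
frames.

It accepts the following option:

@table @option
@item format
The output pixel format. Defaults to "same", i.e. no conversion.
@end table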

Missing avfilter minor version bump.
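
Something along these lines in libavfilter/version.h (N is just a
placeholder for whatever the current minor version is at the time):

-#define LIBAVFILTER_VERSION_MINOR   N
+#define LIBAVFILTER_VERSION_MINOR   N+1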

>  4 files changed, 535 insertions(+), 2 deletions(-)
>  create mode 100644 libavfilter/vf_sharpen_npp.c
> 
> diff --git a/configure b/configure
> index af410a9d11..e092cc8c67 100755
> --- a/configure
> +++ b/configure
> @@ -3094,6 +3094,7 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
>  transpose_npp_filter_deps="ffnvcodec libnpp"
>  overlay_cuda_filter_deps="ffnvcodec"
>  overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
> +sharpen_npp_filter_deps="ffnvcodec libnpp"
>  
>  amf_deps_any="libdl LoadLibrary"
>  nvenc_deps="ffnvcodec"
> @@ -6443,8 +6444,8 @@ enabled libmodplug        && require_pkg_config libmodplug libmodplug libmodplug
>  enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame $libm_extralibs
>  enabled libmysofa         && { check_pkg_config libmysofa libmysofa mysofa.h mysofa_neighborhood_init_withstepdefine ||
>                                 require libmysofa mysofa.h mysofa_neighborhood_init_withstepdefine -lmysofa $zlib_extralibs; }
> -enabled libnpp            && { check_lib libnpp npp.h nppGetLibVersion -lnppig -lnppicc -lnppc -lnppidei ||
> -                               check_lib libnpp npp.h nppGetLibVersion -lnppi -lnppc -lnppidei ||
> +enabled libnpp            && { check_lib libnpp npp.h nppGetLibVersion -lnppig -lnppicc -lnppc -lnppidei -lnppif ||
> +                               check_lib libnpp npp.h nppGetLibVersion -lnppi -lnppif -lnppc -lnppidei ||
>                                 die "ERROR: libnpp not found"; }

I was wondering whether it's worth splitting the new dependency out into
a separate check, but it's probably not, and it's fine to just pull it
along, even if sharpen_npp were to be disabled.

>  enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
>  enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index af957a5ac0..330ddfe5d5 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -423,6 +423,7 @@ OBJS-$(CONFIG_SETRANGE_FILTER)               += vf_setparams.o
>  OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_SETTB_FILTER)                  += settb.o
>  OBJS-$(CONFIG_SHARPNESS_VAAPI_FILTER)        += vf_misc_vaapi.o vaapi_vpp.o
> +OBJS-$(CONFIG_SHARPEN_NPP_FILTER)            += vf_sharpen_npp.o

Should be above SHARPNESS_VAAPI if strictly following alphabetic ordering.
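
i.e. with the two lines swapped:

OBJS-$(CONFIG_SHARPEN_NPP_FILTER)            += vf_sharpen_npp.o
OBJS-$(CONFIG_SHARPNESS_VAAPI_FILTER)        += vf_misc_vaapi.o vaapi_vpp.o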

>  OBJS-$(CONFIG_SHEAR_FILTER)                  += vf_shear.o
>  OBJS-$(CONFIG_SHOWINFO_FILTER)               += vf_showinfo.o
>  OBJS-$(CONFIG_SHOWPALETTE_FILTER)            += vf_showpalette.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 0c6b2347c8..e50e5f3b6a 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -404,6 +404,7 @@ extern const AVFilter ff_vf_setrange;
>  extern const AVFilter ff_vf_setsar;
>  extern const AVFilter ff_vf_settb;
>  extern const AVFilter ff_vf_sharpness_vaapi;
> +extern const AVFilter ff_vf_sharpen_npp;

Same here about the order.
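
i.e.:

extern const AVFilter ff_vf_sharpen_npp;
extern const AVFilter ff_vf_sharpness_vaapi;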

>  extern const AVFilter ff_vf_shear;
>  extern const AVFilter ff_vf_showinfo;
>  extern const AVFilter ff_vf_showpalette;
> diff --git a/libavfilter/vf_sharpen_npp.c b/libavfilter/vf_sharpen_npp.c
> new file mode 100644
> index 0000000000..85549c36d0
> --- /dev/null
> +++ b/libavfilter/vf_sharpen_npp.c
> @@ -0,0 +1,530 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * NPP sharpen video filter
> + */
> +
> +#include <nppi.h>
> +#include <nppi_filtering_functions.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "libavutil/avstring.h"
> +#include "libavutil/common.h"
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_cuda_internal.h"
> +#include "libavutil/cuda_check.h"
> +#include "libavutil/internal.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "scale_eval.h"

Unused header

> +#include "video.h"
> +
> +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
> +
> +static const enum AVPixelFormat supported_formats[] = {
> +    AV_PIX_FMT_YUV420P,
> +    AV_PIX_FMT_NV12,
> +    AV_PIX_FMT_YUV444P,
> +};
> +
> +static const enum AVPixelFormat deinterleaved_formats[][2] = {
> +    { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P },
> +};
> +
> +enum SharpenStage {
> +    STAGE_DEINTERLEAVE,
> +    STAGE_SHARPEN,
> +    STAGE_INTERLEAVE,
> +    STAGE_NB,
> +};
> +
> +typedef struct NPPSharpenStageContext {
> +    int stage_needed;
> +    enum AVPixelFormat in_fmt;
> +    enum AVPixelFormat out_fmt;
> +
> +    struct {
> +        int width;
> +        int height;
> +    } planes_in[3], planes_out[3];
> +
> +    AVBufferRef *frames_ctx;
> +    AVFrame     *frame;
> +} NPPSharpenStageContext;
> +
> +typedef struct NPPSharpenContext {
> +    const AVClass *class;
> +
> +    NPPSharpenStageContext stages[STAGE_NB];
> +    AVFrame *tmp_frame;
> +
> +    /**
> +     * Output sw format. AV_PIX_FMT_NONE for no conversion.
> +     */
> +    enum AVPixelFormat format;
> +
> +    /**
> +     * Width, height and pixel format strings;
> +     */
> +    char *w_expr;
> +    char *h_expr;
> +    char *format_str;    
> +
> +} NPPSharpenContext;
> +
> +static int nppsharpen_init(AVFilterContext *ctx)
> +{
> +    NPPSharpenContext *s = ctx->priv;
> +    int i;
> +
> +    if (!strcmp(s->format_str, "same")) {
> +        s->format = AV_PIX_FMT_NONE;
> +    } else {
> +        s->format = av_get_pix_fmt(s->format_str);
> +        if (s->format == AV_PIX_FMT_NONE) {
> +            av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
> +        s->stages[i].frame = av_frame_alloc();
> +        if (!s->stages[i].frame)
> +            return AVERROR(ENOMEM);
> +    }
> +    s->tmp_frame = av_frame_alloc();
> +    if (!s->tmp_frame)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static void nppsharpen_uninit(AVFilterContext *ctx)
> +{
> +    NPPSharpenContext              *s = ctx->priv;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
> +        av_frame_free(&s->stages[i].frame);
> +        av_buffer_unref(&s->stages[i].frames_ctx);
> +    }
> +    av_frame_free(&s->tmp_frame);
> +}
> +
> +static int nppsharpen_query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pixel_formats[] = {
> +        AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
> +    };
> +    AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
> +
> +    return ff_set_common_formats(ctx, pix_fmts);
> +}
> +
> +static int init_stage(NPPSharpenStageContext *stage, AVBufferRef *device_ctx)
> +{
> +    AVBufferRef *out_ref = NULL;
> +    AVHWFramesContext *out_ctx;
> +    int in_sw, in_sh, out_sw, out_sh;
> +    int ret, i;
> +
> +    av_pix_fmt_get_chroma_sub_sample(stage->in_fmt,  &in_sw,  &in_sh);
> +    av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh);
> +    if (!stage->planes_out[0].width) {
> +        stage->planes_out[0].width  = stage->planes_in[0].width;
> +        stage->planes_out[0].height = stage->planes_in[0].height;
> +    }
> +
> +    for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) {
> +        stage->planes_in[i].width   = stage->planes_in[0].width   >> in_sw;
> +        stage->planes_in[i].height  = stage->planes_in[0].height  >> in_sh;
> +        stage->planes_out[i].width  = stage->planes_out[0].width  >> out_sw;
> +        stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
> +    }
> +
> +    out_ref = av_hwframe_ctx_alloc(device_ctx);
> +    if (!out_ref)
> +        return AVERROR(ENOMEM);
> +    out_ctx = (AVHWFramesContext*)out_ref->data;
> +
> +    out_ctx->format    = AV_PIX_FMT_CUDA;
> +    out_ctx->sw_format = stage->out_fmt;
> +    out_ctx->width     = FFALIGN(stage->planes_out[0].width,  32);
> +    out_ctx->height    = FFALIGN(stage->planes_out[0].height, 32);
> +
> +    ret = av_hwframe_ctx_init(out_ref);
> +    if (ret < 0)
> +        goto fail;
> +
> +    av_frame_unref(stage->frame);
> +    ret = av_hwframe_get_buffer(out_ref, stage->frame, 0);
> +    if (ret < 0)
> +        goto fail;
> +
> +    stage->frame->width  = stage->planes_out[0].width;
> +    stage->frame->height = stage->planes_out[0].height;
> +
> +    av_buffer_unref(&stage->frames_ctx);
> +    stage->frames_ctx = out_ref;
> +
> +    return 0;
> +fail:
> +    av_buffer_unref(&out_ref);
> +    return ret;
> +}
> +
> +static int format_is_supported(enum AVPixelFormat fmt)
> +{
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
> +        if (supported_formats[i] == fmt)
> +            return 1;
> +    return 0;
> +}
> +
> +static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt)
> +{
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> +    int i, planes;
> +
> +    planes = av_pix_fmt_count_planes(fmt);
> +    if (planes == desc->nb_components)
> +        return fmt;
> +    for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++)
> +        if (deinterleaved_formats[i][0] == fmt)
> +            return deinterleaved_formats[i][1];
> +    return AV_PIX_FMT_NONE;
> +}
> +
> +static int init_processing_chain(AVFilterContext *ctx, int width, int height)
> +{
> +    NPPSharpenContext *s = ctx->priv;
> +    AVHWFramesContext *in_frames_ctx;
> +
> +    enum AVPixelFormat in_format;
> +    enum AVPixelFormat out_format;
> +    enum AVPixelFormat in_deinterleaved_format;
> +    enum AVPixelFormat out_deinterleaved_format;
> +
> +    int i, ret, last_stage = -1;
> +
> +    /* check that we have a hw context */
> +    if (!ctx->inputs[0]->hw_frames_ctx) {
> +        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
> +        return AVERROR(EINVAL);
> +    }
> +    in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
> +    in_format     = in_frames_ctx->sw_format;
> +    out_format    = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
> +
> +    if (!format_is_supported(in_format)) {
> +        av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
> +               av_get_pix_fmt_name(in_format));
> +        return AVERROR(ENOSYS);
> +    }
> +    if (!format_is_supported(out_format)) {
> +        av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
> +               av_get_pix_fmt_name(out_format));
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    in_deinterleaved_format  = get_deinterleaved_format(in_format);
> +    out_deinterleaved_format = get_deinterleaved_format(out_format);
> +    if (in_deinterleaved_format  == AV_PIX_FMT_NONE ||
> +        out_deinterleaved_format == AV_PIX_FMT_NONE)
> +        return AVERROR_BUG;
> +
> +    /* figure out which stages need to be done */
> +    s->stages[STAGE_SHARPEN].stage_needed = 1;
> +    if (in_format != in_deinterleaved_format)
> +        s->stages[STAGE_DEINTERLEAVE].stage_needed = 1;
> +    if (out_format != out_deinterleaved_format)
> +        s->stages[STAGE_INTERLEAVE].stage_needed = 1;
> +
> +    s->stages[STAGE_DEINTERLEAVE].in_fmt              = in_format;
> +    s->stages[STAGE_DEINTERLEAVE].out_fmt             = in_deinterleaved_format;
> +    s->stages[STAGE_DEINTERLEAVE].planes_in[0].width  = width;
> +    s->stages[STAGE_DEINTERLEAVE].planes_in[0].height = height;
> +
> +    s->stages[STAGE_SHARPEN].in_fmt               = in_deinterleaved_format;
> +    s->stages[STAGE_SHARPEN].out_fmt              = out_deinterleaved_format;
> +    s->stages[STAGE_SHARPEN].planes_in[0].width   = width;
> +    s->stages[STAGE_SHARPEN].planes_in[0].height  = height;
> +    s->stages[STAGE_SHARPEN].planes_out[0].width  = width;
> +    s->stages[STAGE_SHARPEN].planes_out[0].height = height;
> +
> +    s->stages[STAGE_INTERLEAVE].in_fmt              = out_deinterleaved_format;
> +    s->stages[STAGE_INTERLEAVE].out_fmt             = out_format;
> +    s->stages[STAGE_INTERLEAVE].planes_in[0].width  = width;
> +    s->stages[STAGE_INTERLEAVE].planes_in[0].height = height;
> +
> +    /* init the hardware contexts */
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
> +        if (!s->stages[i].stage_needed)
> +            continue;
> +
> +        ret = init_stage(&s->stages[i], in_frames_ctx->device_ref);
> +        if (ret < 0)
> +            return ret;
> +
> +        last_stage = i;
> +    }
> +
> +    if (last_stage >= 0)
> +        ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx);
> +    else
> +        ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
> +
> +    if (!ctx->outputs[0]->hw_frames_ctx)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static int nppsharpen_config_props(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +    AVFilterLink *inlink = outlink->src->inputs[0];
> +    int ret;
> +
> +    outlink->w = inlink->w;
> +    outlink->h = inlink->h;
> +
> +    ret = init_processing_chain(ctx, inlink->w, inlink->h);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (inlink->sample_aspect_ratio.num)
> +        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
> +                                                             outlink->w*inlink->h},
> +                                                inlink->sample_aspect_ratio);
> +    else
> +        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
> +
> +    return ret;
> +}
> +
> +static int nppsharpen_deinterleave(AVFilterContext *ctx, NPPSharpenStageContext *stage,
> +                                 AVFrame *out, AVFrame *in)
> +{
> +    AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
> +    NppStatus err;
> +
> +    switch (in_frames_ctx->sw_format) {
> +    case AV_PIX_FMT_NV12:
> +        err = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0],
> +                                    in->data[1], in->linesize[1],
> +                                    out->data, out->linesize,
> +                                    (NppiSize){ in->width, in->height });
> +        break;
> +    default:
> +        return AVERROR_BUG;
> +    }
> +    if (err != NPP_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err);
> +        return AVERROR_UNKNOWN;
> +    }
> +
> +    return 0;
> +}
> +
> +static int nppsharpen_apply_filter(AVFilterContext *ctx, NPPSharpenStageContext *stage,
> +                           AVFrame *out, AVFrame *in)
> +{
> +    NppStatus err;
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(stage->planes_in) && i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
> +        int ow = stage->planes_out[i].width;
> +        int oh = stage->planes_out[i].height;
> +
> +        err = nppiFilterSharpenBorder_8u_C1R(in->data[i], in->linesize[i], 
> +                                             (NppiSize){ow, oh}, (NppiPoint){0, 0},
> +                                             out->data[i], out->linesize[i], 
> +                                             (NppiSize){ow, oh},
> +                                             NPP_BORDER_REPLICATE);
> +        if (err != NPP_SUCCESS) {
> +            av_log(ctx, AV_LOG_ERROR, "NPP sharpen error: %d\n", err);
> +            return AVERROR_UNKNOWN;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int nppsharpen_interleave(AVFilterContext *ctx, NPPSharpenStageContext *stage,
> +                               AVFrame *out, AVFrame *in)
> +{
> +    AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data;
> +    NppStatus err;
> +
> +    switch (out_frames_ctx->sw_format) {
> +    case AV_PIX_FMT_NV12:
> +        err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data,
> +                                    in->linesize,
> +                                    out->data[0], out->linesize[0],
> +                                    out->data[1], out->linesize[1],
> +                                    (NppiSize){ in->width, in->height });
> +        break;
> +    default:
> +        return AVERROR_BUG;
> +    }
> +    if (err != NPP_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err);
> +        return AVERROR_UNKNOWN;
> +    }
> +
> +    return 0;
> +}
> +
> +static int (*const nppsharpen_process[])(AVFilterContext *ctx, NPPSharpenStageContext *stage,
> +                                       AVFrame *out, AVFrame *in) = {
> +    [STAGE_DEINTERLEAVE] = nppsharpen_deinterleave,
> +    [STAGE_SHARPEN]      = nppsharpen_apply_filter,
> +    [STAGE_INTERLEAVE]   = nppsharpen_interleave,
> +};
> +
> +static int nppsharpen_sharpen(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
> +{
> +    NPPSharpenContext *s = ctx->priv;
> +    AVFrame *src = in;
> +    int i, ret, last_stage = -1;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
> +        if (!s->stages[i].stage_needed)
> +            continue;
> +
> +        ret = nppsharpen_process[i](ctx, &s->stages[i], s->stages[i].frame, src);
> +        if (ret < 0)
> +            return ret;
> +
> +        src        = s->stages[i].frame;
> +        last_stage = i;
> +    }
> +
> +    if (last_stage < 0)
> +        return AVERROR_BUG;
> +    ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
> +    if (ret < 0)
> +        return ret;
> +
> +    av_frame_move_ref(out, src);
> +    av_frame_move_ref(src, s->tmp_frame);
> +
> +    ret = av_frame_copy_props(out, in);
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static int nppsharpen_filter_frame(AVFilterLink *link, AVFrame *in)
> +{
> +    AVFilterContext              *ctx = link->dst;
> +    AVFilterLink             *outlink = ctx->outputs[0];
> +    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
> +    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
> +
> +    AVFrame *out = NULL;
> +    CUcontext dummy;
> +    int ret = 0;
> +
> +    out = av_frame_alloc();
> +    if (!out) {
> +        ret = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
> +    if (ret < 0)
> +        goto fail;
> +
> +    ret = nppsharpen_sharpen(ctx, out, in);
> +
> +    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
> +    if (ret < 0)
> +        goto fail;
> +
> +    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> +              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> +              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> +              INT_MAX);
> +
> +    av_frame_free(&in);
> +    return ff_filter_frame(outlink, out);
> +fail:
> +    av_frame_free(&in);
> +    av_frame_free(&out);
> +    return ret;
> +}
> +
> +#define OFFSET(x) offsetof(NPPSharpenContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption options[] = {
> +    { "format",     "Output pixel format.", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> +    { NULL },
> +};
> +
> +static const AVClass nppsharpen_class = {
> +    .class_name = "nppsharpen",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const AVFilterPad nppsharpen_inputs[] = {
> +    {
> +        .name        = "default",
> +        .type        = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = nppsharpen_filter_frame,
> +    }
> +};
> +
> +static const AVFilterPad nppsharpen_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = nppsharpen_config_props,
> +    }
> +};
> +
> +const AVFilter ff_vf_sharpen_npp = {
> +    .name      = "sharpen_npp",
> +    .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video "
> +                                        "sharpening filter."),
> +
> +    .init          = nppsharpen_init,
> +    .uninit        = nppsharpen_uninit,
> +    .query_formats = nppsharpen_query_formats,
> +
> +    .priv_size = sizeof(NPPSharpenContext),
> +    .priv_class = &nppsharpen_class,
> +
> +    FILTER_INPUTS(nppsharpen_inputs),
> +    FILTER_OUTPUTS(nppsharpen_outputs),
> +
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> -- 
> 2.25.1
> 

A general point about this, but also the other npp filters:
Is it really worth it to have the de/interleave stages in all of them?
The filter would be a lot simpler without them, and you can just put
scale_cuda or scale_npp in front to convert to a planar format; nvenc
can then also take that as input directly, eliminating the
re-interleaving step entirely.
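
E.g. with the de/interleave stages dropped, a chain like this would
still cover NV12 input once the filter is in (untested command line,
purely to illustrate the idea):

ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i in.mp4 \
       -vf scale_npp=format=yuv420p,sharpen_npp \
       -c:v h264_nvenc out.mp4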

Even more generally, this filter shares a lot of code with scale_npp, to
the point where it should be possible to combine them and only swap out
the middle stage based on which filter is in use.
Couldn't this whole filter just be an option to scale_npp that turns on
sharpening, instead of an entirely separate filter?
The scaling stages are already skipped if they'd be a no-op, and
sharpening could just be another optional stage.
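
Purely as an interface sketch (no such option exists today, the name is
made up):

ffmpeg ... -vf scale_npp=w=1280:h=720:format=yuv420p:sharpen=1 ...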
