[FFmpeg-devel] [PATCH] ffprobe: implement string validation policy setting

Clément Bœsch u at pkh.me
Wed Oct 2 19:07:54 CEST 2013


On Wed, Oct 02, 2013 at 05:52:05PM +0200, Stefano Sabatini wrote:
> This should fix trac tickets #1163, #2502, #2955.
> ---
>  doc/ffprobe.texi |  24 ++++++++++
>  ffprobe.c        | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 158 insertions(+), 7 deletions(-)
> 
> diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
> index 777dbe7..55c6e80 100644
> --- a/doc/ffprobe.texi
> +++ b/doc/ffprobe.texi
> @@ -317,6 +317,30 @@ Show information related to program and library versions. This is the
>  equivalent of setting both @option{-show_program_version} and
>  @option{-show_library_versions} options.
>  
> + at item -string_validation_policy @var{policy}
> +Set string validation policy. It accepts the following values.
> +
> + at table @samp
> + at item fail
> +The program will fail immediately in case an invalid string (UTF-8)
> +sequence is found in the input. This is especially useful to validate
> +input metadata.
> +
> + at item replace=REPLACEMENT

replace[=@var{REPLACEMENT}]

> +The program will substitute the invalid UTF-8 sequences with the
> +string specified in @var{REPLACEMENT}, which is typically a simple
> +character.

...such as '?'.

> +
> +In case the replacement string is not specified, the program will
> +assume the empty string, that is it will remove the invalid sequences
> +from the input strings.
> +This is especially useful to create validate metadata output from
> +invalid sources.
> + at end table
> +
> +By default the program will apply the replace policy with an empty

the @var{replace} policy

> +replacement.
> +
>  @item -bitexact
>  Force bitexact output, useful to produce output which is not dependent
>  on the specific build.
> diff --git a/ffprobe.c b/ffprobe.c
> index c4f0a8f..2e2bb03 100644
> --- a/ffprobe.c
> +++ b/ffprobe.c
> @@ -75,6 +75,14 @@ static int show_private_data            = 1;
>  static char *print_format;
>  static char *stream_specifier;
>  
> +typedef enum {
> +    STRING_VALIDATION_POLICY_FAIL,
> +    STRING_VALIDATION_POLICY_REPLACE,
> +} StringValidationPolicy;
> +
> +StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
> +static char *string_validation_replace;
> +
>  typedef struct {
>      int id;             ///< identifier
>      int64_t start, end; ///< start, end in second/AV_TIME_BASE units
> @@ -428,17 +436,93 @@ static inline void writer_print_integer(WriterContext *wctx,
>      }
>  }
>  
> +static inline int validate_string(char **dstp, const char *src, void *log_ctx)
> +{
> +    const uint8_t *p;
> +    AVBPrint dstbuf;
> +    int invalid_chars_nb = 0, ret = 0;
> +
> +    av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
> +
> +    for (p = src; *p;) {
> +        uint32_t code;
> +        uint8_t tmp;
> +        int invalid = 0;
> +
> +        GET_UTF8(code, *p++, invalid = 1;);
> +        if (invalid) {
> +            invalid_chars_nb++;
> +
> +            switch (string_validation_policy) {
> +            case STRING_VALIDATION_POLICY_FAIL:

> +            {

Pointless

> +                av_log(log_ctx, AV_LOG_ERROR,
> +                       "Invalid UTF-8 character found in sequence '%s'\n", src);
> +                ret = AVERROR_INVALIDDATA;
> +                goto end;
> +            };

Pointless and trailing ';'

> +            break;
> +
> +            case STRING_VALIDATION_POLICY_REPLACE:

> +            if (string_validation_replace) {

You don't indent the case but you can do it for the if.

> +                const uint8_t *s;
> +                for (s = string_validation_replace; *s;) {

const uint8_t *s = string_validation_replace;
while (*s) {

> +                    GET_UTF8(code, *s++, continue;);
> +                    PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
> +                }
> +            }
> +            break;
> +            }
> +        } else {
> +            PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
> +        }
> +    }
> +
[...]
> +static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg)
> +{
> +    char *mode = av_strdup(arg);
> +    char *next;
> +    int ret = 0;
> +
> +    if (!mode) return AVERROR(ENOMEM);
> +

I don't see any strtok or similar here, so I think you can drop the
av_strdup()/av_free() for mode.

> +    next = strchr(mode, '=');
> +    if (next)
> +        *next++ = 0;
> +
> +    if (!strcmp(mode, "fail")) {
> +        string_validation_policy = STRING_VALIDATION_POLICY_FAIL;
> +        if (next) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "No argument must be specified for the option %s with mode 'fail'\n",

"option '%s'" for consistency with most messages.

> +                   opt);
> +            ret = AVERROR(EINVAL);
> +            goto end;
> +        }
> +    } else if (!strcmp(mode, "replace")) {
> +        string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
> +        string_validation_replace = av_strdup(next);
> +

I don't see any free of this

> +        if (next && !string_validation_replace) {
> +            ret = AVERROR(ENOMEM);
> +            goto end;
> +        }
> +    } else {
> +        av_log(NULL, AV_LOG_ERROR,
> +               "Invalid argument '%s' for option '%s', "

> +               "choose between fail, or replace=REPLACEMENT\n", arg, opt);

'fail', or 'replace[=REPLACEMENT]'

> +        ret = AVERROR(EINVAL);
> +        goto end;
> +    }
> +
> +end:
> +    av_free(mode);
> +    return ret;
> +}
> +
>  static int opt_pretty(void *optctx, const char *opt, const char *arg)
>  {
>      show_value_unit              = 1;
> @@ -2633,6 +2759,7 @@ static const OptionDef real_options[] = {
>      { "private",           OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
>      { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" },
>      { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" },
> +    { "string_validation_policy",  HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" },
>      { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
>      { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"},
>      { NULL, },

Could you add a broken char sequence to tests/test.ffmeta so this code is
covered?

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20131002/54d73b7a/attachment.asc>


More information about the ffmpeg-devel mailing list