[FFmpeg-devel] [PATCH 2/2] lavf/url: rewrite ff_make_absolute_url() using ff_url_decompose().

Marton Balint cus at passwd.hu
Wed Aug 5 10:58:28 EEST 2020



On Thu, 30 Jul 2020, Nicolas George wrote:

> Also add and update some tests.
>
> Change the semantic a little, because for filesytem paths
> symlinks complicate things.
> See the comments in the code for detail.
>
> Signed-off-by: Nicolas George <george at nsup.org>
> ---
> libavformat/tests/url.c |  17 ++-
> libavformat/url.c       | 246 +++++++++++++++++++---------------------
> libavformat/url.h       |   4 +-
> tests/ref/fate/url      |  13 ++-
> 4 files changed, 145 insertions(+), 135 deletions(-)
>

[...]

> diff --git a/libavformat/url.c b/libavformat/url.c
> index 26aaab4019..0c82317134 100644
> --- a/libavformat/url.c
> +++ b/libavformat/url.c
> @@ -27,6 +27,7 @@
> #if CONFIG_NETWORK
> #include "network.h"
> #endif
> +#include "libavutil/avassert.h"
> #include "libavutil/avstring.h"
> 
> /**
> @@ -152,146 +153,131 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
>     return 0;
> }
> 
> -static void trim_double_dot_url(char *buf, const char *rel, int size)
> +static int append_path(char *root, char *out_end, char **rout,
> +                       const char *in, const char *in_end)
> {
> +    char *out = *rout;
> +    const char *d, *next;
> +
> +    if (in < in_end && *in == '/')
> +        in++; /* already taken care of */
> +    while (in < in_end) {
> +        d = find_delim("/", in, in_end);
> +        next = d + (d < in_end && *d == '/');
> +        if (d - in == 1 && in[0] == '.') {
> +            /* skip */
> +        } else if (d - in == 2 && in[0] == '.' && in[1] == '.') {
> +            av_assert1(out[-1] == '/');
> +            if (out - root > 1)
> +                while (out > root && (--out)[-1] != '/');
> +        } else {
> +            if (out_end - out < next - in)
> +                return AVERROR(ENOMEM);
> +            memcpy(out, in, next - in);

memmove, because out and in can overlap or they can be the same when
buf == base in ff_make_absolute_url()

> +            out += next - in;
> +        }
> +        in = next;
>     }
> -
> -    if (!av_stristart(p, "/", NULL) && root != rel)
> -        av_strlcat(tmp_path, "/", size);
> -
> -    av_strlcat(tmp_path, p, size);
> -    /* start set buf after temp path process. */
> -    av_strlcpy(buf, rel, root - rel + 1);
> -
> -    if (!av_stristart(tmp_path, "/", NULL) && root != rel)
> -        av_strlcat(buf, "/", size);
> -
> -    av_strlcat(buf, tmp_path, size);
> +    *rout = out;
> +    return 0;
> }
> 
> -void ff_make_absolute_url(char *buf, int size, const char *base,
> +int ff_make_absolute_url(char *buf, int size, const char *base,
>                           const char *rel)

A problem with the function type change is that previously the caller 
assumed that even if there is not enough space, buf will be a 0 terminated 
string. This guarantee is now lost, so all usage of 
ff_make_absolute_url should also be updated to check error code, otherwise 
crashes might occur. Or we could re-introduce that guarantee, at 
least temporarily, and change type and add checks in a later patch.

> {
> +    URLComponents ub, uc;
> +    char *out, *out_end, *path;
> +    const char *keep, *base_path_end;
> +    int use_base_path, simplify_path = 0, ret;
> +
> +    /* This is tricky.
> +       For HTTP, http://server/site/page + ../media/file
> +       should resolve into http://media/file
> +       but for filesystem access, dir/playlist + ../media/file
> +       should resolve into dir/../media/file
> +       because dir could be a symlink, and .. points to
> +       the actual parent of the target directory.
> +
> +       We'll consider that URLs with an actual scheme and authority,
> +       i.e. starting with scheme://, need parent dir simplification,
> +       while bare paths or pseudo-URLs starting with proto: without
> +       the double slash do not.
> +     */
> +
> +    if (!size)
> +        return AVERROR(ENOMEM);
> +    out = buf;
> +    out_end = buf + size - 1;
> +
> +    if ((ret = ff_url_decompose(&ub, base, NULL) < 0) ||
> +        (ret = ff_url_decompose(&uc, rel,  NULL) < 0))
> +        return ret;
> +
> +    keep = ub.url;
> +#define KEEP(component, also) do { \
> +        if (uc.url_component_end_##component == uc.url && \
> +            ub.url_component_end_##component > keep) { \
> +            keep = ub.url_component_end_##component; \
> +            also \
> +        } \
> +    } while (0)
> +    KEEP(scheme, );
> +    KEEP(authority_full, simplify_path = 1;);
> +    KEEP(path,);
> +    KEEP(query,);
> +    KEEP(fragment,);
> +#undef KEEP
> +#define COPY(start, end) do { \
> +        size_t len = end - start; \
> +        if (len > out_end - out) \
> +            return AVERROR(ENOMEM); \
> +        memcpy(out, start, len); \

memmove because buffers can overlap here as well.

> +        out += len; \
> +    } while (0)
> +    COPY(ub.url, keep);
> +    COPY(uc.url, uc.path);
> +    if (keep > ub.path)
> +        simplify_path = 0;
> +
> +    use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
> +    if (uc.path > uc.url)
> +        use_base_path = 0;
> +    if (URL_COMPONENT_HAVE(uc, scheme))
> +        simplify_path = 0;
> +    if (URL_COMPONENT_HAVE(uc, authority))
> +        simplify_path = 1;

These last two checks should be moved up for better readability (to set 
simplify_path in one section)

[...]


> diff --git a/libavformat/url.h b/libavformat/url.h
> index ae27da5c73..e33edf0cb9 100644
> --- a/libavformat/url.h
> +++ b/libavformat/url.h
> @@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char 
> *proto,
>  * @param base the base url, may be equal to buf.
>  * @param rel the new url, which is interpreted relative to base
>  */
> -void ff_make_absolute_url(char *buf, int size, const char *base,
> -                          const char *rel);
> +int ff_make_absolute_url(char *buf, int size, const char *base,
> +                         const char *rel);

We should add a reference to the function docs to the relevant RFC:
https://tools.ietf.org/html/rfc3986#section-5

[...]

> diff --git a/tests/ref/fate/url b/tests/ref/fate/url
> index 84cf85abdd..7998fb5be9 100644
> --- a/tests/ref/fate/url
> +++ b/tests/ref/fate/url
> @@ -46,9 +46,9 @@ http://[::1]:8080/dev/null =>
> Testing ff_make_absolute_url:
>                                             (null) baz                  => baz
>                                           /foo/bar baz                  => /foo/baz
> -                                          /foo/bar ../baz               => /baz
> +                                          /foo/bar ../baz               => /foo/../baz
>                                           /foo/bar /baz                 => /baz
> -                                          /foo/bar ../../../baz         => /baz
> +                                          /foo/bar ../../../baz         => /foo/../../../baz
>                                 http://server/foo/ baz                  => http://server/foo/baz
>                              http://server/foo/bar baz                  => http://server/foo/baz
>                                 http://server/foo/ ../baz               => http://server/baz
> @@ -62,6 +62,15 @@ Testing ff_make_absolute_url:
>                              http://server/foo/bar /../../../../../other/url => http://server/other/url
>                              http://server/foo/bar /test/../../../../../other/url => http://server/other/url
>                              http://server/foo/bar /test/../../test/../../../other/url => http://server/other/url
> +                             http://server/foo/bar file:../baz/qux      => file:../baz/qux
> +                           http://server/foo//bar/ ../../               => http://server/foo/
> +                                   file:../tmp/foo ../bar/              => file:../tmp/../bar/
> +                                   file:../tmp/foo file:../bar/         => file:../bar/
> +                             http://server/foo/bar ./                   => http://server/foo/
> +                             http://server/foo/bar .dotfile             => http://server/foo/.dotfile
> +                             http://server/foo/bar ..doubledotfile      => http://server/foo/..doubledotfile
> +                             http://server/foo/bar double..dotfile      => http://server/foo/double..dotfile
> +                             http://server/foo/bar doubledotfile..      => http://server/foo/doubledotfile..

It would probably make sense to add all tests which are in
https://tools.ietf.org/html/rfc3986#section-5.4

I did a quick check manually, and found one failing test:

http://a/b/c/d;p?q //g                  => http://g/

but it supposed to be "http://g". Not that it matters too much...

Good work overall, thanks!

Regards,
Marton


More information about the ffmpeg-devel mailing list