On Thu, 30 Jul 2020, Nicolas George wrote:

Also add and update some tests.

Change the semantic a little, because for filesytem paths
symlinks complicate things.
See the comments in the code for detail.

Signed-off-by: Nicolas George <geo...@nsup.org>
---
libavformat/tests/url.c |  17 ++-
libavformat/url.c       | 246 +++++++++++++++++++---------------------
libavformat/url.h       |   4 +-
tests/ref/fate/url      |  13 ++-
4 files changed, 145 insertions(+), 135 deletions(-)


[...]

diff --git a/libavformat/url.c b/libavformat/url.c
index 26aaab4019..0c82317134 100644
--- a/libavformat/url.c
+++ b/libavformat/url.c
@@ -27,6 +27,7 @@
#if CONFIG_NETWORK
#include "network.h"
#endif
+#include "libavutil/avassert.h"
#include "libavutil/avstring.h"

/**
@@ -152,146 +153,131 @@ int ff_url_decompose(URLComponents *uc, const char 
*url, const char *end)
    return 0;
}

-static void trim_double_dot_url(char *buf, const char *rel, int size)
+static int append_path(char *root, char *out_end, char **rout,
+                       const char *in, const char *in_end)
{
+    char *out = *rout;
+    const char *d, *next;
+
+    if (in < in_end && *in == '/')
+        in++; /* already taken care of */
+    while (in < in_end) {
+        d = find_delim("/", in, in_end);
+        next = d + (d < in_end && *d == '/');
+        if (d - in == 1 && in[0] == '.') {
+            /* skip */
+        } else if (d - in == 2 && in[0] == '.' && in[1] == '.') {
+            av_assert1(out[-1] == '/');
+            if (out - root > 1)
+                while (out > root && (--out)[-1] != '/');
+        } else {
+            if (out_end - out < next - in)
+                return AVERROR(ENOMEM);
+            memcpy(out, in, next - in);

memmove, because out and in can overlap or they can be the same when
buf == base in ff_make_absolute_url()

+            out += next - in;
+        }
+        in = next;
    }
-
-    if (!av_stristart(p, "/", NULL) && root != rel)
-        av_strlcat(tmp_path, "/", size);
-
-    av_strlcat(tmp_path, p, size);
-    /* start set buf after temp path process. */
-    av_strlcpy(buf, rel, root - rel + 1);
-
-    if (!av_stristart(tmp_path, "/", NULL) && root != rel)
-        av_strlcat(buf, "/", size);
-
-    av_strlcat(buf, tmp_path, size);
+    *rout = out;
+    return 0;
}

-void ff_make_absolute_url(char *buf, int size, const char *base,
+int ff_make_absolute_url(char *buf, int size, const char *base,
                          const char *rel)

A problem with the function type change is that previously the caller assumed that even if there is not enough space, buf will be a 0 terminated string. This guarantee is now lost, so all usage of ff_make_absolute_url should also be updated to check error code, otherwise crashes might occur. Or we could re-introduce that guarantee, at least temporarily, and change type and add checks in a later patch.

{
+    URLComponents ub, uc;
+    char *out, *out_end, *path;
+    const char *keep, *base_path_end;
+    int use_base_path, simplify_path = 0, ret;
+
+    /* This is tricky.
+       For HTTP, http://server/site/page + ../media/file
+       should resolve into http://media/file
+       but for filesystem access, dir/playlist + ../media/file
+       should resolve into dir/../media/file
+       because dir could be a symlink, and .. points to
+       the actual parent of the target directory.
+
+       We'll consider that URLs with an actual scheme and authority,
+       i.e. starting with scheme://, need parent dir simplification,
+       while bare paths or pseudo-URLs starting with proto: without
+       the double slash do not.
+     */
+
+    if (!size)
+        return AVERROR(ENOMEM);
+    out = buf;
+    out_end = buf + size - 1;
+
+    if ((ret = ff_url_decompose(&ub, base, NULL) < 0) ||
+        (ret = ff_url_decompose(&uc, rel,  NULL) < 0))
+        return ret;
+
+    keep = ub.url;
+#define KEEP(component, also) do { \
+        if (uc.url_component_end_##component == uc.url && \
+            ub.url_component_end_##component > keep) { \
+            keep = ub.url_component_end_##component; \
+            also \
+        } \
+    } while (0)
+    KEEP(scheme, );
+    KEEP(authority_full, simplify_path = 1;);
+    KEEP(path,);
+    KEEP(query,);
+    KEEP(fragment,);
+#undef KEEP
+#define COPY(start, end) do { \
+        size_t len = end - start; \
+        if (len > out_end - out) \
+            return AVERROR(ENOMEM); \
+        memcpy(out, start, len); \

memmove because buffers can overlap here as well.

+        out += len; \
+    } while (0)
+    COPY(ub.url, keep);
+    COPY(uc.url, uc.path);
+    if (keep > ub.path)
+        simplify_path = 0;
+
+    use_base_path = URL_COMPONENT_HAVE(ub, path) && keep <= ub.path;
+    if (uc.path > uc.url)
+        use_base_path = 0;
+    if (URL_COMPONENT_HAVE(uc, scheme))
+        simplify_path = 0;
+    if (URL_COMPONENT_HAVE(uc, authority))
+        simplify_path = 1;

These last two checks should be moved up for better readability (to set simplify_path in one section)

[...]


diff --git a/libavformat/url.h b/libavformat/url.h
index ae27da5c73..e33edf0cb9 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto,
 * @param base the base url, may be equal to buf.
 * @param rel the new url, which is interpreted relative to base
 */
-void ff_make_absolute_url(char *buf, int size, const char *base,
-                          const char *rel);
+int ff_make_absolute_url(char *buf, int size, const char *base,
+                         const char *rel);

We should add a reference to the function docs to the relevant RFC:
https://tools.ietf.org/html/rfc3986#section-5

[...]

diff --git a/tests/ref/fate/url b/tests/ref/fate/url
index 84cf85abdd..7998fb5be9 100644
--- a/tests/ref/fate/url
+++ b/tests/ref/fate/url
@@ -46,9 +46,9 @@ http://[::1]:8080/dev/null =>
Testing ff_make_absolute_url:
                                            (null) baz                  => baz
                                          /foo/bar baz                  => 
/foo/baz
-                                          /foo/bar ../baz               => /baz
+                                          /foo/bar ../baz               => 
/foo/../baz
                                          /foo/bar /baz                 => /baz
-                                          /foo/bar ../../../baz         => /baz
+                                          /foo/bar ../../../baz         => 
/foo/../../../baz
                                http://server/foo/ baz                  => 
http://server/foo/baz
                             http://server/foo/bar baz                  => 
http://server/foo/baz
                                http://server/foo/ ../baz               => 
http://server/baz
@@ -62,6 +62,15 @@ Testing ff_make_absolute_url:
                             http://server/foo/bar /../../../../../other/url => 
http://server/other/url
                             http://server/foo/bar /test/../../../../../other/url 
=> http://server/other/url
                             http://server/foo/bar 
/test/../../test/../../../other/url => http://server/other/url
+                             http://server/foo/bar file:../baz/qux      => 
file:../baz/qux
+                           http://server/foo//bar/ ../../               => 
http://server/foo/
+                                   file:../tmp/foo ../bar/              => 
file:../tmp/../bar/
+                                   file:../tmp/foo file:../bar/         => 
file:../bar/
+                             http://server/foo/bar ./                   => 
http://server/foo/
+                             http://server/foo/bar .dotfile             => 
http://server/foo/.dotfile
+                             http://server/foo/bar ..doubledotfile      => 
http://server/foo/..doubledotfile
+                             http://server/foo/bar double..dotfile      => 
http://server/foo/double..dotfile
+                             http://server/foo/bar doubledotfile..      => 
http://server/foo/doubledotfile..

It would probably make sense to add all tests which are in
https://tools.ietf.org/html/rfc3986#section-5.4

I did a quick check manually, and found one failing test:

http://a/b/c/d;p?q //g                  => http://g/

but it supposed to be "http://g";. Not that it matters too much...

Good work overall, thanks!

Regards,
Marton
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to