Marton Balint (12020-07-29):
> Thanks for working on this. I agree that proper (as RFC compliant as it can
> be) URL parsing is needed here. Probably we should clearly document
> differences from RFC compliant parsing, if we cannot do it entirely RFC
> compliantly...

Indeed. Unfortunately, that may cause some trouble, but not for right
now.

> ? and # can also separate authority from the rest.

Thanks, fixed.

> I am not sure about this approach, the known characters at the end or at the
> start will make further operations a bit harder. I'd just simply add another
> field for each URL component to signal the end, e.g.

I considered this, but it was not actually convenient.

I have something that works, but it needs polishing and more testing.
But you can have a peek.

Regards,

-- 
  Nicolas George
From d6c429e879ffe3cdce0af6d22854dbbcce6c8222 Mon Sep 17 00:00:00 2001
From: Nicolas George <geo...@nsup.org>
Date: Thu, 30 Jul 2020 00:02:10 +0200
Subject: [PATCH] WIP lavf/url: rewrite ff_make_absolute_url().

Signed-off-by: Nicolas George <geo...@nsup.org>
---
 libavformat/url.c | 223 +++++++++++++++++++---------------------------
 libavformat/url.h |   4 +-
 2 files changed, 96 insertions(+), 131 deletions(-)

diff --git a/libavformat/url.c b/libavformat/url.c
index 26aaab4019..fa265e90ea 100644
--- a/libavformat/url.c
+++ b/libavformat/url.c
@@ -27,6 +27,7 @@
 #if CONFIG_NETWORK
 #include "network.h"
 #endif
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 
 /**
@@ -152,146 +153,110 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
     return 0;
 }
 
-static void trim_double_dot_url(char *buf, const char *rel, int size)
+static int append_path(char *root, char *out_end, char **rout,
+                       const char *in, const char *in_end)
 {
-    const char *p = rel;
-    const char *root = rel;
-    char tmp_path[MAX_URL_SIZE] = {0, };
-    char *sep;
-    char *node;
-
-    /* Get the path root of the url which start by "://" */
-    if (p && (sep = strstr(p, "://"))) {
-        sep += 3;
-        root = strchr(sep, '/');
-        if (!root)
-            return;
-    }
-
-    /* set new current position if the root node is changed */
-    p = root;
-    while (p && (node = strstr(p, ".."))) {
-        av_strlcat(tmp_path, p, node - p + strlen(tmp_path));
-        p = node + 3;
-        sep = strrchr(tmp_path, '/');
-        if (sep)
-            sep[0] = '\0';
-        else
-            tmp_path[0] = '\0';
+    char *out = *rout; /* write cursor, stored back into *rout on success */
+    const char *d, *next;
+
+    if (in < in_end && *in == '/')
+        in++; /* already taken care of */
+    while (in < in_end) {
+        d = find_delim("/", in, in_end);
+        next = d + (d < in_end && *d == '/'); /* step over the '/' if any */
+        if (d - in == 1 && in[0] == '.') {
+            /* skip */
+        } else if (out > root && /* "../" at the very beginning really means "../" */
+                   d - in == 2 && in[0] == '.' && in[1] == '.') {
+            av_assert1(out[-1] == '/');
+            if (out - root > 1) /* a lone "/" has nothing to pop */
+                for (out--; out > root && out[-1] != '/'; out--); /* stays >= root */
+        } else {
+            if (out_end - out < next - in)
+                return AVERROR(ENOMEM);
+            memcpy(out, in, next - in);
+            out += next - in;
+        }
+        in = next;
     }
-
-    if (!av_stristart(p, "/", NULL) && root != rel)
-        av_strlcat(tmp_path, "/", size);
-
-    av_strlcat(tmp_path, p, size);
-    /* start set buf after temp path process. */
-    av_strlcpy(buf, rel, root - rel + 1);
-
-    if (!av_stristart(tmp_path, "/", NULL) && root != rel)
-        av_strlcat(buf, "/", size);
-
-    av_strlcat(buf, tmp_path, size);
+    *rout = out;
+    return 0;
 }
 
-void ff_make_absolute_url(char *buf, int size, const char *base,
+int ff_make_absolute_url(char *buf, int size, const char *base,
                           const char *rel)
 {
-    char *sep, *path_query;
-    char *root, *p;
-    char tmp_path[MAX_URL_SIZE];
-
-    memset(tmp_path, 0, sizeof(tmp_path));
-    /* Absolute path, relative to the current server */
-    if (base && strstr(base, "://") && rel[0] == '/') {
-        if (base != buf)
-            av_strlcpy(buf, base, size);
-        sep = strstr(buf, "://");
-        if (sep) {
-            /* Take scheme from base url */
-            if (rel[1] == '/') {
-                sep[1] = '\0';
-            } else {
-                /* Take scheme and host from base url */
-                sep += 3;
-                sep = strchr(sep, '/');
-                if (sep)
-                    *sep = '\0';
-            }
+    URLComponents ub, uc;
+    char *out, *out_end, *path;
+    const char *keep;
+    int ret;
+
+    /*
+     * Decompose base and rel, keep the leading components of base that rel
+     * does not override, then append what rel provides; both paths are
+     * merged through append_path(). Returns 0 or a negative AVERROR code.
+     * NOTE(review): url.h documents that base may be equal to buf, but ub
+     * then points into memory that COPY() overwrites -- confirm the callers.
+     */
+    if (!base)
+        base = ""; /* the previous implementation accepted a NULL base */
+
+    if (size <= 0) /* no room even for the terminating NUL */
+        return AVERROR(ENOMEM);
+    out = buf;
+    out_end = buf + size - 1;
+
+    if ((ret = ff_url_decompose(&ub, base, NULL)) < 0 ||
+        (ret = ff_url_decompose(&uc, rel,  NULL)) < 0)
+        return ret;
+
+    keep = ub.url;
+#define KEEP(component) \
+    if (uc.url_component_end_##component == uc.url) \
+        keep = ub.url_component_end_##component
+    KEEP(scheme);
+    KEEP(authority_full);
+    KEEP(path);
+    KEEP(query);
+    KEEP(fragment);
+#undef KEEP
+#define COPY(start, end) do { \
+        size_t len = (end) - (start); \
+        if (len > (size_t)(out_end - out)) \
+            return AVERROR(ENOMEM); \
+        memcpy(out, start, len); \
+        out += len; \
+    } while (0)
+    COPY(ub.url, keep); /* part of base that rel leaves untouched */
+    COPY(uc.url, uc.path); /* whatever rel has before its path */
+
+    if (uc.url_component_end_path > uc.url) {
+        path = out;
+        if (ub.url_component_end_authority > ub.authority ||
+            uc.url_component_end_authority > uc.authority ||
+            (ub.url_component_end_path > ub.path && ub.path[0] == '/') ||
+            (uc.url_component_end_path > uc.path && uc.path[0] == '/')) {
+            const char *root = "/";
+            COPY(root, root + 1);
         }
-        av_strlcat(buf, rel, size);
-        trim_double_dot_url(tmp_path, buf, size);
-        memset(buf, 0, size);
-        av_strlcpy(buf, tmp_path, size);
-        return;
-    }
-    /* If rel actually is an absolute url, just copy it */
-    if (!base || strstr(rel, "://") || rel[0] == '/') {
-        memset(buf, 0, size);
-        trim_double_dot_url(buf, rel, size);
-        return;
-    }
-    if (base != buf)
-        av_strlcpy(buf, base, size);
-
-    /* Strip off any query string from base */
-    path_query = strchr(buf, '?');
-    if (path_query)
-        *path_query = '\0';
-
-    /* Is relative path just a new query part? */
-    if (rel[0] == '?') {
-        av_strlcat(buf, rel, size);
-        trim_double_dot_url(tmp_path, buf, size);
-        memset(buf, 0, size);
-        av_strlcpy(buf, tmp_path, size);
-        return;
-    }
-
-    root = p = buf;
-    /* Get the path root of the url which start by "://" */
-    if (p && strstr(p, "://")) {
-        sep = strstr(p, "://");
-        if (sep) {
-            sep += 3;
-            root = strchr(sep, '/');
-            if (!root)
-                return;
+        if (uc.url_component_end_path > uc.path) {
+            const char *base_path_end = ub.path;
+            if (uc.path[0] != '/') {
+                base_path_end = ub.url_component_end_path;
+                while (base_path_end > ub.path && base_path_end[-1] != '/')
+                    base_path_end--;
+            }
+            if ((ret = append_path(path, out_end, &out, ub.path, base_path_end)) < 0 ||
+                (ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path)) < 0)
+                return ret;
         }
     }
 
-    /* Remove the file name from the base url */
-    sep = strrchr(buf, '/');
-    if (sep && sep <= root)
-        sep = root;
-
-    if (sep)
-        sep[1] = '\0';
-    else
-        buf[0] = '\0';
-    while (av_strstart(rel, "..", NULL) && sep) {
-        /* Remove the path delimiter at the end */
-        if (sep > root) {
-            sep[0] = '\0';
-            sep = strrchr(buf, '/');
-        }
+    COPY(uc.url_component_end_path, uc.end); /* what follows rel's path */
 
-        /* If the next directory name to pop off is "..", break here */
-        if (!strcmp(sep ? &sep[1] : buf, "..")) {
-            /* Readd the slash we just removed */
-            av_strlcat(buf, "/", size);
-            break;
-        }
-        /* Cut off the directory name */
-        if (sep)
-            sep[1] = '\0';
-        else
-            buf[0] = '\0';
-        rel += 3;
-    }
-    av_strlcat(buf, rel, size);
-    trim_double_dot_url(tmp_path, buf, size);
-    memset(buf, 0, size);
-    av_strlcpy(buf, tmp_path, size);
+#undef COPY
+    *out = 0;
+    return 0;
 }
 
 AVIODirEntry *ff_alloc_dir_entry(void)
diff --git a/libavformat/url.h b/libavformat/url.h
index 4866080e70..bbea485432 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto,
  * @param base the base url, may be equal to buf.
  * @param rel the new url, which is interpreted relative to base
  */
-void ff_make_absolute_url(char *buf, int size, const char *base,
-                          const char *rel);
+int ff_make_absolute_url(char *buf, int size, const char *base,
+                         const char *rel);
 
 /**
  * Allocate directory entry with default values.
-- 
2.27.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to