Zlomek, Josef (12020-07-29):
> I also noticed that there are many more bugs in ff_make_absolute_url()
> I just fixed one of them.
> I am looking forward for your complete fix.

I think the key to a working version is to properly parse URLs first.
Then we can parse the base and relative part, and build the absolute URL
from the components, simplifying the path component, and only the path
component, at the same time.

Here is my preliminary code for parsing, in case somebody wants to look
at it.

Next step is to use it for rewriting ff_make_absolute_url(). Possibly,
use it for av_url_split() too.

Regards,

-- 
  Nicolas George
From 6e874f86d3ab3c8b69a12d118cce0a10333848dc Mon Sep 17 00:00:00 2001
From: Nicolas George <geo...@nsup.org>
Date: Wed, 29 Jul 2020 14:39:20 +0200
Subject: [PATCH] lavf/url: add ff_url_decompose().

Signed-off-by: Nicolas George <geo...@nsup.org>
---
 libavformat/tests/url.c | 33 ++++++++++++++++++++
 libavformat/url.c       | 69 +++++++++++++++++++++++++++++++++++++++++
 libavformat/url.h       | 37 ++++++++++++++++++++++
 tests/ref/fate/url      | 45 +++++++++++++++++++++++++++
 4 files changed, 184 insertions(+)

diff --git a/libavformat/tests/url.c b/libavformat/tests/url.c
index 1d961a1b43..1b69b4cea8 100644
--- a/libavformat/tests/url.c
+++ b/libavformat/tests/url.c
@@ -21,6 +21,31 @@
 #include "libavformat/url.h"
 #include "libavformat/avformat.h"
 
+static void test_decompose(const char *url)
+{
+    URLComponents uc;
+    int len, ret;
+
+    printf("%s =>\n", url);
+    ret = ff_url_decompose(&uc, url, NULL);
+    if (ret < 0) {
+        printf("  error: %s\n", av_err2str(ret));
+        return;
+    }
+#define PRINT_COMPONENT(comp) \
+    len = uc.url_component_end_##comp - uc.comp; \
+    if (len) printf("  "#comp": %.*s\n", len, uc.comp);
+    PRINT_COMPONENT(scheme);
+    PRINT_COMPONENT(authority);
+    PRINT_COMPONENT(userinfo);
+    PRINT_COMPONENT(host);
+    PRINT_COMPONENT(port);
+    PRINT_COMPONENT(path);
+    PRINT_COMPONENT(query);
+    PRINT_COMPONENT(fragment);
+    printf("\n");
+}
+
 static void test(const char *base, const char *rel)
 {
     char buf[200], buf2[200];
@@ -51,6 +76,14 @@ static void test2(const char *url)
 
 int main(void)
 {
+    printf("Testing ff_url_decompose:\n\n");
+    test_decompose("http://user:pass@ffmpeg:8080/dir/file?query#fragment";);
+    test_decompose("http://ffmpeg/dir/file";);
+    test_decompose("file:///dev/null");
+    test_decompose("file:/dev/null");
+    test_decompose("http://[::1]/dev/null";);
+    test_decompose("http://[::1]:8080/dev/null";);
+    test_decompose("//ffmpeg/dev/null");
     printf("Testing ff_make_absolute_url:\n");
     test(NULL, "baz");
     test("/foo/bar", "baz");
diff --git a/libavformat/url.c b/libavformat/url.c
index 20463a6674..7612a2ee6e 100644
--- a/libavformat/url.c
+++ b/libavformat/url.c
@@ -78,6 +78,75 @@ int ff_url_join(char *str, int size, const char *proto,
     return strlen(str);
 }
 
+static const char *find_delim(const char *delim, const char *cur, const char *end)
+{
+    while (cur < end && !strchr(delim, *cur))
+        cur++;
+    return cur;
+}
+
+int ff_url_decompose(URLComponents *uc, const char *url, const char *end)
+{
+    const char *cur, *aend, *p;
+
+    if (!end)
+        end = url + strlen(url);
+    cur = uc->url = url;
+
+    /* scheme */
+    uc->scheme = cur;
+    p = find_delim(":/", cur, end); /* lavf "schemes" can contain options */
+    if (*p == ':')
+        cur = p + 1;
+
+    /* authority */
+    uc->authority = cur;
+    if (end - cur >= 2 && cur[0] == '/' && cur[1] == '/') {
+        cur += 2;
+        aend = find_delim("/", cur, end);
+
+        /* userinfo */
+        uc->userinfo = cur;
+        p = find_delim("@", cur, aend);
+        if (*p == '@')
+            cur = p + 1;
+
+        /* host */
+        uc->host = cur;
+        if (*cur == '[') { /* hello IPv6, thanks for using colons! */
+            p = find_delim("]", cur, aend);
+            if (*p != ']')
+                return AVERROR(EINVAL);
+            if (p + 1 < aend && p[1] != ':')
+                return AVERROR(EINVAL);
+            cur = p + 1;
+        } else {
+            cur = find_delim(":", cur, aend);
+        }
+
+        /* port */
+        uc->port = cur;
+        cur = aend;
+    } else {
+        uc->userinfo = uc->host = uc->port = cur;
+    }
+
+    /* path */
+    uc->path = cur;
+    cur = find_delim("?#", cur, end);
+
+    /* query */
+    uc->query = cur;
+    if (*cur == '?')
+        cur = find_delim("#", cur, end);
+
+    /* fragment */
+    uc->fragment = cur;
+
+    uc->end = end;
+    return 0;
+}
+
 static void trim_double_dot_url(char *buf, const char *rel, int size)
 {
     const char *p = rel;
diff --git a/libavformat/url.h b/libavformat/url.h
index de0d30aca0..99d453b378 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -344,4 +344,41 @@ const AVClass *ff_urlcontext_child_class_iterate(void **iter);
 const URLProtocol **ffurl_get_protocols(const char *whitelist,
                                         const char *blacklist);
 
+typedef struct URLComponents {
+    const char *url;        /**< whole URL, for reference */
+    const char *scheme;     /**< possibly including lavf-specific options */
+    const char *authority;  /**< "//" if it is a real URL */
+    const char *userinfo;   /**< including final '@' if present */
+    const char *host;
+    const char *port;       /**< including initial ':' if present */
+    const char *path;
+    const char *query;      /**< including initial '?' if present */
+    const char *fragment;   /**< including initial '#' if present */
+    const char *end;
+} URLComponents;
+
+#define url_component_end_scheme      authority
+#define url_component_end_authority   userinfo
+#define url_component_end_userinfo    host
+#define url_component_end_host        port
+#define url_component_end_port        path
+#define url_component_end_path        query
+#define url_component_end_query       fragment
+#define url_component_end_fragment    end
+
+/**
+ * Parse an URL to find the components.
+ *
+ * Each component runs until the start of the next component,
+ * possibly including a mandatory delimiter.
+ *
+ * @param uc   structure to fill with pointers to the components.
+ * @param url  URL to parse.
+ * @param end  end of the URL, or NULL to parse to the end of string.
+ *
+ * @return  >= 0 for success or an AVERROR code, especially if the URL is
+ *          malformed.
+ */
+int ff_url_decompose(URLComponents *uc, const char *url, const char *end);
+
 #endif /* AVFORMAT_URL_H */
diff --git a/tests/ref/fate/url b/tests/ref/fate/url
index 533ba2cb1e..84cf85abdd 100644
--- a/tests/ref/fate/url
+++ b/tests/ref/fate/url
@@ -1,3 +1,48 @@
+Testing ff_url_decompose:
+
+http://user:pass@ffmpeg:8080/dir/file?query#fragment =>
+  scheme: http:
+  authority: //
+  userinfo: user:pass@
+  host: ffmpeg
+  port: :8080
+  path: /dir/file
+  query: ?query
+  fragment: #fragment
+
+http://ffmpeg/dir/file =>
+  scheme: http:
+  authority: //
+  host: ffmpeg
+  path: /dir/file
+
+file:///dev/null =>
+  scheme: file:
+  authority: //
+  path: /dev/null
+
+file:/dev/null =>
+  scheme: file:
+  path: /dev/null
+
+http://[::1]/dev/null =>
+  scheme: http:
+  authority: //
+  host: [::1]
+  path: /dev/null
+
+http://[::1]:8080/dev/null =>
+  scheme: http:
+  authority: //
+  host: [::1]
+  port: :8080
+  path: /dev/null
+
+//ffmpeg/dev/null =>
+  authority: //
+  host: ffmpeg
+  path: /dev/null
+
 Testing ff_make_absolute_url:
                                             (null) baz                  => baz
                                           /foo/bar baz                  => /foo/baz
-- 
2.27.0

Attachment: signature.asc
Description: PGP signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to