Marton Balint (12020-07-29):
> Thanks for working on this. I agree that proper (as RFC compliant as it can be) URL parsing is needed here. Probably we should clearly document differences from RFC compliant parsing, if we cannot do it entirely RFC compliantly...
Indeed. Unfortunately, that may cause some trouble, but not for right now.
> ? and # can also separate authority from the rest.
Thanks, fixed.
> I am not sure about this approach, the known characters at the end or at the start will make further operations a bit harder. I'd just simply add another field for each URL component to signal the end, e.g.
I considered this, but it was not actually convenient. I have something that works, but it needs polishing and more testing. But you can have a peek.
Regards,
--
Nicolas George
>From d6c429e879ffe3cdce0af6d22854dbbcce6c8222 Mon Sep 17 00:00:00 2001 From: Nicolas George <geo...@nsup.org> Date: Thu, 30 Jul 2020 00:02:10 +0200 Subject: [PATCH] WIP lavf/url: rewrite ff_make_absolute_url(). Signed-off-by: Nicolas George <geo...@nsup.org> --- libavformat/url.c | 223 +++++++++++++++++++--------------------------- libavformat/url.h | 4 +- 2 files changed, 96 insertions(+), 131 deletions(-) diff --git a/libavformat/url.c b/libavformat/url.c index 26aaab4019..fa265e90ea 100644 --- a/libavformat/url.c +++ b/libavformat/url.c @@ -27,6 +27,7 @@ #if CONFIG_NETWORK #include "network.h" #endif +#include "libavutil/avassert.h" #include "libavutil/avstring.h" /** @@ -152,146 +153,110 @@ int ff_url_decompose(URLComponents *uc, const char *url, const char *end) return 0; } -static void trim_double_dot_url(char *buf, const char *rel, int size) +static int append_path(char *root, char *out_end, char **rout, + const char *in, const char *in_end) { - const char *p = rel; - const char *root = rel; - char tmp_path[MAX_URL_SIZE] = {0, }; - char *sep; - char *node; - - /* Get the path root of the url which start by "://" */ - if (p && (sep = strstr(p, "://"))) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; - } - - /* set new current position if the root node is changed */ - p = root; - while (p && (node = strstr(p, ".."))) { - av_strlcat(tmp_path, p, node - p + strlen(tmp_path)); - p = node + 3; - sep = strrchr(tmp_path, '/'); - if (sep) - sep[0] = '\0'; - else - tmp_path[0] = '\0'; + char *out = *rout; + const char *d, *next; + + if (in < in_end && *in == '/') + in++; /* already taken care of */ + while (in < in_end) { + d = find_delim("/", in, in_end); + next = d + (d < in_end && *d == '/'); + if (d - in == 1 && in[0] == '.') { + /* skip */ + } else if (out > root && /* "../" at the very beginning really means "../" */ + d - in == 2 && in[0] == '.' 
&& in[1] == '.') { + av_assert1(out[-1] == '/'); + if (out - root > 1) + while (out > root && (--out)[-1] != '/'); + } else { + if (out_end - out < next - in) + return AVERROR(ENOMEM); + memcpy(out, in, next - in); + out += next - in; + } + in = next; } - - if (!av_stristart(p, "/", NULL) && root != rel) - av_strlcat(tmp_path, "/", size); - - av_strlcat(tmp_path, p, size); - /* start set buf after temp path process. */ - av_strlcpy(buf, rel, root - rel + 1); - - if (!av_stristart(tmp_path, "/", NULL) && root != rel) - av_strlcat(buf, "/", size); - - av_strlcat(buf, tmp_path, size); + *rout = out; + return 0; } -void ff_make_absolute_url(char *buf, int size, const char *base, +int ff_make_absolute_url(char *buf, int size, const char *base, const char *rel) { - char *sep, *path_query; - char *root, *p; - char tmp_path[MAX_URL_SIZE]; - - memset(tmp_path, 0, sizeof(tmp_path)); - /* Absolute path, relative to the current server */ - if (base && strstr(base, "://") && rel[0] == '/') { - if (base != buf) - av_strlcpy(buf, base, size); - sep = strstr(buf, "://"); - if (sep) { - /* Take scheme from base url */ - if (rel[1] == '/') { - sep[1] = '\0'; - } else { - /* Take scheme and host from base url */ - sep += 3; - sep = strchr(sep, '/'); - if (sep) - *sep = '\0'; - } + URLComponents ub, uc; + char *out, *out_end, *path; + const char *keep; + int ret; + + //const char *scheme; /**< possibly including lavf-specific options */ + //const char *authority; /**< "//" if it is a real URL */ + //const char *userinfo; /**< including final '@' if present */ + //const char *host; + //const char *port; /**< including initial ':' if present */ + //const char *path; + //const char *query; /**< including initial '?' 
if present */ + //const char *fragment; /**< including initial '#' if present */ + //const char *end; + + if (!size) + return AVERROR(ENOMEM); + out = buf; + out_end = buf + size - 1; + + if ((ret = ff_url_decompose(&ub, base, NULL) < 0) || + (ret = ff_url_decompose(&uc, rel, NULL) < 0)) + return ret; + + keep = ub.url; +#define KEEP(component) \ + if (uc.url_component_end_##component == uc.url) \ + keep = ub.url_component_end_##component + KEEP(scheme); + KEEP(authority_full); + KEEP(path); + KEEP(query); + KEEP(fragment); +#undef KEEP +#define COPY(start, end) do { \ + size_t len = end - start; \ + if (len > out_end - out) \ + return AVERROR(ENOMEM); \ + memcpy(out, start, len); \ + out += len; \ + } while (0); + COPY(ub.url, keep); + COPY(uc.url, uc.path); + + if (uc.url_component_end_path > uc.url) { + path = out; + if (ub.url_component_end_authority > ub.authority || + uc.url_component_end_authority > uc.authority || + (ub.url_component_end_path > ub.path && ub.path[0] == '/') || + (uc.url_component_end_path > uc.path && uc.path[1] == '/')) { + const char *root = "/"; + COPY(root, root + 1); } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; - } - /* If rel actually is an absolute url, just copy it */ - if (!base || strstr(rel, "://") || rel[0] == '/') { - memset(buf, 0, size); - trim_double_dot_url(buf, rel, size); - return; - } - if (base != buf) - av_strlcpy(buf, base, size); - - /* Strip off any query string from base */ - path_query = strchr(buf, '?'); - if (path_query) - *path_query = '\0'; - - /* Is relative path just a new query part? 
*/ - if (rel[0] == '?') { - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); - return; - } - - root = p = buf; - /* Get the path root of the url which start by "://" */ - if (p && strstr(p, "://")) { - sep = strstr(p, "://"); - if (sep) { - sep += 3; - root = strchr(sep, '/'); - if (!root) - return; + if (uc.url_component_end_path > uc.path) { + const char *base_path_end = ub.path; + if (uc.path[0] != '/') { + base_path_end = ub.url_component_end_path; + while (base_path_end > ub.path && base_path_end[-1] != '/') + base_path_end--; + } + if ((ret = append_path(path, out_end, &out, ub.path, base_path_end)) < 0 || + (ret = append_path(path, out_end, &out, uc.path, uc.url_component_end_path)) < 0) + return ret; } } - /* Remove the file name from the base url */ - sep = strrchr(buf, '/'); - if (sep && sep <= root) - sep = root; - - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - while (av_strstart(rel, "..", NULL) && sep) { - /* Remove the path delimiter at the end */ - if (sep > root) { - sep[0] = '\0'; - sep = strrchr(buf, '/'); - } + COPY(uc.url_component_end_path, uc.end); - /* If the next directory name to pop off is "..", break here */ - if (!strcmp(sep ? &sep[1] : buf, "..")) { - /* Readd the slash we just removed */ - av_strlcat(buf, "/", size); - break; - } - /* Cut off the directory name */ - if (sep) - sep[1] = '\0'; - else - buf[0] = '\0'; - rel += 3; - } - av_strlcat(buf, rel, size); - trim_double_dot_url(tmp_path, buf, size); - memset(buf, 0, size); - av_strlcpy(buf, tmp_path, size); +#undef COPY + *out = 0; + return 0; } AVIODirEntry *ff_alloc_dir_entry(void) diff --git a/libavformat/url.h b/libavformat/url.h index 4866080e70..bbea485432 100644 --- a/libavformat/url.h +++ b/libavformat/url.h @@ -312,8 +312,8 @@ int ff_url_join(char *str, int size, const char *proto, * @param base the base url, may be equal to buf. 
* @param rel the new url, which is interpreted relative to base */ -void ff_make_absolute_url(char *buf, int size, const char *base, - const char *rel); +int ff_make_absolute_url(char *buf, int size, const char *base, + const char *rel); /** * Allocate directory entry with default values. -- 2.27.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".