On 2023/06/30 11:14:51 +0200, Florian Obser <flor...@openbsd.org> wrote:
> On 2023-06-30 10:46 +02, Omar Polo <o...@omarpolo.com> wrote:
> > On 2023/06/29 23:43:25 +0200, Omar Polo <o...@omarpolo.com> wrote:
> >> On 2023/06/29 19:55:52 +0200, Florian Obser <flor...@openbsd.org> wrote:
> >> > I'm worried that we pass un-sanitized input through to fcgi.
> >> > Of course we are passing *a lot* of un-sanitized input through to fcgi,
> >> > so does this matter in the grand scheme of things?
> >> > But I'd like if server_http_parsehost() enforces syntactically valid
> >> > hostnames first.
> >> > 
> >> > There is valid_domain() in usr.bin/ssh/misc.c or res_hnok in
> >> > resolv.h. Which is probably fine to use since we don't care too much
> >> > about portable.
> >> 
> >> right, httpd is happily accepting any nonsense as Host.  Here's an
> >> adaptation of millert' valid_domain() from usr.bin/ssh/misc.c.
> >> Hope noone is relying on using an empty/malformed Host :>
> >
> > actually this is wrong.  it breaks when an ipv6 address is used as
> > Host.  this seems to be an issue in ssh too, although I haven't
> 
> IIRC ssh only uses valid_domain() when it already knows that it's not an
> IPv6 address.

        % ssh ssh://[::1]
        usage: ssh [-46AaCfGgKkMNnqsTtVvXxYy] [-B bind_interface]
               [...]
        % curl --head http://[::1]
        HTTP/1.1 200 OK
        [...]

but maybe I'm holding it wrong.  IPv6 addresses in the HostName config
directive work fine though.  `ssh ::1' also works.

> > [...]
> > should I make it strictier (not done just because it's a bit long and
> > definitely not fun to validate ipv6 addresses), or there's something
> > else in base that I can steal fom? :)
> 
> getaddrinfo(3) with AI_NUMERICHOST.

duh.  sorry for the dumb question, it was obvious.  Here's a better
diff.

I've made strictier the syntax checks for IPv6 (after the closing ']'
there's the optional port but then nothing else) and joined together
the cases for the host:port or hostname alone to simplify the validity
check.

last thing I'm unsure if whether we should log as-is the original Host
in case of error, as it can contain anything.

Thanks!

diff /usr/src
commit - 9832f5035d799ae12e9f848cff5650481b673260
path + /usr/src
blob - 091cc86fec946a4a9d422c04a48f5cbfae015ce0
file + usr.sbin/httpd/server_http.c
--- usr.sbin/httpd/server_http.c
+++ usr.sbin/httpd/server_http.c
@@ -23,6 +23,7 @@
 #include <sys/tree.h>
 #include <sys/stat.h>
 
+#include <netdb.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 
@@ -831,11 +832,49 @@ char *
        return (buf);
 }
 
+static int
+valid_domain(char *name, const char **errstr)
+{
+       size_t i;
+       unsigned char c, last = '\0';
+
+       *errstr = NULL;
+
+       if (*name == '\0') {
+               *errstr = "empty domain name";
+               return 0;
+       }
+       if (!isalpha((unsigned char)name[0]) &&
+           !isdigit((unsigned char)name[0])) {
+               *errstr = "domain name starts with an invalid character";
+               return 0;
+       }
+       for (i = 0; name[i] != '\0'; ++i) {
+               c = tolower((unsigned char)name[i]);
+               name[i] = c;
+               if (last == '.' && c == '.') {
+                       *errstr = "domain name contains consecutive separators";
+                       return 0;
+               }
+               if (c != '.' && c != '-' && !isalnum(c) &&
+                   c != '_') /* technically invalid, but common */ {
+                       *errstr = "domain name contains invalid characters";
+                       return 0;
+               }
+               last = c;
+       }
+       if (name[i - 1] == '.')
+               name[i - 1] = '\0';
+       return 1;
+}
+
 char *
 server_http_parsehost(char *host, char *buf, size_t len, int *portval)
 {
+       struct addrinfo  hints, *ai;
        char            *start, *end, *port;
        const char      *errstr = NULL;
+       int              err;
 
        if (strlcpy(buf, host, len) >= len) {
                log_debug("%s: host name too long", __func__);
@@ -849,18 +888,37 @@ server_http_parsehost(char *host, char *buf, size_t le
                /* Address enclosed in [] with port, eg. [2001:db8::1]:80 */
                start++;
                *end++ = '\0';
-               if ((port = strchr(end, ':')) == NULL || *port == '\0')
-                       port = NULL;
-               else
-                       port++;
+               if (*end == ':') {
+                       *end++ = '\0';
+                       port = end;
+               } else if (*end != '\0') {
+                       log_debug("%s: gibberish after IPv6 address: %s",
+                           __func__, host);
+                       return (NULL);
+               }
+
                memmove(buf, start, strlen(start) + 1);
-       } else if ((end = strchr(start, ':')) != NULL) {
-               /* Name or address with port, eg. www.example.com:80 */
-               *end++ = '\0';
-               port = end;
+
+               memset(&hints, 0, sizeof(hints));
+               hints.ai_flags = AI_NUMERICHOST;
+               err = getaddrinfo(buf, NULL, &hints, &ai);
+               if (err != 0) {
+                       log_debug("%s: %s: %s", __func__, gai_strerror(err),
+                           host);
+                       return (NULL);
+               }
+               freeaddrinfo(ai);
        } else {
-               /* Name or address with default port, eg. www.example.com */
-               port = NULL;
+               /* Name or address with optional port */
+               if ((end = strchr(start, ':')) != NULL) {
+                       *end++ = '\0';
+                       port = end;
+               }
+
+               if (!valid_domain(start, &errstr)) {
+                       log_debug("%s: %s: %s", __func__, errstr, host);
+                       return (NULL);
+               }
        }
 
        if (port != NULL) {

Reply via email to