Changeset: 4533b80259fc for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4533b80259fc
Modified Files:
	monetdb5/modules/atoms/url.c
Branch: urlfuncs
Log Message:

fighting with junk urls

diffs (35 lines):

diff --git a/monetdb5/modules/atoms/url.c b/monetdb5/modules/atoms/url.c
--- a/monetdb5/modules/atoms/url.c
+++ b/monetdb5/modules/atoms/url.c
@@ -841,15 +841,26 @@ extractURLHost(str *retval, str *url, bo
 		} else {
 			l = s - h;
 		}
-		if ((*retval = GDKmalloc(l + 1)) != NULL) {
-			if (no_www && !strncmp(h, "wwww.", 4)) {
-				strcpy_len(*retval, (h + 4), l + 1);
+		if (l > 4) {
+			if ((*retval = GDKmalloc(l + 1)) != NULL) {
+				if (no_www && strlen(h) > 4 && !strncmp(h, "www.", 4)) {
+					strcpy_len(*retval, (h + 4), l + 1);
+				} else {
+					strcpy_len(*retval, h, l + 1);
+				}
+				// clean up if not valid UTF-8
+				if (!checkUTF8(*retval)) {
+					printf("%s\n", h);
+					GDKfree(*retval);
+					*retval = GDKstrdup(str_nil);
+				}
 			} else {
-				strcpy_len(*retval, h, l + 1);
+				throw(MAL, "url.getURLHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
 			}
 		} else {
-			throw(MAL, "url.getURLHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+			*retval = GDKstrdup(str_nil);
 		}
+
 	} else {
 		*retval = GDKstrdup(str_nil);
 	}
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org