This is in response to a question I was asked off-list about this:
https://marc.info/?l=openbsd-tech&m=172374490203891&w=2 On Sun, Aug 18, 2024 at 05:20:54AM +0100, ropers wrote: > On Thu, 15 Aug 2024 at 19:02, Walter Alejandro Iglesias wrote: > > > > > After reading an article by Todd C. Miller, I realized that I was > > misusing snprintf(). > > > > Hiya Walter, would you mind telling me which article that was (URL > preferred)? Right, my mistake. > Feel free to reply on-list if you think others on misc@ might benefit from > knowing too. > > Many thanks and kind regards, > Ian The following is the conversation I had with Todd off-list which will help to undrestand the context. On Wed, Aug 14, 2024 at 10:56:45PM +0200, Walter Alejandro Iglesias wrote: > On Tue, Aug 13, 2024 at 12:51:17PM -0600, Todd C. Miller wrote: > > On Tue, 13 Aug 2024 17:19:15 +0200, Walter Alejandro Iglesias wrote: > > > > > After reading this article of yours: > > > > > > https://www.millert.dev/papers/strlcpy/ > > > > > > I've tried to use strlcpy(3) instead of snprintf(3) in the little > > > program I paste below (in the encode_word() function.) In this case I > > > don't want to append the whole source string to the destination one but > > > *word by word*, hence the return value (that would be the length of > > > &r[n] or &r[n - 1]) is not useful to perform the check you suggest in > > > your article (also explained in the man page.) > > > > > > In case the method I chose is not totally wrong, my question is: is > > > still advisable to use strlcpy() instead of snprintf() in this case? > > > Is what I did susceptible to overflows? > > > > You could substitute strlcpy() for snprintf() in calls like: > > > > snprintf(word, word_len + 1, "%s", &r[n - 1]); > > > > but it probably doesn't make much of a difference. In the cases > > where you glue multiple strings together, snprintf() is probably > > the better choice than strlcpy() + strlcat(). > > > > My inclination would be to use strdup() to set "word" instead of > > determining the length, allocating memory, and copying the string. > > > > It would also be a good idea to check the snprintf() return value > > to make sure that the length of the string matches the amount of > > space you allocated. > > This was the point of my question, the check with the return value. > That was what, after reading your article, made me realize that > snprintf() wasn't the right choice to pick word by word from a string as > I do here: > > if (n > 0 && utf8word) { > if (r[n - 1] == '\n') > r[n - 1] = ' '; > snprintf(word, word_len + 1, "%s", &r[n - 1]); > } else > snprintf(word, word_len + 1, "%s", &r[n]); > > snprintf() initially tries to create a string with a length ranging from > r[n] to the end of r (same strlcpy()). For example, if the source > string is 500 characters long and the loop is at n == 2, it tries to > create a 498 bytes string, but I'm loading just one word, hence the > return value is obviously a lot of times larger than the memory I'd > allocated. For this same reason it's too expensive, I'd noticed that > with big UTF-8 files was too slow. In the new example I paste below, I > use strndup() as you told me. Running it with a big UTF-8 dense file > the difference in performance is remarkable: > > With the other version: > > $ encode-word very_big_utf8_file.txt > 0m04.13s real 0m03.98s user 0m00.03s system > > With the version pasted bellow: > > $ encode-word very_big_utf8_file.txt > 0m00.17s real 0m00.07s user 0m00.01s system > > > By other hand, to append the tags and the word to the final string, > snprintf() is suitable. In this same example below I use the return > value to do some kind of check. > > > There is still a bug (not related to this) that is driving me crazy and > I can't find the cause. :-) > > > Thanks Todd! > > The version below is not the one I sent to Todd but my last version which is even more useful as an example for the issue discussed (and solve the bug I mention above.) In this version I give more use of strdup(3). /* * Little program to test encode_word() and wrap_header() functions * included in my mail(1) patches: * https://en.roquesor.com/Downloads/mail_patches.tar.gz */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <err.h> #include <errno.h> #include <stdint.h> #include <unistd.h> #include <ctype.h> #include <locale.h> #define b64enc __b64_ntop static int newline_at_end = 0; void encode_word(char **p); static void filecopy(FILE *, FILE *); int b64enc(unsigned char const *src, size_t srclength, char *target, size_t targsize); void wrap_header(char **p, size_t name_len); void usage(void); int main(int argc, char *argv[]) { FILE *fp; int option; while ((option = getopt(argc, argv, "h")) != -1) switch (option) { case 'h': usage(); break; default: usage(); } argc -= optind; argv += optind; if (argc > 0) while (argc-- > 0) if ((fp = fopen(*argv++, "r")) == NULL) warn("%s", *(argv - 1)); else { filecopy(fp, stdout); fclose(fp); } else filecopy(stdin, stdout); return errno; } void filecopy(FILE * ifp, FILE * ofp) { char *p = NULL; size_t size = 0; size_t i = 0; int c; while ((c = getc(ifp)) != EOF) { if (i == size) { p = realloc(p, size + 100); if (p == NULL) err(1, NULL); size += 100; } /* Strip control characters */ if (!iscntrl(c) || c == '\t' || c == '\n') p[i++] = c; } if (i == size) { p = realloc(p, size + 1); if (p == NULL) err(1, NULL); } p[i] = '\0'; encode_word(&p); wrap_header(&p, 0); printf("%s", p); free(p); } void encode_word(char **p) { char *s = NULL; char *r = NULL; char *word = NULL; char *eword = NULL; char *otag = NULL; char ctag[] = "?="; size_t i = 0; size_t n = 0; size_t size = 0; size_t ck = 0; size_t sp = 0; size_t wlen = 0; size_t elen = 0; int encoded = 0; setlocale(LC_CTYPE, "en_US.UTF-8"); size = strlen(*p); s = malloc(size); if (s == NULL) err(1, NULL); r = *p; while (r[n] != '\0') { if (encoded == 0 && isspace(r[n])) s[i++] = r[n++]; sp = strspn(&r[n], "\t\n "); wlen = strcspn(&r[n + sp], "\t\n "); wlen += sp; word = strndup(&r[n], wlen); if (word == NULL) err(1, NULL); /* Check if the word contains UTF-8 characters */ if (wlen > mbstowcs(NULL, word, 0)) { elen = (wlen + 2) / 3 * 4; eword = malloc(elen + 1); if (eword == NULL) err(1, NULL); b64enc(word, wlen, eword, elen); eword[elen] = '\0'; if (encoded > 0) otag = strdup(" =?UTF-8?B?"); else otag = strdup("=?UTF-8?B?"); if (otag == NULL) err(1, NULL); s = realloc(s, size + strlen(otag) + strlen(ctag) + (elen - wlen) + 1); if (s == NULL) err(1, NULL); size += strlen(otag) + strlen(ctag) + (elen - wlen) + 1; ck = snprintf(&s[i], elen + strlen(otag) + strlen(ctag) + 1, "%s%s%s", otag, eword, ctag); if (ck >= elen + strlen(otag) + strlen(ctag) + 1) errx(1, "snprint: toolong"); i += elen + strlen(otag) + strlen(ctag); free(otag); free(eword); n += wlen; encoded++; } else { ck = snprintf(&s[i], wlen + 1, "%s", word); if (ck >= wlen + 1) errx(1, "snprint: toolong"); i += wlen; n += wlen; encoded = 0; } free(word); } if (i == size) { s = realloc(s, size + 1); if (s == NULL) err(1, NULL); } s[i] = '\0'; /* Remove from mail patch */ free(r); *p = s; } void wrap_header(char **p, size_t name_len) { char *s = NULL; char *r = NULL; size_t i = 0; size_t n = 0; size_t size = 0; int col = name_len; /* let room for header name */ int wrap = 72; size = strlen(*p); s = malloc(size); if (s == NULL) err(1, NULL); r = *p; while (r[n] != '\0') { /* Replace newlines by spaces */ if (r[n] == '\n' && r[n + 1] != '\0') r[n] = ' '; if (n != 0 && isspace(r[n]) && col + strcspn(&r[n + 1], "\t\n ") >= wrap) { s = realloc(s, size + 1); if (s == NULL) err(1, NULL); size++; if (r[n + 1] != '\0') s[i++] = '\n'; while (isspace(r[n + 1])) n++; col = 0; if (r[n] == '\t') col += 7; } s[i++] = r[n++]; if (r[n] == '\t') col += 7; col++; } if (i == size) { s = realloc(s, size + 1); if (s == NULL) err(1, NULL); } s[i] = '\0'; /* Remove this free from mail patch */ free(r); *p = s; } void usage(void) { extern char *__progname; fprintf(stderr, "Usage: %s [-bhlnp] [-w width] [file ...]\n" " -h print this help\n", __progname); exit(1); } -- Walter