On Tue, Mar 20, 2007 at 10:27:04AM +0100, Thomas Roessler wrote: > On 2007-03-20 12:01:49 +0900, TAKAHASHI Tamotsu wrote: > > >strict_mime=yes: > > unset ignore_linear_white_space > > >strict_mime=no: > > set ignore_linear_white_space > > Why on earth do we need two different behaviors for encoding or > decoding subject headers as far as whitespace is concerned? There's > a spec-compliant way to do this, let's do that, and be done with. > > If the other approach is really needed to deal with widespread > breakage (I don't think it is), implement that, and implement it > always.
Previously, Mutt displayed linear-white-space between an encoded-word and text as it was. Because my change altered that behavior, I made it an option. > As an aside, the code used to implement this (in the instance around > line 816 of rfc2047.c) looks like an incredibly convoluted and > inefficient way of saying something like this: > > if (islwsp (*s)) > { > *d = ' '; > d++; > dlen--; > } > > while (islwsp (*s) && *s) > s++; Thank you for your suggestions. The attached patch makes the following corrections: - Remove the option $ignore_linear_white_space. - By default, replace linear-white-space between encoded-word and text with a single space. - Clean up the code. > Going further through the current code of rfc2047.c, the > AssumedCharset related code says things like: > > n = mutt_strlen (s); > t = safe_malloc (n + 1); > strfcpy (t, s, n + 1); > > That's usually spelled as > > t = safe_strdup (s); > > in mutt code. For that, I think the code that Tamotsu wrote is good. -- TAKIZAWA Takashi http://www.emaillab.org/
diff -r b0172175cc89 init.h --- a/init.h Tue Mar 20 13:39:29 2007 -0700 +++ b/init.h Wed Mar 21 21:05:39 2007 +0900 @@ -850,13 +850,6 @@ struct option_t MuttVars[] = { ** Specifies the hostname to use after the ``@'' in local e-mail ** addresses. This overrides the compile time definition obtained from ** /etc/resolv.conf. - */ - { "ignore_linear_white_space", DT_BOOL, R_NONE, OPTIGNORELWS, 0 }, - /* - ** .pp - ** This option replaces linear-white-space between encoded-word - ** and *text to a single space to prevent the display of MIME-encoded - ** ``Subject'' field from being divided into multiple lines. */ { "ignore_list_reply_to", DT_BOOL, R_NONE, OPTIGNORELISTREPLYTO, 0 }, /* diff -r b0172175cc89 mutt.h --- a/mutt.h Tue Mar 20 13:39:29 2007 -0700 +++ b/mutt.h Wed Mar 21 21:05:40 2007 +0900 @@ -372,7 +372,6 @@ enum OPTHIDETHREADSUBJECT, OPTHIDETOPLIMITED, OPTHIDETOPMISSING, - OPTIGNORELWS, OPTIGNORELISTREPLYTO, #ifdef USE_IMAP OPTIMAPCHECKSUBSCRIBED, diff -r b0172175cc89 rfc2047.c --- a/rfc2047.c Tue Mar 20 13:39:29 2007 -0700 +++ b/rfc2047.c Wed Mar 21 22:54:26 2007 +0900 @@ -748,54 +748,13 @@ static const char *find_encoded_word (co return 0; } -/* return length of linear-white-space */ -static size_t lwslen (const char *s, size_t n) -{ - const char *p = s; - size_t len = n; - - if (n <= 0) - return 0; - - for (; p < s + n; p++) - if (!strchr (" \t\r\n", *p)) - { - len = (size_t)(p - s); - break; - } - if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */ - len = (size_t)0; - return len; -} - -/* return length of linear-white-space : reverse */ -static size_t lwsrlen (const char *s, size_t n) -{ - const char *p = s + n - 1; - size_t len = n; - - if (n <= 0) - return 0; - - if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */ - return (size_t)0; - - for (; p >= s; p--) - if (!strchr (" \t\r\n", *p)) - { - len = (size_t)(s + n - 1 - p); - break; - } - return len; -} - /* try to decode anything that looks like a valid RFC2047 encoded * header field, 
ignoring RFC822 parsing rules */ void rfc2047_decode (char **pd) { - const char *p, *q; - size_t m, n; + const char *p, *q, *r; + size_t n; int found_encoded = 0; char *d0, *d; const char *s = *pd; @@ -812,15 +771,16 @@ void rfc2047_decode (char **pd) if (!(p = find_encoded_word (s, &q))) { /* no encoded words */ - if (option (OPTIGNORELWS)) + if (found_encoded) { - n = mutt_strlen (s); - if (found_encoded && (m = lwslen (s, n)) != 0) - { - if (m != n) - *d = ' ', d++, dlen--; - s += m; - } + while (*s && strchr (" \t\r\n", *s)) + s++; + if (dlen > 0) + { + *d = *s ? ' ' : '\n'; + d++; + dlen--; + } } if (AssumedCharset && *AssumedCharset) { @@ -842,38 +802,32 @@ void rfc2047_decode (char **pd) break; } - if (p != s) - { - n = (size_t) (p - s); - /* ignore spaces between encoded word - * and linear-white-space between encoded word and *text */ - if (option (OPTIGNORELWS)) + if (p != s && found_encoded) + { + while (*s && strchr(" \t\r\n", *s)) + s++; + if (p != s && dlen > 0) { - if (found_encoded && (m = lwslen (s, n)) != 0) - { - if (m != n) - *d = ' ', d++, dlen--; - n -= m, s += m; - } - - if ((m = n - lwsrlen (s, n)) != 0) - { - if (m > dlen) - m = dlen; - memcpy (d, s, m); - d += m; - dlen -= m; - if (m != n) - *d = ' ', d++, dlen--; - } + *d = ' '; + d++; + dlen--; } - else if (!found_encoded || strspn (s, " \t\r\n") != n) + } + + if (p != s) { + for (r = p - 1; s < r && strchr(" \t\r\n", *r); r--) + ; + n = (size_t) (r - s + 1); + if (n > dlen) + n = dlen; + memcpy (d, s, n); + d += n; + dlen -= n; + if (dlen > 0) { - if (n > dlen) - n = dlen; - memcpy (d, s, n); - d += n; - dlen -= n; + *d = ' '; + d++; + dlen--; } }