* Tue Mar 20 2007 Thomas Roessler <[EMAIL PROTECTED]> > On 2007-03-20 12:01:49 +0900, TAKAHASHI Tamotsu wrote:
> >strict_mime=yes: > > unset ignore_linear_white_space > >strict_mime=no: > > set ignore_linear_white_space > Why on earth do we need two different behaviors for encoding or > decoding subject headers as far as whitespace is concerned? There's > a spec-compliant way to do this, let's do that, and be done with. Personally I don't need $ignore_linear_white_space. But I understand the situation. For example, RFC2047 says that | Subject: (a | =?ISO-8859-1?Q?b?=) should be displayed as the following: | Subject: (a | b) But many Japanese MUAs insert spaces and a CRLF between an alphabetic word and kanji, because encoded kanji becomes very long and because the RFC limits header line length. For example, | Subject: (a <kanji>) is encoded to | Subject: (a | =?ISO-2022-JP?B?GyRCNEE7ehsoQgo=?=) by such MUAs. We don't like this: | Subject: you | are | annoyed | by | this | subject This is how such bogusly encoded headers look to us when $ignore_linear_white_space="no". > If the other approach is really needed to deal with widespread > breakage (I don't think it is), implement that, and implement it > always. You are generous. Most Japanese users say "It is really needed!" > As an aside, the code used to implement this (in the instance around > line 816 of rfc2047.c) looks like an incredibly convoluted and > inefficient way of saying something like this: (snip) The LWS code was not written by me. Takashi, please review it. Well, safe_strdup can return NULL. And I found a comment: "/* LWS doesn't end with CRLF */" These _may_ be the reason, but I don't know what Takashi thought. > The way in which this code iterates through AssumedCharset in > convert_nonmime_string is clumsy at best. (A better way to go > through it would be to safe_strdup() the entire thing, use strtok on > the copy, and then free the temporary buffer.) Thanks for your advice. I like your implementation. I didn't know about strtok. > So, from a casual glance at this code, two suggestions: > - Please clean up this code. I'm going to try. 
A beta patch is attached. -- tamo
diff -r d0b9363c79aa charset.c --- a/charset.c Fri Mar 16 17:14:54 2007 +0900 +++ b/charset.c Wed Mar 21 01:15:09 2007 +0900 @@ -290,16 +290,15 @@ int mutt_chscmp (const char *s, const ch char *mutt_get_default_charset () { - static char fcharset[SHORT_STRING]; - const char *c = AssumedCharset; - const char *c1; - - if (c && *c) { - c1 = strchr (c, ':'); - strfcpy (fcharset, c, c1 ? (c1 - c + 1) : sizeof (fcharset)); - return fcharset; - } - return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */ + char *s; + + if (AssumedCharset && *AssumedCharset) + { + s = strdup (AssumedCharset); + strtok (s, ":"); + return s; + } + return strdup ("us-ascii"); } #ifndef HAVE_ICONV diff -r d0b9363c79aa handler.c --- a/handler.c Fri Mar 16 17:14:54 2007 +0900 +++ b/handler.c Wed Mar 21 00:53:53 2007 +0900 @@ -1445,10 +1445,12 @@ void mutt_decode_attachment (BODY *b, ST if (istext && s->flags & M_CHARCONV) { char *charset = mutt_get_parameter ("charset", b->parameter); + char *assumed_charset = NULL; if (!charset && AssumedCharset && *AssumedCharset) - charset = mutt_get_default_charset (); - if (charset && Charset) - cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + assumed_charset = mutt_get_default_charset (); + if ((charset || assumed_charset) && Charset) + cd = mutt_iconv_open (Charset, charset ? charset : assumed_charset, M_ICONV_HOOK_FROM); + FREE (&assumed_charset); } else if (istext && b->charset) cd = mutt_iconv_open (Charset, b->charset, M_ICONV_HOOK_FROM); diff -r d0b9363c79aa parse.c --- a/parse.c Fri Mar 16 17:14:54 2007 +0900 +++ b/parse.c Wed Mar 21 00:55:27 2007 +0900 @@ -402,9 +402,11 @@ void mutt_parse_content_type (char *s, B if (ct->type == TYPETEXT) { if (!(pc = mutt_get_parameter ("charset", ct->parameter))) - mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ? 
- (const char *) mutt_get_default_charset () - : "us-ascii", &ct->parameter); + { + char *assumed_charset = mutt_get_default_charset (); + mutt_set_parameter ("charset", assumed_charset, &ct->parameter); + FREE (&assumed_charset); + } } } diff -r d0b9363c79aa rfc2047.c --- a/rfc2047.c Fri Mar 16 17:14:54 2007 +0900 +++ b/rfc2047.c Wed Mar 21 02:12:02 2007 +0900 @@ -89,38 +89,42 @@ static size_t convert_string (ICONV_CONS int convert_nonmime_string (char **ps) { - const char *c, *c1; - - for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) - { - char *u = *ps; - char *s; - char *fromcode; - size_t m, n; - size_t ulen = mutt_strlen (*ps); - size_t slen; - - if (!u || !*u) - return 0; - - c1 = strchr (c, ':'); - n = c1 ? c1 - c : mutt_strlen (c); - if (!n) - return 0; - fromcode = safe_malloc (n + 1); - strfcpy (fromcode, c, n + 1); - m = convert_string (u, ulen, fromcode, Charset, &s, &slen); - FREE (&fromcode); + char *c, *c1; + char *u = *ps; + char *s; + size_t ulen = mutt_strlen (*ps); + size_t slen; + size_t m; + + if (!u || !*u) + return 0; + + c = safe_strdup (AssumedCharset); + if (!c) + c = safe_calloc (1, 1); + for (c1 = strtok (c, ":"); c1; + c1 = strtok (NULL, ":")) + { + m = convert_string (u, ulen, c1, Charset, &s, &slen); if (m != (size_t)(-1)) { + FREE (&c); FREE (ps); /* __FREE_CHECKED__ */ *ps = s; return 0; } } - mutt_convert_string (ps, - (const char *)mutt_get_default_charset (AssumedCharset), + FREE (&c); + + /* pass-thru if the last entry is empty */ + if (AssumedCharset && *AssumedCharset && + AssumedCharset[strlen (AssumedCharset) - 1] == ':') + return 0; + + /* going to do my best */ + mutt_convert_string (ps, c = mutt_get_default_charset (), Charset, M_ICONV_HOOK_FROM); + FREE (&c); return -1; } @@ -827,11 +829,11 @@ void rfc2047_decode (char **pd) char *t; size_t tlen; - n = mutt_strlen (s); - t = safe_malloc (n + 1); - strfcpy (t, s, n + 1); + t = safe_strdup (s); convert_nonmime_string (&t); tlen = mutt_strlen (t); + if (dlen < tlen) /* 
truncation */ + tlen = dlen; strncpy (d, t, tlen); d += tlen; FREE (&t);