* Wed Mar 21 2007 TAKAHASHI Tamotsu <[EMAIL PROTECTED]> > * Tue Mar 20 2007 Thomas Roessler <[EMAIL PROTECTED]> > > The way in which this code iterates through AssumedCharset in > > convert_nonmime_string is clumsy at best. (A better way to go > > through it would be to safe_strdup() the entire thing, use strtok on > > the copy, and then free the temporary buffer.) > > Thanks for your advice. I like your implementation. > I didn't know of strtok. > > > > So, from a casual glance at this code, two suggestions: > > > - Please clean up this code. > > I'm going to try.
I've finished cleaning it up. Attached. If Alain's "pass-thru" option is not needed, the code can be even shorter. -- tamo
diff -r a6da24788400 charset.c --- a/charset.c Wed Mar 21 11:29:20 2007 -0700 +++ b/charset.c Thu Mar 22 16:15:34 2007 +0900 @@ -288,18 +288,25 @@ int mutt_chscmp (const char *s, const ch return !ascii_strcasecmp (buffer, chs); } +/* Returns a buffer which should be freed later. + * The content of the buffer is: + * + * - "us-ascii" if AssumedCharset is NULL or "" + * - the first entry of AssumedCharset + * (this entry can be "") + */ char *mutt_get_default_charset () { - static char fcharset[SHORT_STRING]; - const char *c = AssumedCharset; - const char *c1; - - if (c && *c) { - c1 = strchr (c, ':'); - strfcpy (fcharset, c, c1 ? (c1 - c + 1) : sizeof (fcharset)); - return fcharset; - } - return strcpy (fcharset, "us-ascii"); /* __STRCPY_CHECKED__ */ + char *s; + + if (!AssumedCharset || !*AssumedCharset) + return strdup ("us-ascii"); + + s = strdup (AssumedCharset); + /* If it begins with ":", make it empty */ + if (s != strtok (s, ":")) + *s = '\0'; + return s; } #ifndef HAVE_ICONV diff -r a6da24788400 handler.c --- a/handler.c Wed Mar 21 11:29:20 2007 -0700 +++ b/handler.c Thu Mar 22 16:15:34 2007 +0900 @@ -1443,10 +1443,10 @@ void mutt_decode_attachment (BODY *b, ST if (istext && s->flags & M_CHARCONV) { char *charset = mutt_get_parameter ("charset", b->parameter); - if (!charset && AssumedCharset && *AssumedCharset) - charset = mutt_get_default_charset (); - if (charset && Charset) - cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + char *assumed_charset = mutt_get_default_charset (); + if ((charset || *assumed_charset) && Charset) + cd = mutt_iconv_open (Charset, charset ? 
charset : assumed_charset, M_ICONV_HOOK_FROM); + FREE (&assumed_charset); } else if (istext && b->charset) cd = mutt_iconv_open (Charset, b->charset, M_ICONV_HOOK_FROM); diff -r a6da24788400 init.h --- a/init.h Wed Mar 21 11:29:20 2007 -0700 +++ b/init.h Thu Mar 22 16:15:34 2007 +0900 @@ -169,18 +169,29 @@ struct option_t MuttVars[] = { { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL 0}, /* ** .pp - ** This variable is a colon-separated list of character encoding - ** schemes for messages without character encoding indication. - ** Header field values and message body content without character encoding - ** indication would be assumed that they are written in one of this list. - ** By default, all the header fields and message body without any charset - ** indication are assumed to be in "us-ascii". - ** .pp + ** This is a colon-separated list of possible character sets for + ** incoming messages. If a header field has no RFC2047-encoded word, + ** mutt assumes that the header contains unencoded non-ASCII words + ** violating the RFC. Mutt tries to detect the header's character set + ** by testing the list entries in turn. If the header can be converted + ** from none of them to ``$$charset'', mutt uses the first entry unless + ** the list ends with a colon. The trailing colon makes mutt bypass the + ** conversion process. This trial-and-error method is not performed for + ** message body. So only the first entry is used to show the body whose + ** Content-Type header has no "charset" parameter. + ** .pp + ** If unset, mutt converts unknown headers and bodies from "us-ascii". + ** If you want no conversion, you can trust anything with this setting: + ** .pp + ** set assumed_charset=":" + ** .pp + ** But in most cases, you should filter out invalid characters by + ** specifying at least one character set. 
** For example, Japanese users might prefer this: ** .pp ** set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8" ** .pp - ** However, only the first content is valid for the message body. + ** Note: This variable takes effect only after reloading the folder. */ { "attach_charset", DT_STR, R_NONE, UL &AttachCharset, UL 0 }, /* diff -r a6da24788400 parse.c --- a/parse.c Wed Mar 21 11:29:20 2007 -0700 +++ b/parse.c Thu Mar 22 16:15:34 2007 +0900 @@ -402,9 +402,12 @@ void mutt_parse_content_type (char *s, B if (ct->type == TYPETEXT) { if (!(pc = mutt_get_parameter ("charset", ct->parameter))) - mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ? - (const char *) mutt_get_default_charset () - : "us-ascii", &ct->parameter); + { + char *assumed_charset = mutt_get_default_charset (); + mutt_set_parameter ("charset", *assumed_charset ? + assumed_charset : NULL, &ct->parameter); + FREE (&assumed_charset); + } } } diff -r a6da24788400 rfc2047.c --- a/rfc2047.c Wed Mar 21 11:29:20 2007 -0700 +++ b/rfc2047.c Thu Mar 22 16:15:34 2007 +0900 @@ -87,40 +87,41 @@ static size_t convert_string (ICONV_CONS return n; } +/* AssumedCharset must be a non-empty string */ int convert_nonmime_string (char **ps) { - const char *c, *c1; - - for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) - { - char *u = *ps; - char *s; - char *fromcode; - size_t m, n; - size_t ulen = mutt_strlen (*ps); - size_t slen; - - if (!u || !*u) - return 0; - - c1 = strchr (c, ':'); - n = c1 ? 
c1 - c : mutt_strlen (c); - if (!n) - return 0; - fromcode = safe_malloc (n + 1); - strfcpy (fromcode, c, n + 1); - m = convert_string (u, ulen, fromcode, Charset, &s, &slen); - FREE (&fromcode); + char *c, *c1; + char *u = *ps; + char *s; + size_t ulen = mutt_strlen (*ps); + size_t slen; + size_t m; + + if (!u || !*u) + return 0; + + c = safe_strdup (AssumedCharset); + for (c1 = strtok (c, ":"); c1; c1 = strtok (NULL, ":")) + { + m = convert_string (u, ulen, c1, Charset, &s, &slen); if (m != (size_t)(-1)) { + FREE (&c); FREE (ps); /* __FREE_CHECKED__ */ *ps = s; return 0; } } - mutt_convert_string (ps, - (const char *)mutt_get_default_charset (AssumedCharset), + FREE (&c); + + /* pass-thru if the last entry is empty */ + if (AssumedCharset[strlen (AssumedCharset) - 1] == ':') + return 0; + + /* try to convert it replacing invalid chars */ + mutt_convert_string (ps, c = mutt_get_default_charset (), Charset, M_ICONV_HOOK_FROM); + FREE (&c); return -1; } @@ -827,11 +828,11 @@ void rfc2047_decode (char **pd) char *t; size_t tlen; - n = mutt_strlen (s); - t = safe_malloc (n + 1); - strfcpy (t, s, n + 1); + t = safe_strdup (s); convert_nonmime_string (&t); tlen = mutt_strlen (t); + if (dlen < tlen) /* truncation */ + tlen = dlen; strncpy (d, t, tlen); d += tlen; FREE (&t);