Hi Norbert!
You wrote:
> * Bas Zoetekouw wrote:
> > Would you rather consider adding a patch as I suggested above, that
> > adds a new config option "default_header_charset" or so, which
> > controls the charset to fall back to for non-encoded headers? I can
> > see if I can hack on that later today...
>
> Any news here already?
Yeah, it's done \o/
I made the first version of the patch last week already, but it had some
bugs, and I haven't had any time to fix it until today.
The patch is attached. It adds an option "fallback_charset" for slrnrc,
which selects the default charset that is to be used if the header or
the article doesn't specify anything. Ideally, this fallback charset
could be specified on a per-group basis, but that would make the patch a
lot more complicated. The current patch, with only one global fallback,
at least restores the functionality slrn had before the utf8 patch.
The patch also fixes a small bug in the previous utf8 patch.
Have fun!
Bas.
--
Kind regards,
+--------------------------------------------------------------------+
| Bas Zoetekouw | GPG key: 0644fab7 |
|----------------------------| Fingerprint: c1f5 f24c d514 3fec 8bf6 |
| [EMAIL PROTECTED], [EMAIL PROTECTED] | a2b1 2bae e41f 0644 fab7 |
+--------------------------------------------------------------------+
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/00list
slrn-0.9.8.1pl1/debian/patches/00list
--- eerst/slrn-0.9.8.1pl1/debian/patches/00list 2007-01-12 22:14:32.000000000
+0100
+++ slrn-0.9.8.1pl1/debian/patches/00list 2007-01-15 20:33:20.827864867
+0100
@@ -14,3 +14,4 @@
211_query-cutoff.diff
300_iconv.diff
301_warning.diff
+302_fallback_charset.diff
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff
slrn-0.9.8.1pl1/debian/patches/300_iconv.diff
--- eerst/slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 2007-01-12
22:14:32.000000000 +0100
+++ slrn-0.9.8.1pl1/debian/patches/300_iconv.diff 2007-01-15
20:34:04.012711486 +0100
@@ -259,7 +259,7 @@
+ num = iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left);
+
+ /* the entire line was translated, we're done */
-+ if (num>=0)
++ if ( in_left == 0 )
+ {
+ break;
+ }
diff -Naur eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff
slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff
--- eerst/slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff
1970-01-01 01:00:00.000000000 +0100
+++ slrn-0.9.8.1pl1/debian/patches/302_fallback_charset.diff 2007-01-15
20:33:01.614068679 +0100
@@ -0,0 +1,212 @@
+#! /bin/sh -e
+if [ $# -ne 1 ]; then
+ echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
+ exit 1
+fi
+case "$1" in
+ -patch) patch -f --no-backup-if-mismatch -p1 < $0;;
+ -unpatch) patch -f --no-backup-if-mismatch -R -p1 < $0;;
+ *)
+ echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
+ exit 1;;
+esac
+
+exit 0
+
[EMAIL PROTECTED]@
+diff -Naur eerst/slrn-0.9.8.1pl1/src/art.c slrn-0.9.8.1pl1/src/art.c
+--- eerst/slrn-0.9.8.1pl1/src/art.c 2007-01-15 18:54:23.795361911 +0100
++++ slrn-0.9.8.1pl1/src/art.c 2007-01-15 18:51:24.808845511 +0100
+@@ -473,6 +473,9 @@
+ remove_from_hash_table (h);
+ slrn_free (h->tree_ptr);
+ slrn_free (h->subject);
++#ifdef USE_ICONV /* we've copied this string */
++ slrn_free (h->from);
++#endif
+ slrn_free (h->date);
+ slrn_free (h->realname);
+ slrn_free_additional_headers (h->add_hdrs);
+@@ -5519,7 +5522,7 @@
+ static Slrn_Header_Type *process_xover (Slrn_XOver_Type *xov)
+ {
+ Slrn_Header_Type *h;
+- unsigned char *c;
++ char *c;
+
+ h = (Slrn_Header_Type *) slrn_safe_malloc (sizeof (Slrn_Header_Type));
+
+@@ -5527,19 +5530,44 @@
+ Number_Total++;
+
+ #ifdef USE_ICONV
+- /* ok, some news client (Outlook Express *sigh*) just put unencoded
++
++ /* Annoyingly, h->from wasn't malloced separately, but is part of the same
++ * buffer as h->subject (ie free()ing h->subject will also delete the
memory
++ * space used by h->from(). That sucks, so make a copy of from, to make
++ * sure we can freely mess with the strings as we see fit.
++ * NOTE: h->from is free()ed in free_header()
++ */
++ h->from = slrn_safe_strmalloc( h->from );
++
++ /* ok, some news clients (Outlook Express *sigh*) just put unencoded
+ * latin1/9 chars in their headers. As we don't know any charset at
+- * this time, replace those chars by '?' chars */
+- c = h->subject;
+- while (*c!='\0' && *c!=0x0a && *c!=0x0d)
+- {
+- if (*c>=0x7f) *c = '?'; c++;
++ * this time, try translating them using the (user-specified) default
++ * charset, or if the user didn't specify any, replace all non-ASCII
++ * chars by question marks
++ */
++
++ /* if the user specified a fallback charset */
++ if ( Slrn_Fallback_Input_Charset_Default == 0 )
++ {
++ slrn_chmap_translate_string(Slrn_Fallback_Input_Charset,
++ Slrn_Charset, &(h->subject));
++ slrn_chmap_translate_string(Slrn_Fallback_Input_Charset,
++ Slrn_Charset, &(h->from));
+ }
+- c = h->from;
+- while (*c!='\0' && *c!=0x0a && *c!=0x0d)
+- {
++ else /* no fallback charset specified, so replace all high chars by ? */
++ {
++ c = h->subject;
++ while (*c!='\0' && *c!=0x0a && *c!=0x0d)
++ {
+ if (*c>=0x7f) *c = '?'; c++;
++ }
++ c = h->from;
++ while (*c!='\0' && *c!=0x0a && *c!=0x0d)
++ {
++ if (*c>=0x7f) *c = '?'; c++;
++ }
+ }
++
+ #endif /* USE_ICONV */
+
+ #if SLRN_HAS_MIME
+diff -Naur eerst/slrn-0.9.8.1pl1/src/art.h slrn-0.9.8.1pl1/src/art.h
+--- eerst/slrn-0.9.8.1pl1/src/art.h 2007-01-15 18:54:23.796361774 +0100
++++ slrn-0.9.8.1pl1/src/art.h 2007-01-12 22:40:58.470216033 +0100
+@@ -84,6 +84,11 @@
+
+ #endif /* NOT SLRNPULL_CODE */
+
++#ifdef USE_ICONV
++extern char *Slrn_Fallback_Input_Charset;
++extern short int Slrn_Fallback_Input_Charset_Default;
++#endif
++
+ typedef struct Slrn_Header_Line_Type
+ {
+ char *name;
+diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.c slrn-0.9.8.1pl1/src/chmap.c
+--- eerst/slrn-0.9.8.1pl1/src/chmap.c 2007-01-15 18:54:23.798361500 +0100
++++ slrn-0.9.8.1pl1/src/chmap.c 2007-01-15 19:01:51.939060247 +0100
+@@ -75,6 +75,11 @@
+
+ #ifdef USE_ICONV
+
++/* the user-specified fallback charset */
++char *Slrn_Fallback_Input_Charset = NULL;
++/* is set to a true value if the user didn't specify an override fallback */
++short int Slrn_Fallback_Input_Charset_Default = 0;
++
+ const iconv_t ICONV_FAIL = (iconv_t) -1;
+
+ /* translate the string *str_ptr from charset cs_from to charset cs_to */
+@@ -92,7 +97,7 @@
+
+
+ /* make sure the charsets are initialized */
+- if (cs_from == NULL) cs_from = ICONV_DEFAULT_CHARSET;
++ if (cs_from == NULL) cs_from = Slrn_Fallback_Input_Charset;
+ if (cs_to == NULL) return *str_ptr;
+
+ /* don't translate if from and to charsets are equal */
+@@ -258,7 +263,7 @@
+ #ifdef USE_ICONV
+ /* check if we need to translate */
+ if (a->charset == NULL)
+- charset = ICONV_DEFAULT_CHARSET;
++ charset = Slrn_Fallback_Input_Charset;
+ else
+ charset = a->charset;
+
+@@ -399,6 +404,22 @@
+ #if USE_ICONV
+ iconv_t cd;
+
++ /* first set the fallback input charset to ASCII if the user hasn't
++ * specified it */
++ if ( Slrn_Fallback_Input_Charset != NULL
++ && *Slrn_Fallback_Input_Charset == '\0' )
++ {
++ free( Slrn_Fallback_Input_Charset );
++ }
++ if ( Slrn_Fallback_Input_Charset == NULL )
++ {
++ Slrn_Fallback_Input_Charset = malloc( 9 );
++ strncpy( Slrn_Fallback_Input_Charset, "US-ASCII", 9 );
++ Slrn_Fallback_Input_Charset_Default = 1;
++ }
++
++ /* the rest of this function deals with the _output_ charset */
++
+ /* use environenment for locale */
+ setlocale (LC_ALL, "");
+
+diff -Naur eerst/slrn-0.9.8.1pl1/src/chmap.h slrn-0.9.8.1pl1/src/chmap.h
+--- eerst/slrn-0.9.8.1pl1/src/chmap.h 2007-01-15 18:54:23.798361500 +0100
++++ slrn-0.9.8.1pl1/src/chmap.h 2007-01-15 18:50:00.857329255 +0100
+@@ -26,13 +26,12 @@
+ # undef USE_ICONV
+ #endif
+
+-#define ICONV_DEFAULT_CHARSET "iso-8859-15"
+-
+ extern int slrn_set_charset (char *);
+ extern int slrn_chmap_fix_file (char *, int);
+ #ifdef USE_ICONV
+ extern char * slrn_chmap_translate_string (
+ char *, char *, char **);
++extern char *Slrn_Fallback_Input_Charset;
+ #endif
+ extern void slrn_chmap_fix_body (Slrn_Article_Type *, int);
+ extern void slrn_chmap_fix_header (Slrn_Header_Type *);
+diff -Naur eerst/slrn-0.9.8.1pl1/src/mime.c slrn-0.9.8.1pl1/src/mime.c
+--- eerst/slrn-0.9.8.1pl1/src/mime.c 2007-01-15 18:54:23.831356987 +0100
++++ slrn-0.9.8.1pl1/src/mime.c 2007-01-15 18:49:38.805345750 +0100
+@@ -734,7 +734,7 @@
+ /* make sure we have a charset available */
+ if (a->charset==NULL)
+ {
+- a->charset = slrn_safe_strmalloc(ICONV_DEFAULT_CHARSET);
++ a->charset = slrn_safe_strmalloc(Slrn_Fallback_Input_Charset);
+ }
+ #endif /* USE_ICONV */
+
+diff -Naur eerst/slrn-0.9.8.1pl1/src/stamp-h1 slrn-0.9.8.1pl1/src/stamp-h1
+--- eerst/slrn-0.9.8.1pl1/src/stamp-h1 1970-01-01 01:00:00.000000000 +0100
++++ slrn-0.9.8.1pl1/src/stamp-h1 2007-01-15 18:51:41.310588234 +0100
+@@ -0,0 +1 @@
++timestamp for src/config.h
+diff -Naur eerst/slrn-0.9.8.1pl1/src/startup.c slrn-0.9.8.1pl1/src/startup.c
+--- eerst/slrn-0.9.8.1pl1/src/startup.c 2007-01-15 18:54:23.864352473
+0100
++++ slrn-0.9.8.1pl1/src/startup.c 2007-01-12 22:16:05.556850947 +0100
+@@ -699,7 +699,9 @@
+ },
+
+ #if SLRN_HAS_MIME
+-# ifndef USE_ICONV /* this is fetched from the current locale */
++# ifdef USE_ICONV
++ {"fallback_charset", &Slrn_Fallback_Input_Charset},
++#else
+ {"mime_charset", &Slrn_Mime_Display_Charset},
+ # endif /* USE_ICONV */
+ {"metamail_command", &Slrn_MetaMail_Cmd},