On Fri, 2009-09-11 at 18:05 -0400, Timo Sirainen wrote:
> I think the main reason this hasn't been implemented yet is that this
> conversion isn't required anywhere else in Dovecot. So the first step
> would be to create RFC 2047 encoder for Dovecot. Feel free to implement
> one. :)

Well, I actually spent some time writing the code anyway. It's not tested
at all currently. I started writing unit tests but didn't get very far.
I'm not really sure what kind of heuristics I should use to decide
where to start/end the encoded words or when to use base64 instead of
quoted-printable ("Q" encoding). Currently it uses:

 - If only one word has non-ASCII characters, encode only that word.
   Otherwise encode the whole string.
 - If at least 1/3 of the string consists of non-ASCII characters, use
   base64. Otherwise use "Q" encoding.

Attached a patch for it. Of course you'd still need to change Sieve to
use it.
diff -r accb1c02e03c src/lib-mail/Makefile.am
--- a/src/lib-mail/Makefile.am	Fri Sep 11 15:30:45 2009 -0400
+++ b/src/lib-mail/Makefile.am	Fri Sep 11 18:48:03 2009 -0400
@@ -13,6 +13,7 @@
 	message-date.c \
 	message-decoder.c \
 	message-header-decode.c \
+	message-header-encode.c \
 	message-header-parser.c \
 	message-id.c \
 	message-parser.c \
@@ -33,6 +34,7 @@
 	message-date.h \
 	message-decoder.h \
 	message-header-decode.h \
+	message-header-encode.h \
 	message-header-parser.h \
 	message-id.h \
 	message-parser.h \
diff -r accb1c02e03c src/lib-mail/message-header-encode.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.c	Fri Sep 11 18:48:03 2009 -0400
@@ -0,0 +1,119 @@
+/* Copyright (c) 2009 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "base64.h"
+#include "message-header-encode.h"
+
+/* Linear whitespace that separates the words of a header value. */
+#define IS_LWSP(c) \
+	((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/* Append the whole input to output as a single RFC 2047 "Q" encoded-word
+   ("=?utf-8?q?...?="). NOTE: the encoded-word is never split, so nothing
+   here keeps it within RFC 2047's 75 character limit. */
+void message_header_encode_q(const unsigned char *input, size_t size,
+			     string_t *output)
+{
+	size_t i;
+
+	str_append(output, "=?utf-8?q?");
+	for (i = 0; i < size; i++) {
+		switch (input[i]) {
+		case ' ':
+			/* "_" is the Q encoding shorthand for a space */
+			str_append_c(output, '_');
+			break;
+		case '=':
+		case '?':
+		case '_':
+			/* these have a special meaning inside an
+			   encoded-word, so they must always be escaped */
+			str_printfa(output, "=%02X", input[i]);
+			break;
+		default:
+			/* control characters, DEL and 8bit bytes are escaped
+			   as two zero-padded hex digits. everything else is
+			   printable ASCII and is copied as is. */
+			if (input[i] < 32 || input[i] >= 127)
+				str_printfa(output, "=%02X", input[i]);
+			else
+				str_append_c(output, input[i]);
+			break;
+		}
+	}
+	str_append(output, "?=");
+}
+
+/* Append the whole input to output as a single RFC 2047 "B" (base64)
+   encoded-word ("=?utf-8?b?...?="). The encoded-word is never split. */
+static void message_header_encode_b(const unsigned char *input, size_t size,
+				    string_t *output)
+{
+	str_append(output, "=?utf-8?b?");
+	base64_encode(input, size, output);
+	str_append(output, "?=");
+}
+
+/* Append NUL-terminated input to output, encoding it only where necessary:
+    - input without 8bit bytes is copied unchanged
+    - if exactly one LWSP-separated word has 8bit bytes, only that word
+      is encoded
+    - otherwise the whole input is encoded as one encoded-word
+   Base64 is used when at least 1/3 of the input bytes are 8bit, otherwise
+   Q encoding is used. */
+void message_header_encode(const char *_input, string_t *output)
+{
+	const unsigned char *input = (const unsigned char *)_input;
+	size_t i, word_start = 0, first_enc_word_pos = 0;
+	size_t enc_words = 0, enc_chars = 0, word_enc_chars = 0;
+	bool use_base64;
+
+	for (i = 0; input[i] != '\0'; i++) {
+		if (input[i] > 127) {
+			/* count each word only once, at its first 8bit byte */
+			if (word_enc_chars++ == 0) {
+				if (enc_words++ == 0)
+					first_enc_word_pos = word_start;
+			}
+			enc_chars++;
+		} else if (IS_LWSP(input[i])) {
+			word_start = i + 1;
+			word_enc_chars = 0;
+		}
+	}
+
+	if (enc_words == 0) {
+		/* plain ASCII - nothing needs encoding */
+		str_append(output, _input);
+		return;
+	}
+
+	/* i is now the length of the input */
+	use_base64 = enc_chars*3 >= i;
+
+	if (enc_words == 1) {
+		/* just one word requires encoding. */
+		str_append_n(output, _input, first_enc_word_pos);
+		for (i = first_enc_word_pos; input[i] != '\0'; i++) {
+			if (IS_LWSP(input[i]))
+				break;
+		}
+		if (use_base64) {
+			message_header_encode_b(input + first_enc_word_pos,
+						i - first_enc_word_pos, output);
+		} else {
+			message_header_encode_q(input + first_enc_word_pos,
+						i - first_enc_word_pos, output);
+		}
+
+		/* everything after the encoded word is plain ASCII */
+		str_append(output, _input + i);
+	} else {
+		/* more than one word requires encoding - encode everything */
+		if (use_base64)
+			message_header_encode_b(input, i, output);
+		else
+			message_header_encode_q(input, i, output);
+	}
+}
diff -r accb1c02e03c src/lib-mail/message-header-encode.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-encode.h	Fri Sep 11 18:48:03 2009 -0400
@@ -0,0 +1,11 @@
+#ifndef MESSAGE_HEADER_ENCODE_H
+#define MESSAGE_HEADER_ENCODE_H
+
+/* Append NUL-terminated UTF-8 input to output, encoding where necessary. */
+void message_header_encode(const char *input, string_t *output);
+
+/* Append size bytes of input to output as one "=?utf-8?q?...?=" word. */
+void message_header_encode_q(const unsigned char *input, size_t size,
+			     string_t *output);
+
+#endif

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to