Brendan Cully <[EMAIL PROTECTED]> writes:

> On Sunday, 18 March 2007 at 17:36, Rocco Rutte wrote:
>>  I was looking at some mutt code for this issue and other issues that 
>>  report broken threading upon invalid message-ids. It seems that mutt 
>>  happily accepts the following syntax: '<.*>' which is just plain wrong.
>> 
>>  I looked at rfc822.c to try to reuse address parsing for parsing 
>>  message-ids but failed since I didn't have much time and the quote is 
>>  quite complex.
>> 
>>  Even though adopting your code for mutt would be quite easy, I'm not 
>>  yet sure what to do in case of validation errors.
>> 
>>  Say we get '<foobar>' during APOP authentication; should be really 
>>  reject that and report failed authentication? If a message has 
>>  '<foobar>' as message-id and others have it in their References: 
>>  header, should we really ignore it and break threading?
>
> Here's a patch that does a really minimal check that the message ID is
> of the form <[EMAIL PROTECTED]> where x and y are between ASCII 0 and 127. I 
> hope
> that it's enough to thwart the MD5 collision attack, but liberal
> enough to tolerate the range of broken POP servers out there. The @y
> test could be easily removed if necessary.

May I again offer to use my code here which I deem a *COMPLETE*
RFC822-validation:
<http://mknod.org/svn/fetchmail/branches/BRANCH_6-3/rfc822valid.c>

Any complaints my way. It is lightly-documented ad-hoc code implementing
RFC-822 tokens more or less in a hardwired way. I didn't bother to read
RFC-2822, because that's not what RFC-1939 references :-P

The code expects the timestamp as C string with the angle brackets
included, you'd call for instance

  res = rfc822_valid_msgid((const unsigned char *)"<[EMAIL PROTECTED]>");
  if (!res) {
     barf("noncompliant timestamp, APOP not available.\n");
  }

so we can share & fix code together rather than all go reinventing
wheels with dents or something like that...

The code at the URL above is part of fetchmail 6.3.8, so if someone
spots a bug, let me know.

It's GPL v2, but I'm willing to license under different terms for
OSI-compliant software if needed.

I'm attaching it for your convenience (I hope it passes through the list
driver...)

-- 
Matthias Andree
/* rfc822valid.c -- validators for RFC-822 syntax
 * (C) Copyright 2007 Matthias Andree <[EMAIL PROTECTED]>
 * GNU General Public License v2 */

/* This works only on ASCII-based computers. */

#include "fetchmail.h"
#include <string.h>

/* CHAR except specials, SPACE, CTLs */
static const char *atomchar = "!#$%&'*+-/0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~";

static int quotedpair(unsigned char const **x) {
    if (**x != '\\') return 0;
    ++ *x;
    if ((int)* *x > 127 || * *x == '\0')
	/* XXX FIXME: 0 is a legal CHAR, so the == '\0' is sort of bogus
	 * above, but fetchmail does not currently deal with NUL inputs
	 * so we don't need to make the distinction between
	 * end-of-string and quoted NUL. */
	return 0;
    ++ *x;
    return 1;
}


static int quotedstring(unsigned char const **x) {
    if (* *x != '"') return 0;
    ++ *x;
    for(;;) {
	switch (* *x) {
	    case '"':
		++ *x;
		return 1;
	    case '\\':
		if (quotedpair(x) == 0) return 0;
		continue;
	    case '\r':
	    case '\0':
		return 0;
	}
	if ((int)* *x >= 128) {
	    return 0;
	}
	++ *x;
    }
}

static int atom(unsigned char const **x) {
    /* atom */
    if (strchr(atomchar, (const char)**x)) {
	*x += strspn((const char *)*x, atomchar);
	return 1;
    }
    /* invalid character */
    return 0;
}

static int word(unsigned char const **x) {
    if (**x == '"')
	return quotedstring(x);
    return atom(x);
}

static int domain_literal(unsigned char const **x) {
    if (**x != '[') return 0;
    ++ *x;
    for(;;) {
	switch (* *x) {
	    case '\0':
	    case '\r':
	    case '[':
		return 0;
	    case ']':
		++ *x;
		return 1;
	    case '\\':
		if (quotedpair(x) == 0) return 0;
		continue;
	}
	if ((int)* *x > 127) return 0;
	++ *x;
    }
}

static int subdomain(unsigned char const **x) {
    if (* *x == '[') return domain_literal(x);
    return atom(x);
}

int rfc822_valid_msgid(const unsigned char *x) {
    /* expect "<" */
    if (*x != '<') return 0;
    ++ x;

    /* expect local-part = word *("." word)
     * where
     * word = atom/quoted-string
     * atom = 1*ATOMCHAR
     * quoted-string = <"> *(qtext/quoted-pair) <">
     * qtext = CHAR except ", \, CR
     * quoted-pair = "\" CHAR
     */
    for(;;) {
	if (word(&x) == 0) return 0;
	if (*x == '.') { ++x; continue; }
	if (*x == '@') break;
	return 0;
    }

    /* expect "@" */
    if (*x != '@') return 0;
    ++ x;

    /* expect domain = sub-domain *("." sub-domain)
     * sub-domain = domain-ref/domain-literal
     * domain-ref = atom
     * domain-literal = "[" *(dtext/quoted-pair) "]" */
    for(;;) {
	if (subdomain(&x) == 0) return 0;
	if (*x == '.') { ++x; continue; }
	if (*x == '>') break;
	return 0;
    }

    if (*x != '>') return 0;
    return 1;
}

#ifdef TEST
#include <stdio.h>

int main(int argc, char **argv) {
    int i;
    for (i = 1; i < argc; i++) {
	printf("%s: %s\n", argv[i], rfc822_valid_msgid((unsigned char *)argv[i]) ? "OK" : "INVALID");
    }
    return 0;
}
#endif

Reply via email to