On Thu, Jan 03, 2002 at 03:36:01PM +0100, giorgian wrote:
> hi all,
> 
> if i have two mail folders and want to merge them in one avoiding
> repetitions, how can i do?

using this script: 

It keeps the Message-ID hash in memory at the moment. It should be easy to
tweak it to support Maildir, and then also a mix of Maildir and mbox folders.

$ids-hash could be tied to some file if somebody thinks memory is an issue.


/magnus



--- %< --- cut here --- %< ---
#!/usr/bin/perl
#
# dupmailweed.pl; BSD-license applies; (C) [EMAIL PROTECTED]; http://x42.com/
#
# usage: cat box1 box2 boxn | dupmailweed.pl > mergedbox
#

# do_mail($hdrs, $body, $ids, $output)
#
# Write one message to $output unless a message with the same Message-ID
# has already been written.
#
#   $hdrs   - header section as one string, starting with the mbox "From "
#             line; every line is newline-terminated, no trailing blank line
#   $body   - message body as one string
#   $ids    - hash ref of Message-IDs seen so far; updated in place
#   $output - filehandle to write to (STDOUT is used when undefined)
#
# Messages without a Message-ID header cannot be compared, so they are
# always written and never recorded in $ids.
# Dies if the write fails.
sub do_mail ($$$$)
{
        my ($hdrs, $body, $ids, $output) = @_;

        $output = \*STDOUT unless defined $output;

        # List-context match into a fresh lexical: when the header is
        # missing $msgid is undef, instead of silently keeping the id of
        # the previous message (which made id-less mails look like dups).
        my ($msgid) = $hdrs =~ /.+?\nmessage-id:\s+(.+?)\s*\n/i;

        return if defined $msgid && exists $ids->{$msgid};

        $ids->{$msgid}++ if defined $msgid;

        # The blank line separating headers from body is not part of
        # either string, so it is re-inserted here.
        print {$output} $hdrs . "\n" . $body
                or die "dupmailweed: write failed: $!\n";
        return;
}

# traverse_mbox($input, $ids, $output)
#
# Read an mbox stream from $input, split it into messages and hand each
# one to do_mail() as (header string, body string, $ids, $output).
#
#   $input  - filehandle to read the mbox data from
#   $ids    - hash ref of seen Message-IDs, passed through to do_mail()
#   $output - output filehandle, passed through to do_mail()
#
# The very first line of the stream is taken to be a "From " separator
# without being checked. Within a message, the header section ends at the
# first empty line and the body ends at the next line starting with
# "From " (or at end of input). A separator line that is followed by
# nothing at all is not passed on.
sub traverse_mbox ($$$)
{
        my ($input, $ids, $output) = @_;

        my $nextfrom = <$input>;
        return unless defined $nextfrom;

        # Named lexicals are used for every read so the caller's $_ is
        # left alone.
        while (defined(my $first = <$input>))
        {
                my $hdrs = $nextfrom;
                my $body = '';

                # An empty first line means the header section is empty
                # and the body starts right away; scanning on for another
                # blank line would treat body text as headers.
                unless ($first =~ /^$/)
                {
                        $hdrs .= $first;
                        while (defined(my $line = <$input>))
                        {
                                last if $line =~ /^$/;
                                $hdrs .= $line;
                        }
                }

                undef $nextfrom;
                while (defined(my $line = <$input>))
                {
                        if ($line =~ /^From /)
                        {
                                # Separator of the following message.
                                $nextfrom = $line;
                                last;
                        }
                        $body .= $line;
                }

                do_mail($hdrs, $body, $ids, $output);

                # No further separator seen: input is exhausted.
                last unless defined $nextfrom;
        }
        return;
}

# Driver: filter the mbox stream arriving on STDIN to STDOUT, starting
# with an empty table of seen Message-IDs.
my %seen_ids;
my ($in, $out) = (*STDIN, *STDOUT);

traverse_mbox($in, \%seen_ids, $out);

--- %< --- cut here --- %< ---

Reply via email to