#!/usr/bin/perl

use Mail::Mbox::MessageParser;
use Digest::SHA1 qw(sha1_hex);
use strict;
use warnings;

# Open the mbox named on the command line and hand it to the parser.
# See man page for Mail::Mbox::MessageParser.
use FileHandle;	# loaded explicitly rather than relying on another module to pull it in
my $filename = $ARGV[0];
die "usage: $0 mbox-file\n" if !defined $filename;
# Pass an explicit read mode: the one-argument form behaves like
# two-argument open() and would interpret characters such as '>' or '|'
# in the file name.
my $fh = FileHandle->new($filename, '<')
	or die "$filename: cannot open: $!\n";
my $r = Mail::Mbox::MessageParser->new({ 'file_name' => $filename,
	'file_handle' => $fh, 'enable_cache' => 0 });
# A non-reference result means construction failed; it is used as the
# error message.
die $r if !ref $r;
my $prologue = $r->prologue;

# Main loop: read every message, pull out the headers we need, and group
# the messages into threads keyed by normalized subject.  @threadlist
# holds the first message of each thread in mailbox order; later messages
# of a thread are appended to that message's 'followup' list.
my $msgnum = 0;
my %threadindex;
my @threadlist;
MESSAGE:
while (! $r->end_of_file) {
	my $emailref = $r->read_next_email();
	my $email = $$emailref;
	++$msgnum;
	my @line = split(/\r\n?|\n/, $email);

	# Extract message ID, subject, from line, and date.  Only the header
	# is scanned: the first empty line ends it.
	my ($message_id, $subject, $from, $date);
	for my $line (@line) {
		if ($line =~ /^Message-ID:\s+(.*)$/i) {
			$message_id = $1;
			# Trim first so trailing whitespace cannot hide the closing '>'.
			$message_id =~ s/\s+$//;
			$message_id =~ s/^<//;
			$message_id =~ s/>$//;
		}
		elsif ($line =~ /^Subject:\s+(.*)$/i) {
			$subject = $1;
			$subject =~ s/\s+$//;
		}
		elsif ($line =~ /^From:\s+(.*)$/i) {
			# Keep only the display name when one precedes the <address>.
			$from = $1;
			$from =~ s/^\s+//;
			$from =~ s/\s+$//;
			$from =~ s/<.*// if $from =~ /^.+</;
			$from =~ s/[<>"]//g;
			$from =~ s/\s+$//;
		}
		elsif ($line =~ /^Date:\s+(.*)/i) {
			$date = $1;
			# Keep just "DD Mon YYYY".  The day-of-week prefix is optional
			# in RFC 5322 dates, so accept the date with or without it.
			if ($date =~ /^(?:\w\w\w,\s+)?(\d+ \w\w\w \d\d\d\d)/) {
				$date = $1;
			}
			else {
				# Skip the whole message; a bare 'next' here would only
				# skip this header line and keep the unparsed date.
				warn "$msgnum: malformed date ($date)!\n";
				next MESSAGE;
			}
		}
		elsif ($line =~ /^$/) {
			last;
		}
	}

	# Complain about, and skip, messages lacking a required header.
	if (!defined $message_id) {
		warn "$msgnum: no message-ID!\n";
		next;
	}
	if (!defined $subject) {
		warn "$msgnum: no subject!\n";
		next;
	}
	if (!defined $from) {
		warn "$msgnum: no from!\n";
		next;
	}
	if (!defined $date) {
		warn "$msgnum: no date!\n";
		next;
	}

	# Trim subject: drop list tags and any run of leading "Re:" markers
	# (in any case), collapse whitespace, and lowercase, so that replies
	# normalize to the same key as the message they answer.
	my $trimsubject = $subject;
	$trimsubject =~s/\[(HACKERS|BUGS|PERFORM|COMMITTERS|ADMIN|DOCS|GENERAL|PATCHES|SQL|Pg-migrator-general)\]\s*//g;
	$trimsubject =~ s/^\s*//;
	$trimsubject =~ s/^(?:Re:\s*)+//i;
	$trimsubject =~ s/\s*$//;
	$trimsubject =~ s/\s+/ /g;
	$trimsubject = lc($trimsubject);

	# Build record for message.  The original subject is kept for display.
	my $message = {
		'from' => $from,
		'subject' => $subject,
		'message-id' => $message_id,
		'date' => $date,
		'msgnum' => $msgnum
	};
	# Thread on the normalized subject, not the raw one; otherwise
	# "Re: foo" would never be attached to "foo".
	if (!exists $threadindex{$trimsubject}) {
		push @threadlist, $message;
		$threadindex{$trimsubject} = $message;
	}
	else {
		push @{$threadindex{$trimsubject}->{'followup'}}, $message;
	}
}

# Emit the page header.  The heredoc is single-quoted, so nothing in it
# is interpolated.  <html> and <head> are opened here because the
# document later closes both.
print <<'EOM';
<html>
<head>
<title>open mailbox</title>
<style type="text/css" media="screen" title="Normal Text">@import 
url("http://www.postgresql.org/layout/css/blue/fixed.css");</style>
</head>
<body>
<div style='padding: 10px'>
<h1>open mailbox</h1>
EOM

# Print each thread: its first message, then its follow-ups indented
# beneath it.  A heading is printed whenever the section name of the
# thread's first message differs from that of the previous thread.
my $threadnum = 0;
my $previous_section = '';
foreach my $root (@threadlist) {
	my $section = section_name($root);
	if ($previous_section ne $section) {
		$previous_section = $section;
		print "<h2>Threads Starting In $section</h2>";
	}
	print '<div>', format_msg($root), "</div>\n";
	# A thread without replies has no 'followup' entry; treat it as empty.
	print "<div style='padding-left: 30px'>", format_msg($_), "</div>\n"
		for @{ $root->{'followup'} || [] };
	print "\n";
}

# Render one message record as an HTML fragment: a short hash tag, a link
# to the archived message labelled with the subject, the sender, and the
# date.
#
# Takes a hash reference with 'message-id', 'subject', 'from' and 'date'
# keys and returns the fragment as a string.  The field values come from
# mail headers, so each one is HTML-escaped before being placed in the
# markup.  The tag is the first 8 hex digits of the SHA-1 of the raw
# (unescaped) message ID.
sub format_msg {
	my ($m) = @_;
	return sprintf(
		"<tt>%s</tt> <a href='http://archives.postgresql.org/message-id/%s'>%s</a> %s <i>%s</i>",
		substr(sha1_hex($m->{'message-id'}), 0, 8),
		map { _html_escape($m->{$_}) } qw(message-id subject from date)
	);
}

# Replace the characters that are special in HTML text and in quoted
# attribute values with their entity equivalents.
sub _html_escape {
	my ($text) = @_;
	my %entity = (
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		'"' => '&quot;',
		"'" => '&#39;',	# the href attribute is single-quoted
	);
	$text =~ s/([&<>"'])/$entity{$1}/g;
	return $text;
}

# Return the section label for a message record: its 'date' value with
# any leading digits, and the whitespace following them, removed.
sub section_name {
	my ($m) = @_;
	(my $label = $m->{'date'}) =~ s/^\d+\s*//;
	return $label;
}

# Close the wrapper <div> and finish the document.
print "</div>\n", "</body></html>\n";
