#!/usr/bin/perl -w
my $report_hdr_pat =
  qr<^Content-Type:\s*text/plain;\s*
       name\s*=\s*['"]?SpamAssassinReport\.txt['"]?\n
      Content-Disposition:\s*inline;\s*
       filename\s*=\s*['"]?SpamAssassinReport\.txt['"]\n
      Content-Transfer-Encoding:.*?\n
      MIME-Version:.*?\n
      X-Mailer:.*?\n\n>xsm;
my $sa_report_attach_hdr =
    "Content-Type: text/plain\n"
  . "Content-Disposition: inline\n"
  . "Content-Transfer-Encoding: 8bit\n\n";
my $sa_msg_attach_hdr =
    "Content-Type: message/rfc822; x-spam-type=original\n"
  . "Content-Description: original message before SpamAssassin\n"
  . "Content-Disposition: attachment\n"
  . "Content-Transfer-Encoding: 8bit]\n\n";
my $msg = do { local( $/ ) ; <STDIN> }; # slurp
my ($hdr, $body) = split m{\n\n}, $msg, 2;
goto copy_original if !(defined $hdr && defined $body);
# add back the new line to terminate the header
$hdr .= "\n";
# Make some quick checks that is is likely
# a message that has been marked up by MIMEdefang
goto copy_original if $hdr !~ /^X-Spam-Score:/m;
goto copy_original if $hdr !~ /^X-Scanned-By:\s+MIMEDefang/m;
# if we can't find the SA report, then just forget about it.
$_ = $body;
goto copy_original if !m{$report_hdr_pat};
# Remove mail delivery artifacts from the header
$hdr =~ s/^Status:.*?\n//msgi;
$hdr =~ s/^Content-Length:.*?\n//msgi;
$hdr =~ s/^Lines:.*?\n//msgi;
$hdr =~ s/^X-Status:.*?\n//msgi;
$hdr =~ s/^X-Keywords:.*?\n//msgi;
$hdr =~ s/^X-UID:.*?\n//msgi;
# find the MIME boundary
my $orig_boundary;
my $is_multipart;
if ($hdr =~ m{^Content-Type:\s+multipart/mixed;\s*
              boundary\s*=\s*['"]?(.+?)['"]?(?:;|$)}msx)
  {
    $orig_boundary = "$1";
    $is_multipart = 1;
  }
else
  {
    # nothing in the header. Try the first few lines
    # and look for something that might be a boundary
    goto copy_original if $body !~ m<(?:^.*?$){0,5}(--.+?)$>m;
    $orig_boundary = substr("$1", 2);
    $is_multipart = 0;
  }
my $boundary_pat = "\Q${orig_boundary}\E";
# verify that the message body is well-formed.
goto copy_original if !m{(?:(?:.*?)(?:^--${boundary_pat}$))+
                         (?:.*?)(?:^--${boundary_pat}--$)(?:.*)^$}smx;
# Check to see if MIIMEDefang already deposited the
# original message as the first attachment.
my $has_msg_attached = $is_multipart
           && $orig_boundary =~ /^-{10}=_\d+-\d+-\d+$/
	   && /.*(?!^--${boundary_pat}\n)
	       ^--${boundary_pat}\n(?!Content)/imsx;
# remove everything up to and including to the first boundary.
s{(.*?)^--${boundary_pat}\n}{}sm;
my $orig_mime_prefix = "$1";
# remove the end boundary and everything after.
s{^--${boundary_pat}--\n.*}{}sm;
# remove the SA mark up part including the content header
goto copy_original if !s{(?:(?:^--${boundary_pat}\n)?${report_hdr_pat})
                         ((?:.(?!^--${boundary_pat}\n))*)}{}msx;
# save the SA report that was extracted.
my $report_part = "$1";
my $msg_boundary = $orig_boundary;
my $orig_msg = $_;
if (!$has_msg_attached)
  {
    # Generate a new MIME boundary
    $msg_boundary =
       "----------=_".scalar(time)."-$$-".(unpack("%32C*",$hdr)%1000);
    my $orig_hdr = $hdr;
    # Try to reconstruct as much of the original header as we can.
    $orig_hdr =~ s/^From.*?\n//ms if $orig_hdr =~ /^From /;
    $orig_hdr =~ s/^X-Scanned-By:.*?MIMEDefang.*?\n//msgi;
    $orig_hdr =~ s/^X-Spam-Score:.*?\n//msgi;
    # Add the boundaries back in, if we deleted them.
    if ($body =~ /^--${boundary_pat}$/sm)
      {
        $orig_msg = "--${orig_boundary}\n" . $orig_msg;
	$orig_msg = "${orig_mime_prefix}" . $orig_msg if $orig_mime_prefix;
        $orig_msg .= "--${orig_boundary}--\n\n";
      }
    # Peel off a couple of the intermediate Received headers,
    # if they're present. This is site dependent
    for (1..2) { $orig_hdr =~ s/^Received:.*?\n(?:\s.*?\n)*//smi; }
    # Tack on the header and we're done
    $orig_msg = "$orig_hdr" . "\n" . "$orig_msg";
    # Remove all Content artifacts in the outer header
    $hdr =~ s/^Content.*?:.*?\n(?:\s.*?\n)*//smgi;
    $hdr =~ s/^MIME-Version:.*?\n(?:\s.*?\n)*//smgi;
    # then add back the new ones
    $hdr .= "MIME-Version: 1.0\n";
    $hdr .= "Content-Type: multipart/mixed;"
               . " boundary=\"${msg_boundary}\"\n";
  }
# Build a new message body, with the SA report
# and the original (reconstructed) message.
$body = "This is a multi-part message in MIME format.\n\n"
        . "--${msg_boundary}\n"
	. $sa_report_attach_hdr
	. $report_part
        . "--${msg_boundary}\n"
	. $sa_msg_attach_hdr
	. $orig_msg
	. "--${msg_boundary}--\n\n";
$msg = "$hdr\n$body";
# fall through

copy_original:
  print $msg;
  exit 0;
