On Mon, Nov 26, 2012 at 09:57:33AM +0000, Jamie Paul Griffin wrote: > > Does anyone have or know of a perl or python script, or even a shell > script, that removes the multipart/(mixed|alternative| ... ) parts of > incoming mail and leaves or converts the message into plain text?
I have two scripts for that. Both are not perfect and sometimes eat mails or attachments, esp. from MUAs with certain bugs (specifically Apple mail, html, and attachments are a known culprit) The effect comes from inherently from the way is works, it parses the mime-structures and recreates it, thereby not necessarily groking all unknowns. Bye, Joerg a) remove text/html part from maultipart/alternativ # Clean MIME mails :0 fhbw * ^Content-Type:.*multipart/ # Apple mail attachment bug * ! ^X-Mailer: Apple Mail | fixmail.pl #!/usr/bin/perl -w # # This is a mail filter. Takes multipart from STDIN, deletes # superfluous MIME-parts and reduces multipart/alternative to # singlepart, finally writes cleaned MIME mail to STDOUT. # # By Boris 'pi' Piwinger <3...@piology.org>. Please let me know if you # improve it or fix bugs. # # Based on tinnef.pl (there is not much left, though;-) by Gerd Knorr # <gkn...@berlinonline.de>. Get it there: http://www.ch-open.ch/~cho13093/ # # This code is public domain. It comes with absolutely no warranty. # If it eats your mails for lunch, that's your problem. If you don't # like this, don't use it. # # Best with Procmail, e.g.: # # # Clean MIME mails # :0 # * ^Content-Type:.*multipart/ # { # :0c: # tmp/fixmail # :0fhbw # | fixmail.pl # } # Save the From line my$from = <STDIN>; # Create parser, we are being daring here (huge mails might cause problems) use MIME::Parser; my$done=""; my$parser=MIME::Parser->new; $parser->output_to_core(1); my$top=$parser->read(\*STDIN) or die "Couldn't parse MIME stream.\n"; $top=&analyze($top); $top->head->add('X-pi-MIME-Parts-removed',$done) if $done; $top->sync_headers(Length=>'COMPUTE'); # Print From line print $from; $top->print(\*STDOUT); exit 0; sub analyze { my($body,$i)=(@_,0); my($parts)=$body->{ME_Parts}; if ($body->mime_type eq "multipart/alternative") { # Reduce multipart/alternative $i=-1; $i++ until ($$parts[$i]->mime_type eq "text/plain" || $i==$#{$parts}); if ($$parts[$i]->mime_type eq "text/plain") { @$parts=@$parts[$i]; $done.=" multipart/alternative"; } } else { # Kill superfluous junk: # - text/x-vcard # - application/x-pkcs7-signature # - application/ms-tnef # Recursion on multipart while ($i<=$#{$parts}) { # try pkcs7 for a while (mutt supports it now) # if ($$parts[$i]->mime_type =~ /(text\/x-vcard|application\/(?:x-pkcs7-signature|ms-tnef))/) { if ($$parts[$i]->mime_type =~ /(text\/x-vcard|application\/(?:ms-tnef))/) { $done.=" $1"; splice(@$parts,$i,1); } elsif ($$parts[$i]->mime_type =~ /^multipart\//) { $$parts[$i]=&analyze($$parts[$i]); $i++; } else {$i++} } } $body->{ME_Parts}=$parts; if ($body->mime_type =~ /^multipart\/related/ && $body->head->mime_attr("content-type.type") eq "multipart/alternative") {$body->head->mime_attr("content-type.type" => "text/plain")} $body->make_singlepart if $body->parts==1; return $body; } b) adds a plain/text part to html-only mails # Add text/plain :0 fhbw * ^Content-Type:.*text/html | addtext.pl #!/usr/bin/perl -w # This is a mail filter. Takes a mail from STDIN, # adds a text/plain section to every text/html not in # multipart/alternative, finally writes cleaned MIME mail to # STDOUT. # # text/plain is generated by lynx -dump # # TODO: fix charset # # Inspired by fixmail.pl by Boris 'pi' Piwinger # <3...@piology.org> # # (c) Joerg Dorchain <jo...@dorchain.net> # This code is public domain. It comes with absolutely no warranty. # If it eats your mails for lunch, that's your problem. If you don't # like this, don't use it. # # Best with Procmail, e.g.: # # # Add text/plain # :0 # * ^Content-Type:.*text/html # { # :0c: # tmp/addtext # :0fhbw # | addtext.pl # } use IPC::Run qw(run); # Save the From line my $from = <STDIN>; # Create parser, we are being daring here (huge mails might cause # problems) use MIME::Parser; my $done=""; my $parser=MIME::Parser->new; $parser->output_to_core(1); my $top=$parser->read(\*STDIN) or die "Couldn't parse MIME stream.\n"; $top = &analyse($top); $top->head->add('X-JD-Mime-Part-added',$done) if $done; $top->sync_headers(Length=>'COMPUTE'); # Print From line print $from; $top->print(\*STDOUT); exit 0; sub analyse { my ($top) = (@_); if ($top->is_multipart) { my ($i,$ti,$hi); my ($parts) = $top->{ME_Parts}; for($i = 0; $i < $#{$parts}; $i++) { if ($top->effective_type ne "multipart/alternative") { $$parts[$i] = &analyse($$parts[$i]); } else { if ($$parts[$i]->effective_type eq "text/plain") { $ti = $i; } if ($$parts[$i]->effective_type eq "text/html") { $hi = $i; } } } if ($top->effective_type eq "multipart/alternative") { if ($hi && !$ti) { # html found, but not text $$parts[$hi] = &analyse($$parts[$hi]); } } $top->{ME_Parts} = $parts; } if ($top->effective_type ne "text/html") { return $top; } # Now do the dirty job my ($html, $plain, $err); $top->make_multipart("alternative"); $html = $top->parts(0)->bodyhandle->as_string; run ["lynx", "-dump", "-stdin", "-stderr"], '<', \$html, '>', \$plain, '2>', \$err; $top->attach(Data => $err.$plain, Type => "text/plain", Encoding => "8bit", Charset => "utf-8"); $done.=" text/plain"; return $top; }
signature.asc
Description: Digital signature