#!/usr/bin/perl

use MIME::Parser;
use Switch;
use HTML::Parser;

# Parse the message arriving on STDIN into a MIME entity tree.
my $parser = MIME::Parser->new;

# Keep decoded bodies in memory: by default MIME::Parser writes each decoded
# part to a file in the working directory and leaves it there.
$parser->output_to_core(1);

my $entity = $parser->parse(\*STDIN) or die "parse failed\n";

# Map of HTML tag name => set of attribute names whose values are links.
# Tag and attribute names are lower-case.  Each set is a hash ref holding a
# true value per attribute, so membership can be tested with exists().
# (A bare {'src', 'longdesc'} would build the single pair src => 'longdesc'
# and leave 'longdesc' out of the key set.)
my %link_attr = (
	'a'          => { map { $_ => 1 } qw(href) },
	'applet'     => { map { $_ => 1 } qw(archive codebase code) },
	'area'       => { map { $_ => 1 } qw(href) },
	'blockquote' => { map { $_ => 1 } qw(cite) },
	'body'       => { map { $_ => 1 } qw(background) },
	'embed'      => { map { $_ => 1 } qw(pluginspage src) },
	'form'       => { map { $_ => 1 } qw(action) },
	'frame'      => { map { $_ => 1 } qw(src longdesc) },
	'iframe'     => { map { $_ => 1 } qw(src longdesc) },
	'ilayer'     => { map { $_ => 1 } qw(background) },
	#'img'       => { map { $_ => 1 } qw(src) },
	'input'      => { map { $_ => 1 } qw(src usemap) },
	'ins'        => { map { $_ => 1 } qw(cite) },
	'isindex'    => { map { $_ => 1 } qw(action) },
	'head'       => { map { $_ => 1 } qw(profile) },
	'layer'      => { map { $_ => 1 } qw(background src) },
	'link'       => { map { $_ => 1 } qw(href) },
	'object'     => { map { $_ => 1 } qw(classid codebase data archive usemap) },
	'q'          => { map { $_ => 1 } qw(cite) },
	'script'     => { map { $_ => 1 } qw(src for) },
	'table'      => { map { $_ => 1 } qw(background) },
	'td'         => { map { $_ => 1 } qw(background) },
	'th'         => { map { $_ => 1 } qw(background) },
	'tr'         => { map { $_ => 1 } qw(background) },
	'xmp'        => { map { $_ => 1 } qw(href) },
);

# Walk a MIME entity tree and report its contents on STDOUT.
#
# Arguments:
#   $ent - the entity to inspect: the object returned by the MIME parser,
#          or one of its parts.
#
# An entity that has parts is descended into, part by part.  For a
# text/html leaf, every attribute listed in %link_attr for its tag is
# printed as "link: VALUE".  Any other text/* leaf has its body copied to
# STDOUT.  Everything else is ignored.
sub find_urls_rec
{
	my($ent) = @_;

	# Descend into every container, including one holding a single part
	# (MIME::Parser documents a nested message/rfc822 as having its inner
	# message as its sole part).  The part list is lexical so that nested
	# recursion cannot disturb the iteration of an outer level.
	my @parts = $ent->parts;
	if (@parts) {
		for my $part (@parts) {
			find_urls_rec($part);
		}
		return;
	}

	# Nothing to scan when the entity carries no body.
	my $body = $ent->bodyhandle or return;
	my $mime_type = $ent->mime_type;

	if ($mime_type eq 'text/html') {
		my $parser = HTML::Parser->new(api_version=>3);
		$parser->handler(start => sub {
				my($tagname,$pos,$text) = @_;
				my $link_attr = $link_attr{$tagname} or return;

				# tokenpos holds (offset, length) for the tag name followed by
				# (name offset, name length, value offset, value length) for
				# each attribute.  Consuming four numbers at a time from the
				# end leaves the two tag-name numbers behind and ends the loop.
				while (4 <= @$pos) {
					my($k_offset, $k_len, $v_offset, $v_len) = splice(@$pos,-4);
					my $attrname = lc(substr($text, $k_offset, $k_len));
					next unless exists($link_attr->{$attrname});
					next unless $v_offset; # 0 v_offset means no value
					my $v = substr($text, $v_offset, $v_len);
					# Drop one pair of matching surrounding quotes; /s lets
					# the value itself span several lines.
					$v =~ s/\A([\'\"])(.*)\1\z/$2/s;
					print "link: $v\n";
				}
			},
			"tagname, tokenpos, text");
		$parser->parse($body->as_string);
		# Tell HTML::Parser the document is complete so it flushes anything
		# it still has buffered.
		$parser->eof;
	} elsif ($mime_type =~ m{\Atext/}) {
		# does this need more work, or does MIME::Parser take care of format=flowed delsp=yes for me?
		$body->print(\*STDOUT);
	}

	return;
}

# Scan the parsed message, starting from its top-level entity.
find_urls_rec($entity);
