Dan Muey wrote: > > print "body text: @body\n"; # this needs to keep the tags where they are** >
that's fairly easy to do:

#!/usr/bin/perl
use strict;
use warnings;

use HTML::Parser;

# Sample document. The heredoc is non-interpolating so that any '$' or '@'
# in the markup is passed to the parser untouched.
my $text = <<'HTML';
<html><head>
<title> HI Title </title>
heaD STUFF
</head>
<body bodytag=attributes>
<i> keep the I tag </i>
hI HERE'S CONTENT i WANT
<img src=""> IMaGE
<!-- i WANT TO STRIP COMMENTS OUT -->
<SCRIPT> i DON'T WANT THIS SCRIPT EITHER </SCRIPT>
<font>Hello world</font>
</BODY>
</HTMl>
HTML

# Parser state shared by the three handlers below.
my $body  = 0;    # true while the parser is between <body> and </body>
my $title = 0;    # true while the parser is between <title> and </title>
my @body;         # text chunks and re-built tags collected inside <body>
my @title;        # text chunks collected inside <title>
my %body_attr;    # attributes of the <body> tag itself, values already quoted

my $html = HTML::Parser->new(
    api_version => 3,
    text_h      => [ \&text,      'dtext' ],
    start_h     => [ \&open_tag,  'tagname,attr' ],
    end_h       => [ \&close_tag, 'tagname' ],
);

# Skip <script> elements together with everything inside them.  HTML
# comments (<!-- ... -->) need no special treatment: no comment handler is
# registered, so the parser never reports them to us.
$html->ignore_elements(qw(script));
$html->parse($text);
$html->eof;

print "title is:\n@title\n\n";
print "body text:\n@body\n\n";
print "body attr:\n";

# Sorted so the listing is the same on every run.
for my $name (sort keys %body_attr) {
    print "$name=$body_attr{$name}\n";
}

# Text handler: receives entity-decoded text ('dtext').  Chunks without any
# word character (pure whitespace/punctuation) are dropped; the rest go to
# @title or @body depending on where the parser currently is.
sub text {
    my ($chunk) = @_;

    return unless $chunk =~ /\w/;

    if ($title) {
        push @title, $chunk;
    }
    elsif ($body) {
        push @body, $chunk;
    }

    return;
}

# Start-tag handler: receives the tag name and a hash ref of its attributes.
# <title> and <body> only flip the state flags (and <body> records its
# attributes in %body_attr); any other tag seen inside the body is re-built
# as text and kept in @body so the markup stays where it was.
sub open_tag {
    my ($tagname, $attr) = @_;

    $title = 1 if $tagname eq 'title';

    if ($tagname eq 'body') {
        $body = 1;
        for my $name (keys %{$attr}) {
            $body_attr{$name} = "'$attr->{$name}'";
        }
    }
    elsif ($body) {
        # Sorted so the re-built tag is identical from run to run; values
        # are wrapped in single quotes as-is (no escaping is attempted).
        my $attr_text = join ' ',
            map { "$_='$attr->{$_}'" } sort keys %{$attr};

        push @body, "<$tagname" . ($attr_text ne '' ? " $attr_text>" : '>');
    }

    return;
}

# End-tag handler: receives the tag name.  The flags are cleared before the
# push, so </body> itself is never added to @body.
sub close_tag {
    my ($tagname) = @_;

    $title = 0 if $tagname eq 'title';
    $body  = 0 if $tagname eq 'body';

    push @body, "</$tagname>" if $body;

    return;
}

__END__

prints:

title is:
HI Title

body text:
<i> keep the I tag </i> hI HERE'S CONTENT i WANT <img src=''> IMaGE <font> Hello world </font>

body attr:
bodytag='attributes'

david
--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]