For clarity's sake, given all of the code and changes so far, here is the code that
works mostly the way I want. My 3 questions/problems/needs are in the comments after
the #'s. $text contains the actual HTML code:
#-------------------------

# get $title - EG the 'Your Title Here' in :: <title> Your Title Here </title>
# get $bdy_tg_at - EG the 'bgcolor="red" link="#EOEOEO"' in :: <body bgcolor="red" link="#EOEOEO">
# This code removes <!-- comments --> automatically, which is what I want. But I'm not
# sure how/why exactly it does.

# Should I start a new object that just grabs the title and bdy_tg_at ??
# I tried another example which fetched the title OK, but:
#       it made the attributes :: bgcolor="red"=link="#EOEOEO"
#       the attributes were in the same data as the body contents, so there was no way
#       to separate them from the content
#       it removed all html from the body content

use HTML::Parser;

# --- Pass 1: body markup ----------------------------------------------------
# Accumulate into $temp every text run (entities decoded, via 'dtext') and
# every start/end tag exactly as written in the source (via 'text').
#
# HTML comments, declarations and processing instructions disappear from the
# output because no comment_h / declaration_h / process_h / default_h handler
# is registered: HTML::Parser simply discards events that have no handler.
my $temp = '';
my $html = HTML::Parser->new(
                api_version => 3,
                text_h      => [sub{ $temp .= shift; }, 'dtext'],
                start_h     => [sub{ $temp .= shift; }, 'text'],
                end_h       => [sub{ $temp .= shift; }, 'text']);

# ignore_elements: drop the tags AND everything between them.
$html->ignore_elements(qw(head script));
# ignore_tags: drop only the tags themselves; their content is kept.
$html->ignore_tags(qw(html body));

$html->parse($text);
$html->eof;

# --- Pass 2: <title> text and <body> attributes -----------------------------
# The parser above can never see these: <head> (and the <title> inside it) is
# skipped by ignore_elements, and <body> start tags are suppressed by
# ignore_tags.  A second, metadata-only parser keeps the body extraction above
# untouched while collecting:
#   $title     - decoded text of the first <title> element, trimmed
#   $bdy_tg_at - attribute string of the first <body> tag, verbatim as it
#                appears in the source (quotes and spacing preserved)
my $title     = '';
my $bdy_tg_at = '';
my $in_title  = 0;    # true while between <title> and </title>
my $got_title = 0;    # set once the first <title> element has been closed
my $got_body  = 0;    # set once the first <body> start tag has been seen
my $meta = HTML::Parser->new(
                api_version => 3,
                start_h     => [sub {
                        # 'tagname' is lower-cased by the parser; 'text' is
                        # the raw source of the tag.
                        my ($tagname, $raw) = @_;
                        if ($tagname eq 'title') {
                                $in_title = 1 unless $got_title;
                        }
                        elsif ($tagname eq 'body' and not $got_body) {
                                $got_body = 1;
                                # Strip the leading '<body' and the trailing
                                # '>' to leave just the attribute text.
                                ($bdy_tg_at = $raw) =~ s/\A<body\s*//i;
                                $bdy_tg_at =~ s/\s*>\z//;
                        }
                }, 'tagname, text'],
                end_h       => [sub {
                        my ($tagname) = @_;
                        if ($tagname eq 'title' and $in_title) {
                                $in_title  = 0;
                                $got_title = 1;
                        }
                }, 'tagname'],
                # Title text may arrive in several chunks, so append.
                text_h      => [sub{ $title .= shift if $in_title; }, 'dtext']);

$meta->parse($text);
$meta->eof;

# '<title> Your Title Here </title>' should yield 'Your Title Here'.
$title =~ s/\A\s+//;
$title =~ s/\s+\z//;

# Build $ntemp from the lines of $temp that contain at least one word
# character, writing each kept line back out followed by a space and a
# newline.  Lines that are empty or hold only whitespace/punctuation are
# dropped.  $ntemp is left undefined when no line qualifies.
my $ntemp;
$ntemp .= "$_ \n" for grep { m/\w/ } split(/\n/, $temp);

# Emit $title and $bdy_tg_at, each wrapped in dashes on its own labelled
# line, followed by the filtered body markup held in $ntemp.
print "TITLE -$title- \n",
      "BDATT -$bdy_tg_at- \n",
      $ntemp;
#----------------------------

--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to