I have a Perl script where I try to strip some data from a web page
and insert it

into a database.  I'm having a problem where, it seems like the method
of quoting

the data for insertion don't seem to be working (as far as escaping
the text) and

some of the text is ending up getting injected into the SQL command.

In this example, I am capturing the paragraphs of text and inserting
each HTML

paragraph into a new record.  What seems to be hanging up the
insertion is the "or

die" portion of the text.  It will also bomb if the text has a word
like "don't".

I thought the insertion mechanism I'm using would properly escape
these special

characters/phrases.

This is an old problem for me, I know I tried multiple methods but had
to put the

project down for a few months here so I'm a bit sketchy on the details
of what I

have tried already, otherwise I would have posted that as well.

I am currently trying this with a SQLite database.  I tried it on a
MySQL database

with the same results.  Here is the database Schema:

CREATE TABLE area_status (updatre_time TEXT, status BLOB, survey_date
TEXT, areaID

NUMERIC, area_statusID INTEGER PRIMARY KEY);

The only thing you hsould have to change is the $file_prefix
variable.  Also, the

example webpage and SQLite database are here:
http://www.ottomatica.com/perl/paragraph_insert_issue/test_page.html
http://www.ottomatica.com/perl/paragraph_insert_issue/area.db

Thanks in advance!!!!!

Here's the code.

#!/usr/bin/perl
use strict;
use LWP::Simple qw(!head);
use diagnostics;
use DBI();

sub trim($);                        # subroutines to trim off
whitespace
sub ltrim($);                       # subroutines to trim off
whitespace
sub rtrim($);                       # subroutines to trim off
whitespace
sub status_of_the_area_insert;      # subroutine that inserts data
into database

my $html_string;                    # This variable will contain the
whole web

page
my $file_string;                    # This contains the complete file
path (in the

real program I loop through many web pages)
my $status_of_the_area_error = 0;   # This is a debuging variable that
is a

boolean and will help decide if I execute certain portions of code
my $file_name;                      # The file name and extension
portion of

$file_string above
my $areaID;                         # A field in my area_status table,
hardcoded

in this case
my $inventory_number;               # A field in my area_status table,
hardcoded

in this case
my $survey_date;                    # A field in my area_status table,
hardcoded

in this case
my $time = localtime(time);         # The current time when the code
is exectued

for later reference
my $status_of_the_area;             # This contains the text I'm
interested in

capture by a Perl Regex
my $status_of_the_area_complete;    # The final text that I am
interested in

(after trimming)
my $status_of_the_area_present;     # This is a boolean that tells me
whether the

text I'm interested in is there or not
my $detect_paragraphs;              # A boolean to detect if there are
multiple

paragraphs present
my @status_of_the_area_array;       # An array that contains multiple
paragraphs

if present
my $status_of_the_area_array;       # Used to loop through the

@status_of_the_area_array array
my $file_prefix;                    # The location of the html file on
the user's

computer.

# Declare the location of the html file on the user's computer.
$file_prefix = 'file:///C:/Lanosrep/beW/Perl/HelpPage/';

# Print Status
print "<br/>Begin looping through the directory <br/>";

# Loop through directory (in this case there will be only one file)
while ( $file_name=glob("test_page.html") ) { #Use this for a test
string to limit

results *downum=0100*.html

    # Reset the $status_of_the_area_error to zero
    # This is a debuging variable that is a boolean and will help
decide if I

execute certain portions of code
    $status_of_the_area_error = 0;

    # Create the file string
    # This contains the complete file path (in the real program I loop
through

many web pages)
    $file_string = $file_prefix.$file_name;

    # Assign the contents of the web page to the $html_string variable
    # This variable will contain the whole web page
    $html_string = get($file_string);

    # Determine if the text I care about is present
    # This is a boolean that tells me whether the text I'm interested
in is there

or not
    $status_of_the_area_present = $html_string =~ m/<H2>.??Status of
the

Area.*?<\/H2>(.*?)<H2>.??For Additional Information.*?<\/H2>/si;

    # If it is present, print status that tells me it's there and
execute some

additional code
    if ($status_of_the_area_present) {
        print "<a href=$file_string>Status of the area Found -

$file_string</a>.<br/>";

        # This contains the text I'm interested in capture by a Perl
Regex
        ($status_of_the_area) = $html_string =~ m/<H2>.??Status of
the

Area.*?<\/H2>(.*?)<H2>.??For Additional Information.*?<\/H2>/si;

        # The final text that I am interested in (after trimming)
        $status_of_the_area_complete = trim($status_of_the_area);

        # A boolean to detect if there are multiple paragraphs present
        $detect_paragraphs = $status_of_the_area =~ m/<P>/i;

        # If multiple paragraphs are detected, split them up and place
in an array
        if ($detect_paragraphs) {
            $status_of_the_area =~ s/<P>.??<P>//sgi;
            (@status_of_the_area_array) = split /<P>/i,
$status_of_the_area;
        }

        # Hardcode the following variables as they are unimportant for
this

discussion
        $inventory_number = 22222222;
        $areaID = 22222222;
        $survey_date = "01-01-1999";

        # If multiple paragraphs are detected, loop through the
array,

@status_of_the_area_array and insert the data
        if ($detect_paragraphs) {
            my $i = 1;
            foreach $status_of_the_area_array
(@status_of_the_area_array) {
                # Detect if the paragraph contains any text
                if ($status_of_the_area_array) {


status_of_the_area_insert($status_of_the_area_array,$areaID,
$survey_date,$time,1);
                }
                # If paragraph is blank don't insert into the database
                else {
                    #print "<P>Paragraph $i<P>Nothing Here";
                }
            $i++;
            }
        }

        # If just a single paragraph, insert the data captured in the
variable

$status_of_the_area_complete
        else {
            #print $status_of_the_area;


status_of_the_area_insert($status_of_the_area_complete,$areaID,
$survey_date,$time,

1);
        }
            print "<br/>";
        }
    # If text is not detected print status
    else {
        print "<a href=$file_string>Status of the area NOT Found -

$file_string</a>.<br/>";
    }
    }

# Done
# Subroutines below

# Perl trim function to remove whitespace from the start and end of
the string
sub trim($)
{
        my $string = shift;
        $string =~ s/^\s+//;
        $string =~ s/\s+$//;
        return $string;
}
# Left trim function to remove leading whitespace
sub ltrim($)
{
        my $string = shift;
        $string =~ s/^\s+//;
        return $string;
}
# Right trim function to remove trailing whitespace
sub rtrim($)
{
        my $string = shift;
        $string =~ s/\s+$//;
        return $string;
}

# subroutine that inserts data into database
sub status_of_the_area_insert {
    my $var;
    my $areaID;
    my $survey_date;
    my $update_time;
    my $row_count_final;
    ($var,$areaID,$survey_date,$update_time,$row_count_final)[EMAIL PROTECTED];
    my $dbh =

DBI->connect("DBI:SQLite:dbname=C:/Lanosrep/beW/Perl/HelpPage/
area.db", "", "",

{'RaiseError' => 1});
    my $row_count = 0;
    my $sth;
    my @values;
    my @sql_row;
    my $rv;
    my $sql;
    my $OK2INSERT;
    my $parsed_var;

    $values[0] = trim($areaID);
    $values[1] = trim($survey_date);
    $values[2] = trim($update_time);
    $values[3] = trim($var);
    $sql = "SELECT * FROM area_status WHERE areaID='$values[0]' AND

survey_date='$values[1]' AND status='$values[3]'";
    $sth = $dbh->prepare($sql);
    $sth->execute;
    $rv = $sth->rows;
    if ($rv > 1) {
        print "<br/>Error: There were $rv rows returned for SQL
statement:

$sql<br/><br/>";
    }
    else {
        if ($rv == 1) {
            print "<br/>Database entry, @values, already exists.";
        }
        else {
            $OK2INSERT = 1;
            }
    }
    $sth->finish;
    # Disconnect from the database.
    $dbh->disconnect();
    if ($OK2INSERT) {
        $dbh =

DBI->connect("DBI:SQLite:dbname=C:/Lanosrep/beW/Perl/HelpPage/
area.db", "", "",

{'RaiseError' => 1});
        print "<br/>Inserting into Database , @values.";
        $dbh->do("INSERT INTO area_status (areaID, survey_date,
update_time,

status ) VALUES ('$values[0]', '$values[1]', '$values[2]',
'$values[3]')");
        $dbh->disconnect();
    }
}


-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
http://learn.perl.org/


Reply via email to