I have a Perl script where I try to strip some data from a web page and insert it
into a database. I'm having a problem where, it seems like the method of quoting the data for insertion don't seem to be working (as far as escaping the text) and some of the text is ending up getting injected into the SQL command. In this example, I am capturing the paragraphs of text and inserting each HTML paragraph into a new record. What seems to be hanging up the insertion is the "or die" portion of the text. It will also bomb if the text has a word like "don't". I thought the insertion mechanism I'm using would properly escape these special characters/phrases. This is an old problem for me, I know I tried multiple methods but had to put the project down for a few months here so I'm a bit sketchy on the details of what I have tried already, otherwise I would have posted that as well. I am currently trying this with a SQLite database. I tried it on a MySQL database with the same results. Here is the database Schema: CREATE TABLE area_status (updatre_time TEXT, status BLOB, survey_date TEXT, areaID NUMERIC, area_statusID INTEGER PRIMARY KEY); The only thing you hsould have to change is the $file_prefix variable. Also, the example webpage and SQLite database are here: http://www.ottomatica.com/perl/paragraph_insert_issue/test_page.html http://www.ottomatica.com/perl/paragraph_insert_issue/area.db Thanks in advance!!!!! Here's the code. #!/usr/bin/perl use strict; use LWP::Simple qw(!head); use diagnostics; use DBI(); sub trim($); # subroutines to trim off whitespace sub ltrim($); # subroutines to trim off whitespace sub rtrim($); # subroutines to trim off whitespace sub status_of_the_area_insert; # subroutine that inserts data into database my $html_string; # This variable will contain the whole web page my $file_string; # This contains the complete file path (in the real program I loop through many web pages) my $status_of_the_area_error = 0; # This is a debuging variable that is a boolean and will help decide if I execute certain portions of code my $file_name; # The file name and extension portion of $file_string above my $areaID; # A field in my area_status table, hardcoded in this case my $inventory_number; # A field in my area_status table, hardcoded in this case my $survey_date; # A field in my area_status table, hardcoded in this case my $time = localtime(time); # The current time when the code is exectued for later reference my $status_of_the_area; # This contains the text I'm interested in capture by a Perl Regex my $status_of_the_area_complete; # The final text that I am interested in (after trimming) my $status_of_the_area_present; # This is a boolean that tells me whether the text I'm interested in is there or not my $detect_paragraphs; # A boolean to detect if there are multiple paragraphs present my @status_of_the_area_array; # An array that contains multiple paragraphs if present my $status_of_the_area_array; # Used to loop through the @status_of_the_area_array array my $file_prefix; # The location of the html file on the user's computer. # Declare the location of the html file on the user's computer. $file_prefix = 'file:///C:/Lanosrep/beW/Perl/HelpPage/'; # Print Status print "<br/>Begin looping through the directory <br/>"; # Loop through directory (in this case there will be only one file) while ( $file_name=glob("test_page.html") ) { #Use this for a test string to limit results *downum=0100*.html # Reset the $status_of_the_area_error to zero # This is a debuging variable that is a boolean and will help decide if I execute certain portions of code $status_of_the_area_error = 0; # Create the file string # This contains the complete file path (in the real program I loop through many web pages) $file_string = $file_prefix.$file_name; # Assign the contents of the web page to the $html_string variable # This variable will contain the whole web page $html_string = get($file_string); # Determine if the text I care about is present # This is a boolean that tells me whether the text I'm interested in is there or not $status_of_the_area_present = $html_string =~ m/<H2>.??Status of the Area.*?<\/H2>(.*?)<H2>.??For Additional Information.*?<\/H2>/si; # If it is present, print status that tells me it's there and execute some additional code if ($status_of_the_area_present) { print "<a href=$file_string>Status of the area Found - $file_string</a>.<br/>"; # This contains the text I'm interested in capture by a Perl Regex ($status_of_the_area) = $html_string =~ m/<H2>.??Status of the Area.*?<\/H2>(.*?)<H2>.??For Additional Information.*?<\/H2>/si; # The final text that I am interested in (after trimming) $status_of_the_area_complete = trim($status_of_the_area); # A boolean to detect if there are multiple paragraphs present $detect_paragraphs = $status_of_the_area =~ m/<P>/i; # If multiple paragraphs are detected, split them up and place in an array if ($detect_paragraphs) { $status_of_the_area =~ s/<P>.??<P>//sgi; (@status_of_the_area_array) = split /<P>/i, $status_of_the_area; } # Hardcode the following variables as they are unimportant for this discussion $inventory_number = 22222222; $areaID = 22222222; $survey_date = "01-01-1999"; # If multiple paragraphs are detected, loop through the array, @status_of_the_area_array and insert the data if ($detect_paragraphs) { my $i = 1; foreach $status_of_the_area_array (@status_of_the_area_array) { # Detect if the paragraph contains any text if ($status_of_the_area_array) { status_of_the_area_insert($status_of_the_area_array,$areaID, $survey_date,$time,1); } # If paragraph is blank don't insert into the database else { #print "<P>Paragraph $i<P>Nothing Here"; } $i++; } } # If just a single paragraph, insert the data captured in the variable $status_of_the_area_complete else { #print $status_of_the_area; status_of_the_area_insert($status_of_the_area_complete,$areaID, $survey_date,$time, 1); } print "<br/>"; } # If text is not detected print status else { print "<a href=$file_string>Status of the area NOT Found - $file_string</a>.<br/>"; } } # Done # Subroutines below # Perl trim function to remove whitespace from the start and end of the string sub trim($) { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; } # Left trim function to remove leading whitespace sub ltrim($) { my $string = shift; $string =~ s/^\s+//; return $string; } # Right trim function to remove trailing whitespace sub rtrim($) { my $string = shift; $string =~ s/\s+$//; return $string; } # subroutine that inserts data into database sub status_of_the_area_insert { my $var; my $areaID; my $survey_date; my $update_time; my $row_count_final; ($var,$areaID,$survey_date,$update_time,$row_count_final)[EMAIL PROTECTED]; my $dbh = DBI->connect("DBI:SQLite:dbname=C:/Lanosrep/beW/Perl/HelpPage/ area.db", "", "", {'RaiseError' => 1}); my $row_count = 0; my $sth; my @values; my @sql_row; my $rv; my $sql; my $OK2INSERT; my $parsed_var; $values[0] = trim($areaID); $values[1] = trim($survey_date); $values[2] = trim($update_time); $values[3] = trim($var); $sql = "SELECT * FROM area_status WHERE areaID='$values[0]' AND survey_date='$values[1]' AND status='$values[3]'"; $sth = $dbh->prepare($sql); $sth->execute; $rv = $sth->rows; if ($rv > 1) { print "<br/>Error: There were $rv rows returned for SQL statement: $sql<br/><br/>"; } else { if ($rv == 1) { print "<br/>Database entry, @values, already exists."; } else { $OK2INSERT = 1; } } $sth->finish; # Disconnect from the database. $dbh->disconnect(); if ($OK2INSERT) { $dbh = DBI->connect("DBI:SQLite:dbname=C:/Lanosrep/beW/Perl/HelpPage/ area.db", "", "", {'RaiseError' => 1}); print "<br/>Inserting into Database , @values."; $dbh->do("INSERT INTO area_status (areaID, survey_date, update_time, status ) VALUES ('$values[0]', '$values[1]', '$values[2]', '$values[3]')"); $dbh->disconnect(); } } -- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] http://learn.perl.org/