Hi, Full code. It strips UTF-8 newlines between xhtml tags and leaves other tags with UTF-8 newlines. It uses Knuth-Morris-Pratt, because we can read the file only once. I need this script because of compatibility of a special tool.
use bytes; use warnings; use strict; use XML::writer; use IO::File; use utf8; print "begin whitespace trimming\n"; open FILE, "<default.xml" or die $!; open OUTFILE, ">output.xml" or die $!; my @xhtmlbegin = ("0x3c", "0x78", "0x68", "0x74", "0x6d", "0x6c", "0x20", "0x78", "0x6d", "0x6c", "0x6e", "0x73", "0x3d", "0x22", "0x68", "0x74", "0x74", "0x70", "0x3a", "0x2f", "0x2f", "0x77", "0x77", "0x77", "0x2e", "0x77", "0x33", "0x2e", "0x6f", "0x72", "0x67", "0x2f", "0x31", "0x39", "0x39", "0x39", "0x2f", "0x78", "0x68", "0x74", "0x6d", "0x6c", "0x22", "0x20", "0x2f", "0x3e"); my $xhtmlbeginpos = 0; my $begintagfound = 0; my @xhtmlend = ("0x3c", "0x2f", "0x78", "0x68", "0x74", "0x6d", "0x6c", "0x3e"); my $xhtmlendpos = 0; my $endtagfound = 0; my $exit1 = 0; my $exit2 = 0; binmode FILE; binmode OUTFILE; my @next = {0}; my @next2 = {0}; # initnext(@pattern, @next); # sub initnext { # my $i = 1; # my $j = 0; # while($i < (length @next) - 1) { # if( $pattern[$i + 1] = $pattern[$j + 1] ) { # $i = $i + 1; # $j = $j + 1; # $next[$i] = $j; # #push(@next, $j); # } else { # if($j == 0) { # $i = $i + 1; # $next[$i] = 0; # #push(@next, 0); # } else { # $j = $next[$j]; # } # } # } # } my $i001 = 1; my $j001 = 0; while($i001 < (length 0+@next) - 1) { if( $xhtmlbegin[$i001 + 1] == $xhtmlbegin[$j001 + 1] ) { $i001 = $i001 + 1; $j001 = $j001 + 1; $next[$i001] = $j001; #push(@next, $j); } else { if($j001 == 0) { $i001 = $i001 + 1; $next[$i001] = 0; #push(@next, 0); } else { $j001 = $next[$j001]; } } } my $i002 = 1; my $j002 = 0; while($i002 < (length 0+@next2) - 1) { if( $xhtmlend[$i002 + 1] == $xhtmlend[$j002 + 1] ) { $i002 = $i002 + 1; $j002 = $j002 + 1; $next2[$i002] = $j002; #push(@next2, $j); } else { if($j002 == 0) { $i002 = $i002 + 1; $next2[$i002] = 0; #push(@next2, 0); } else { $j002 = $next2[$j002]; } } } #initnext(@xhtmlbegin, @next); #initnext(@xhtmlend, @next2); my $i = 0; # xhtmlbegin my $j = 0; my $i2 = 0; my $j2 = 0; my $doread = 0; my ($buf, $data, $n); $n = read FILE, $data, 1; $buf = $data; while($n != 0) { $doread = 0; # there has to be a begin tag before the end tag # so we can check for begin tag first if($begintagfound == 0) { if($buf eq chr hex $xhtmlbegin[$j + 1]) { $doread = 1; $j = $j + 1; } else { if($j == 0) { $doread = 1; } else { $j = $next[$j]; } } } else { if ($buf eq chr hex $xhtmlend[$j2 + 1]) { $doread = 1; $j2 = $j2 + 1; } else { if($j2 == 0) { $doread = 1; } else { $j2 = $next[$j2]; } } } if($j == length 0+@xhtmlbegin) { #$k = tell(FILE) - @xhtmlbegin; $begintagfound = 1; print "begin tag found\n"; print OUTFILE "BEGINFOUND"; } if($j2 == length 0+@xhtmlend) { #$k = tell(FILE) - @xhtmlbegin; $endtagfound = 1; print "end tag found:" . $j2 . "==" . length 0+@xhtmlend . "\n"; print OUTFILE "ENDFOUND"; $begintagfound = 0; } if($doread == 1) { $n = read FILE, $data, 1; $buf = $data; if($begintagfound == 1) { # ignore 0d-s before the 0a if($buf ne chr hex "0x0d") { print OUTFILE $buf; } } else { print OUTFILE $buf; } } } close(FILE); close(OUTFILE); print "end whitespace trimming\n"; -----Ursprüngliche Nachricht----- Von: Shlomi Fish [mailto:shlo...@shlomifish.org] Gesendet: Freitag, 3. Juli 2015 20:51 Cc: beginners@perl.org Betreff: Re: Use of uninitialized value $j in numeric eq (==) at myfile line 125 Hi Tamas, On Fri, 3 Jul 2015 15:10:18 +0000 "Nagy Tamas (TVI-GmbH)" <tamas.n...@tvi-gmbh.de> wrote: > Hi, > > My code: > > Use strict; > Use warnings; > > my $j = 0; > my $doread = 0; > my ($buf, $data, $n); > $n = read FILE, $data, 1; > $buf = $data; > while($n != 0) { > > $doread = 0; > > # there has to be a begin tag before the end tag # so we can check for > begin tag first > > if($begintagfound == 0) { > if($buf eq chr hex $xhtmlbegin[$j + 1]) { > $doread = 1; > $j = $j + 1; > } else { > if($j == 0) {... line 125 > ... > > Says use of uninitialized value $j in numeric eq == at line 125 Does > anyone has any idea why does it tell $j as uninitialized? > I don't have an idea off hand and part of the problem is that the code is incredibly partial. Can you provide a self contained, reproducing, example? Note that often trimming the code in an attempt to isolate the problem, can reveal its root cause. Regards, Shlomi Fish -- ----------------------------------------------------------------- Shlomi Fish http://www.shlomifish.org/ Selina Mandrake - The Slayer (Buffy parody) - http://shlom.in/selina Reality to be conquered, must be obeyed. — Francis Bacon Please reply to list if it's a mailing list post - http://shlom.in/reply . -- To unsubscribe, e-mail: beginners-unsubscr...@perl.org For additional commands, e-mail: beginners-h...@perl.org http://learn.perl.org/ Tamas Nagy