Hi,

Full code. It strips UTF-8 newlines between xhtml tags and leaves other tags 
with UTF-8 newlines.
It uses Knuth-Morris-Pratt, because we can read the file only once. I need this 
script because of
compatibility of a special tool.

use bytes; 
use warnings;
use strict;
use XML::writer;
use IO::File;
use utf8;


print "begin whitespace trimming\n";

open FILE, "<default.xml" or die $!;
open OUTFILE, ">output.xml" or die $!;

my @xhtmlbegin = ("0x3c", "0x78", "0x68", "0x74", "0x6d", "0x6c", "0x20", 
"0x78", "0x6d", "0x6c", "0x6e", "0x73", "0x3d", "0x22", "0x68", "0x74", "0x74", 
"0x70", "0x3a", "0x2f", "0x2f", "0x77", "0x77", "0x77", "0x2e", "0x77", "0x33", 
"0x2e", "0x6f", "0x72", "0x67", "0x2f", "0x31", "0x39", "0x39", "0x39", "0x2f", 
"0x78", "0x68", "0x74", "0x6d", "0x6c", "0x22", "0x20", "0x2f", "0x3e");
my $xhtmlbeginpos = 0;
my $begintagfound = 0;
my @xhtmlend = ("0x3c", "0x2f", "0x78", "0x68", "0x74", "0x6d", "0x6c", "0x3e");
my $xhtmlendpos = 0;
my $endtagfound = 0;
my $exit1 = 0;
my $exit2 = 0;

binmode FILE;
binmode OUTFILE;

my @next = {0};
my @next2 = {0};

# initnext(@pattern, @next);
# sub initnext {
    # my $i = 1;
    # my $j = 0;


    # while($i < (length @next) - 1) {
        # if( $pattern[$i + 1] = $pattern[$j + 1] ) {
            # $i = $i + 1;
            # $j = $j + 1;
            # $next[$i] = $j;
            # #push(@next, $j);
        # } else {
            # if($j == 0) {
                # $i = $i + 1;
                # $next[$i] = 0;
                # #push(@next, 0);
            # } else {
                # $j = $next[$j];
            # }
        # }
    # }
# }

    my $i001 = 1;
    my $j001 = 0;


    while($i001 < (length 0+@next) - 1) {
        if( $xhtmlbegin[$i001 + 1] == $xhtmlbegin[$j001 + 1] ) {
            $i001 = $i001 + 1;
            $j001 = $j001 + 1;
            $next[$i001] = $j001;
            #push(@next, $j);
        } else {
            if($j001 == 0) {
                $i001 = $i001 + 1;
                $next[$i001] = 0;
                #push(@next, 0);
            } else {
                $j001 = $next[$j001];
            }
        }
    }
    
    
    my $i002 = 1;
    my $j002 = 0;


    while($i002 < (length 0+@next2) - 1) {
        if( $xhtmlend[$i002 + 1] == $xhtmlend[$j002 + 1] ) {
            $i002 = $i002 + 1;
            $j002 = $j002 + 1;
            $next2[$i002] = $j002;
            #push(@next2, $j);
        } else {
            if($j002 == 0) {
                $i002 = $i002 + 1;
                $next2[$i002] = 0;
                #push(@next2, 0);
            } else {
                $j002 = $next2[$j002];
            }
        }
    }




#initnext(@xhtmlbegin, @next);
#initnext(@xhtmlend, @next2);
my $i = 0;  # xhtmlbegin
my $j = 0;

my $i2 = 0;
my $j2 = 0;

my $doread = 0;

my ($buf, $data, $n);
$n = read FILE, $data, 1;
$buf = $data;
while($n != 0) {
    
    $doread = 0;
    
    # there has to be a begin tag before the end tag
    # so we can check for begin tag first
        
    if($begintagfound == 0) {
        if($buf eq chr hex $xhtmlbegin[$j + 1]) {
            $doread = 1;         
            $j = $j + 1;
        } else {
            if($j == 0) {
                $doread = 1; 
            } else {
                $j = $next[$j];            
            }
        }
    
    } else {
    

        if ($buf eq chr hex $xhtmlend[$j2 + 1]) {
            $doread = 1; 
            $j2 = $j2 + 1;
        } else {
            if($j2 == 0) {
                $doread = 1; 
            } else {
                $j2 = $next[$j2];            
            }
        }
    
    }
    
    
    
    if($j == length 0+@xhtmlbegin) {
        #$k = tell(FILE) - @xhtmlbegin;
        $begintagfound = 1;
        print "begin tag found\n";
        print OUTFILE "BEGINFOUND";
    }
    
    if($j2 == length 0+@xhtmlend) {
        #$k = tell(FILE) - @xhtmlbegin;
        $endtagfound = 1;
        print "end tag found:" . $j2 . "==" . length 0+@xhtmlend . "\n";
        
        print OUTFILE "ENDFOUND";
        
        $begintagfound = 0;
    }
 
    
    if($doread == 1) {
        $n = read FILE, $data, 1;
        $buf = $data;
        
        if($begintagfound == 1) {
            # ignore 0d-s before the 0a
           
            if($buf ne chr hex "0x0d") {
                print OUTFILE $buf;
            }
        
       } else {
           print OUTFILE $buf;
       }
    }
    
}


close(FILE);
close(OUTFILE);

print "end whitespace trimming\n";

-----Ursprüngliche Nachricht-----
Von: Shlomi Fish [mailto:shlo...@shlomifish.org] 
Gesendet: Freitag, 3. Juli 2015 20:51
Cc: beginners@perl.org
Betreff: Re: Use of uninitialized value $j in numeric eq (==) at myfile line 125

Hi Tamas,

On Fri, 3 Jul 2015 15:10:18 +0000
"Nagy Tamas (TVI-GmbH)" <tamas.n...@tvi-gmbh.de> wrote:

> Hi,
> 
> My code:
> 
> Use strict;
> Use warnings;
> 
> my $j = 0;
> my $doread = 0;
> my ($buf, $data, $n);
> $n = read FILE, $data, 1;
> $buf = $data;
> while($n != 0) {
> 
> $doread = 0;
> 
> # there has to be a begin tag before the end tag # so we can check for 
> begin tag first
> 
> if($begintagfound == 0) {
>     if($buf eq chr hex $xhtmlbegin[$j + 1]) {
>         $doread = 1;
>         $j = $j + 1;
>     } else {
>         if($j == 0) {...  line 125
> ...
> 
> Says use of uninitialized value $j in numeric eq == at line 125 Does 
> anyone has any idea why does it tell $j as uninitialized?
> 

I don't have an idea off hand and part of the problem is that the code is 
incredibly partial. Can you provide a self contained, reproducing, example?
Note that often trimming the code in an attempt to isolate the problem, can 
reveal its root cause.

Regards,

        Shlomi Fish

--
-----------------------------------------------------------------
Shlomi Fish       http://www.shlomifish.org/
Selina Mandrake - The Slayer (Buffy parody) - http://shlom.in/selina

Reality to be conquered, must be obeyed.  — Francis Bacon

Please reply to list if it's a mailing list post - http://shlom.in/reply .

--
To unsubscribe, e-mail: beginners-unsubscr...@perl.org For additional commands, 
e-mail: beginners-h...@perl.org http://learn.perl.org/

Tamas Nagy

Reply via email to