I am extracting addresses from an XML file into a pipe-delimited file so that they can be processed by other programs. The following code works, but it will have to handle 130,000 records, so it must be very efficient, and I cannot follow the documentation well enough to work out the best way to do this.
After this simple one is programmed I have to change a much more complex version of this program.

#!/usr/bin/perl -w
# vi:set sw=4 ts=4 et cin:
# $Id:$

=head1 SYNOPSIS

Extract addresses from an XML file into pipe delimited file.

usage: address_extract.pl xml_file

=cut

use warnings;
use strict;

use XML::Twig qw(:strict);

# Validate the command line up front and stop immediately on a bad argument,
# rather than printing a message and carrying on with an unusable filename.
my $xml_file = $ARGV[0];
if ( !defined $xml_file or !-f $xml_file ) {
    my $shown = defined $xml_file ? $xml_file : '';
    die "$shown is not a filename, requires filename as first parameter!\n";
}

# Output goes to "<input>.unsorted" next to the input file.
my $sort_file = $xml_file . '.unsorted';
unlink $sort_file;    # in case of rerun
open( my $sort, '>', $sort_file )
    or die "Unable to open $sort_file for output $!";

# Running record counter; written as the first column of every output line.
my $member = 0;

# The handler fires once per completed <mem> element while the file is parsed.
my $twig = XML::Twig->new( twig_handlers => { mem => \&member } );
$twig->parsefile($xml_file);

# Buffered write errors only surface at close time, so check it.
close($sort) or die "Unable to close $sort_file $!";

# no_pipe( $value )
#
# Returns a copy of $value with every '|' removed, so the value cannot be
# mistaken for a field delimiter in the output.  tr///d does the same job as
# s/\|//g without starting the regex engine.
sub no_pipe {
    my ($value) = @_;
    $value =~ tr/|//d;
    return $value;
}

# get_value( $mem_ref, $key )
#
# Returns the text of the first descendant of $mem_ref named $key, or an
# empty string when there is no such element (instead of dying on an
# undefined value).  Only the first match is looked up; the complete list of
# descendants is never built.
# NOTE(review): if the fields are always direct children of <mem>,
# first_child_text would be cheaper still - confirm against the XML layout.
sub get_value {
    my ( $mem_ref, $key ) = @_;
    my $elt = $mem_ref->first_descendant($key);
    return defined $elt ? $elt->text() : '';
}

# member( $t, $mem_ref )
#
# Twig handler for <mem>.  Writes one pipe delimited line to the output file:
#   record number|member|add1|add2|add3|suburb|state|pcode
# Pipes are stripped from the three address lines and the suburb; the other
# fields are written as found.  Returns 1.
sub member {
    my ( $t, $mem_ref ) = @_;
    $member++;

    my ( $mem_no, $add1, $add2, $add3, $suburb, $state, $pcode ) =
        map { get_value( $mem_ref, $_ ) }
        qw(member add1 add2 add3 suburb state pcode);

    print {$sort} join( '|',
        $member, $mem_no,
        ( map { no_pipe($_) } $add1, $add2, $add3, $suburb ),
        $state, $pcode,
    ) . "\n";

    # Release everything parsed so far.  Without this the twig keeps every
    # <mem> element until the end of the file, so memory grows with the
    # number of records.
    $t->purge;

    return 1;
}