Thanks to those who helped. The code works great. Now I will practice honing it down to the bare essentials. Below is the final code you all helped with.

-Thanks a million!
-Mike

>>> Begin PERL Code<<<

#! /usr/bin/perl -w

use strict;
use FileHandle;

# Sequence data collected from all input files, keyed by organism ID.
my %organisms;

print "Enter in a list of files to be processed:\n";
print "(one name per line; finish with an empty line or end-of-input)\n";

# For example:
# CytB.fasta
# NADH1.fasta
# ....

# Read the file names one line at a time instead of slurping STDIN to
# end-of-file: slurping leaves nothing for the OUTFILE prompt below when
# the input is piped in.  An empty line (or end-of-input) ends the list.
my @infiles;
while (defined(my $name = <STDIN>)) {
    # Trim the newline and surrounding whitespace.  Two-argument open used
    # to strip that whitespace implicitly; three-argument open does not.
    $name =~ s/^\s+|\s+$//g;
    last if $name eq '';
    push @infiles, $name;
}
die "No input files given\n" unless @infiles;

print "Enter in the name of the OUTFILE:\n";

my $outfile = <STDIN>;
die "No OUTFILE name given\n" unless defined $outfile;
$outfile =~ s/^\s+|\s+$//g;
die "No OUTFILE name given\n" if $outfile eq '';

# Three-argument open: the name is taken literally, so characters such as
# '>' or '|' typed by the user cannot change the open mode.  The bareword
# handle OUTFILE is kept because the output section prints to it.
open(OUTFILE, '>', $outfile)
    or die "Can't open OUTFILE '$outfile': $!";

# Read every input file and collect its sequence data in %organisms.
#
# A header line looks like ">dog"; every following line up to the next
# header is sequence data for that organism.  Data for the same organism
# ID found in several files is concatenated in the order it is read.
foreach my $infile (@infiles) {
    # Lexical handle and three-argument open: the file name is taken
    # literally, so it cannot be interpreted as a mode or a pipe.
    open(my $FASTA, '<', $infile)
        or die "Can't open INFILE '$infile': $!";

    # ID from the most recent header line in this file; undefined until
    # the first header has been seen.
    my $orgID;

    while (defined(my $line = <$FASTA>)) {
        # Strip the line ending, including the CR of CRLF files, so that
        # stray carriage returns do not end up inside the sequences.
        $line =~ s/[\r\n]+\z//;
        print "\nWorking on >>$line<<\n";

        # Header line: optional leading whitespace, '>', then the ID
        # (one or more word characters, captured in $1).  Anchored at the
        # start of the line so a '>' elsewhere is not taken for a header.
        if ($line =~ /^\s*>(\w+)/) {
            $orgID = $1;
            print "Found a new organism start line ('$orgID')\n";
            next;
        }

        # Data before the first header belongs to no organism; skip it
        # rather than filing it under an undefined key.  Blank lines are
        # skipped silently.
        if (!defined $orgID) {
            warn "Ignoring data before the first header line in '$infile': $line\n"
                if $line =~ /\S/;
            next;
        }

        print "Sequence data found: $line\n";
        print "Appending data to $orgID\n";

        # .= creates the hash entry on first use, so no exists() check
        # is needed.
        $organisms{$orgID} .= $line;
    }

    # do not forget to close the input file
    close($FASTA)
        or die "could not close INFILE '$infile': $!";
}

# We've processed all input files: write one FASTA record per organism.
# The IDs are sorted so that the output order is the same on every run
# (hash iteration order is not predictable).
print "\n****************************************\n";
foreach my $orgID (sort keys %organisms) {
    print OUTFILE ">$orgID\n$organisms{$orgID}\n\n"
        or die "Can't write to OUTFILE '$outfile': $!";
}

# Buffered write errors (e.g. a full disk) only show up when the handle
# is closed, so close it explicitly and check the result.
close(OUTFILE)
    or die "Can't close OUTFILE '$outfile': $!";

>>>END PERL CODE<<<


-- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] <http://learn.perl.org/> <http://learn.perl.org/first-response>




Reply via email to