Hi all,

I am trying to parse a tab-delimited file which has repeating lines. This is causing problems while parsing it to the data structure (see below). I would appreciate if you could help me solve this.

Thanks,

Aravind

sub parse {

    my $pazar_file_path = shift;
    my $pazar_data; # ref to a hash holding the parsed data

open FH, '<', $pazar_file_path or croak ( "Cannot open file '$pazar_file_path': $!" );
    while ( my $data = <FH> ) {

        chomp $data;
        my @record_lines = split /\t/, $data;
my ( $pazar_p_id, $prot_id, $pazar_g_id, $ensembl_id ) = splice ( @record_lines, 0, 4 ); push @{ $pazar_data->{$pazar_p_id}{$prot_id}{$pazar_g_id}{$ensembl_id} }, @record_lines;

    } # end of <FH>
    close FH;
    $pazar_data ? return $pazar_data : carp "No data!";

}

Data Structure:

 'TF0000212' => {
                           'SP3_HUMAN' => {
                                            'GS0000425' => {
'ENSG00000117983 ' => [
                                                                                
     '11',
                                                                                
     '1244296',
                                                                                
     '1284402',
                                                                                
     'Homo sapiens',
                                                                                
     'MUC5AC',
                                                                                
     '12077147',
                                                                                
     'ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT ',
                                                                                
     '11',
                                                                                
     '1244296',
                                                                                
     '1284402',
                                                                                
     'Homo sapiens',
                                                                                
     'MUC5AC',
                                                                                
     '12077147',
                                                                                
     'ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT '
                                                                                
   ]
                                                           }
                                          }
                         },


 _______DATA_________

TF0000211 SP3_MOUSE GS0000422 ENSMUSG00000037974 7 148974877 149005136 Mus musculus MUC5AC 14570593 ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT TF0000212 SP3_HUMAN GS0000425 ENSG00000117983 11 1244296 1284402 Homo sapiens MUC5AC 12077147 ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT TF0000212 SP3_HUMAN GS0000425 ENSG00000117983 11 1244296 1284402 Homo sapiens MUC5AC 12077147 ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT

--
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/


Reply via email to