Hi all,
I am trying to parse a tab-delimited file which has repeating lines.
This is causing problems while parsing it to the data structure (see
below). I would appreciate if you could help me solve this.
Thanks,
Aravind
sub parse {
my $pazar_file_path = shift;
my $pazar_data; # ref to a hash holding the parsed data
open FH, '<', $pazar_file_path or croak ( "Cannot open file
'$pazar_file_path': $!" );
while ( my $data = <FH> ) {
chomp $data;
my @record_lines = split /\t/, $data;
my ( $pazar_p_id, $prot_id, $pazar_g_id, $ensembl_id ) = splice
( @record_lines, 0, 4 );
push @{
$pazar_data->{$pazar_p_id}{$prot_id}{$pazar_g_id}{$ensembl_id} },
@record_lines;
} # end of <FH>
close FH;
$pazar_data ? return $pazar_data : carp "No data!";
}
Data Structure:
'TF0000212' => {
'SP3_HUMAN' => {
'GS0000425' => {
'ENSG00000117983 ' => [
'11',
'1244296',
'1284402',
'Homo sapiens',
'MUC5AC',
'12077147',
'ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT ',
'11',
'1244296',
'1284402',
'Homo sapiens',
'MUC5AC',
'12077147',
'ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT '
]
}
}
},
_______DATA_________
TF0000211 SP3_MOUSE GS0000422 ENSMUSG00000037974 7
148974877 149005136 Mus musculus MUC5AC 14570593
ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT
TF0000212 SP3_HUMAN GS0000425 ENSG00000117983 11
1244296 1284402 Homo sapiens MUC5AC 12077147
ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT
TF0000212 SP3_HUMAN GS0000425 ENSG00000117983 11
1244296 1284402 Homo sapiens MUC5AC 12077147
ELECTROPHORETIC MOBILITY SHIFT ASSAY (EMSA)::SUPERSHIFT
--
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/