+perl-beginners (forgot to hit reply-all). On Sun, Sep 20, 2015, 7:00 PM Raj Barath <barat...@outlook.com> wrote:
> Hi, > I have added the comments. But to understand it even better, please go through > the official documentation, such as: > http://perldoc.perl.org/index-tutorials.html > > > my %hash; > > while( my $line = <DATA> ){ # Reads every line from the __DATA__ handle > chomp $line; # removes the trailing newline character > my ( $scaf, $pro_per ) = $line =~ m/ #match > \s #a single whitespace character > Hit=(.*?) #captures the value of the hit, where "." means any character, * means > match zero or more, and adding ? to it means match non-greedily. > \s #a single whitespace character > .*? #same as above but it does not capture (because there are no parentheses) > Percent_id=(.*?)$ #captures the value of Percent_id, where "." means any > character, * means match zero or more, and ? is non-greedy (meaning match as > little as possible) > # $ means end of line > /xg; # x - allows whitespace and comments in the pattern, and g - global match > > push @{$hash{$scaf}}, $pro_per; # for each scaffold it pushes the > protein percentages as values onto an anonymous array > } > > print Dumper (\%hash); > > > On Sun, Sep 20, 2015 at 6:33 PM, Alaba, Oluwafemi (IITA) < > o.al...@cgiar.org> wrote: > >> Dear Raj, >> >> Thanks. >> >> Could you comment on the lines of code so that I can understand it better? >> >> Best >> >> On Sep 20, 2015, at 10:19 PM, Raj Barath <barat...@live.com> wrote: >> >> Hi, >> >> Is this what you're looking for? 
>> >> my %hash; >> >> while( my $line = <DATA> ){ >> chomp $line; >> my ( $scaf, $pro_per ) = $line =~ m/\sHit=(.*?)\s.*?Percent_id=(.*?)$/g; >> push @{$hash{$1}}, $2; >> } >> >> print Dumper (\%hash); >> >> >> Output: >> $VAR1 = { >> 'scaffold293_size341291' => [ >> '228.36676217765', >> '241.818181818182', >> '240', >> '233.076923076923', >> '241.904761904762', >> '227.461139896373', >> '222.666666666667' >> ], >> 'scaffold4_size6989527' => [ >> '235.023041474654', >> '247.663551401869', >> '247.663551401869', >> '224.137931034483', >> '236.734693877551', >> '237.634408602151', >> '237.777777777778', >> '231.707317073171', >> '230.337078651685' >> ] >> }; >> >> >> >> __DATA__ >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349 >> Percent_id=228.36676217765 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 >> Percent_id=241.818181818182 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 >> Percent_id=240 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130 >> Percent_id=233.076923076923 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105 >> Percent_id=241.904761904762 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193 >> Percent_id=227.461139896373 >> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150 >> Percent_id=222.666666666667 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217 >> Percent_id=235.023041474654 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 >> Percent_id=247.663551401869 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 >> Percent_id=247.663551401869 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174 >> Percent_id=224.137931034483 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98 >> Percent_id=236.734693877551 >> Query=sp|P59287|CASS_RICCO 
Hit=scaffold4_size6989527 Bit=150 Length=93 >> Percent_id=237.634408602151 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90 >> Percent_id=237.777777777778 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82 >> Percent_id=231.707317073171 >> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89 >> Percent_id=230.337078651685 >> >> On Sun, Sep 20, 2015 at 5:56 PM, Alaba, Oluwafemi (IITA) < >> o.al...@cgiar.org> wrote: >> >>> Dear ALL, >>> >>> I have a file that looks like this. >>> >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349 >>> Percent_id=228.36676217765 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 >>> Percent_id=241.818181818182 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 >>> Percent_id=240 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130 >>> Percent_id=233.076923076923 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105 >>> Percent_id=241.904761904762 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193 >>> Percent_id=227.461139896373 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150 >>> Percent_id=222.666666666667 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217 >>> Percent_id=235.023041474654 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 >>> Percent_id=247.663551401869 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 >>> Percent_id=247.663551401869 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174 >>> Percent_id=224.137931034483 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98 >>> Percent_id=236.734693877551 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93 >>> Percent_id=237.634408602151 >>> Query=sp|P59287|CASS_RICCO 
Hit=scaffold4_size6989527 Bit=150 Length=90 >>> Percent_id=237.777777777778 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82 >>> Percent_id=231.707317073171 >>> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89 >>> Percent_id=230.337078651685 >>> >>> I need hints to write a script that will recognise the fragments of >>> protein in the same scaffolds. >>> >>> Best wishes, >>> >>> Alaba >>> >> >> >> >