Hi, is this what you're looking for?
use strict;
use warnings;
use Data::Dumper;

# Group the Percent_id value of every record by the scaffold named in its
# Hit= field, then dump the resulting hash of arrays.
#
# Each record is expected on a single line of whitespace-separated
# key=value fields, for example:
#   Query=... Hit=scaffold4_size6989527 Bit=150 Length=89 Percent_id=230.3
# Lines that do not contain both fields in that order are skipped.

# Sort hash keys so the dump is printed in the same order on every run.
$Data::Dumper::Sortkeys = 1;

my %percent_ids_for;

LINE:
while ( my $line = <DATA> ) {
    chomp $line;

    # List assignment in scalar context yields the number of captures, so a
    # failed match (0) skips the line instead of storing undefined values.
    my ( $scaffold, $percent_id ) = $line =~ m{
        (?: \A | \s ) Hit=(\S+)     # scaffold name
        \s .*?
        \b Percent_id=(\S+)         # value, without trailing whitespace
        \s* \z
    }x or next LINE;

    push @{ $percent_ids_for{$scaffold} }, $percent_id;
}

print Dumper( \%percent_ids_for );

# Output (as posted, for the complete 16-record input):
#
# $VAR1 = {
#           'scaffold293_size341291' => [
#                                         '228.36676217765',
#                                         '241.818181818182',
#                                         '240',
#                                         '233.076923076923',
#                                         '241.904761904762',
#                                         '227.461139896373',
#                                         '222.666666666667'
#                                       ],
#           'scaffold4_size6989527' => [
#                                        '235.023041474654',
#                                        '247.663551401869',
#                                        '247.663551401869',
#                                        '224.137931034483',
#                                        '236.734693877551',
#                                        '237.634408602151',
#                                        '237.777777777778',
#                                        '231.707317073171',
#                                        '230.337078651685'
#                                      ]
#         };

__DATA__
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349 Percent_id=228.36676217765
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 Percent_id=241.818181818182
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110 Percent_id=240
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130 Percent_id=233.076923076923
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105 Percent_id=241.904761904762
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193 Percent_id=227.461139896373
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150 Percent_id=222.666666666667
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217 Percent_id=235.023041474654
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107 Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174 Percent_id=224.137931034483
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98 Percent_id=236.734693877551
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93 Percent_id=237.634408602151
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90 Percent_id=237.777777777778
Query=sp|P59287|CASS_RICCO 
Hit=scaffold4_size6989527 Bit=150 Length=82 Percent_id=231.707317073171
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89 Percent_id=230.337078651685

On Sun, Sep 20, 2015 at 5:56 PM, Alaba, Oluwafemi (IITA) <o.al...@cgiar.org> wrote:
> Dear ALL,
>
> I have a file that looks like this.
>
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349
> Percent_id=228.36676217765
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=241.818181818182
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=240
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130
> Percent_id=233.076923076923
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105
> Percent_id=241.904761904762
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193
> Percent_id=227.461139896373
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150
> Percent_id=222.666666666667
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217
> Percent_id=235.023041474654
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174
> Percent_id=224.137931034483
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98
> Percent_id=236.734693877551
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93
> Percent_id=237.634408602151
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90
> Percent_id=237.777777777778
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82
> Percent_id=231.707317073171
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89
> Percent_id=230.337078651685
>
> I need hints to write a script that will recognise the fragments of
> protein in the same scaffolds.
>
> Best wishes,
>
> Alaba
>