Hi,

Is this what you're looking for?

use strict;
use warnings;
use Data::Dumper;

# Group the Percent_id values found in the DATA section by the scaffold
# named in the matching record's Hit= field.
#
# Result: %hash maps scaffold name => array ref of Percent_id strings, in
# the order the records appear in the input.
#
# A record may be a single line ("... Hit=X ... Percent_id=Y") or may have
# its Percent_id= field wrapped onto the following line; both are accepted.
my %hash;

# Scaffold named by the most recent Hit= field that has not yet been
# paired with a Percent_id= value.
my $pending_hit;

while ( my $line = <DATA> ) {
    chomp $line;

    # Remember the scaffold; its Percent_id may be on this line or the next.
    if ( $line =~ m/(?:\A|\s)Hit=(\S+)/ ) {
        $pending_hit = $1;
    }

    # Lines without a Percent_id field contribute nothing by themselves.
    next unless $line =~ m/(?:\A|\s)Percent_id=(\S+)/;
    my $percent_id = $1;

    # Never file a value under a stale or missing scaffold name.
    if ( !defined $pending_hit ) {
        warn "Line $.: Percent_id=$percent_id has no preceding Hit= field; skipped\n";
        next;
    }

    push @{ $hash{$pending_hit} }, $percent_id;

    # Each Hit= pairs with exactly one Percent_id=; forget it once used.
    undef $pending_hit;
}

print Dumper( \%hash );


Output:
$VAR1 = {
          'scaffold293_size341291' => [
                                        '228.36676217765',
                                        '241.818181818182',
                                        '240',
                                        '233.076923076923',
                                        '241.904761904762',
                                        '227.461139896373',
                                        '222.666666666667'
                                      ],
          'scaffold4_size6989527' => [
                                       '235.023041474654',
                                       '247.663551401869',
                                       '247.663551401869',
                                       '224.137931034483',
                                       '236.734693877551',
                                       '237.634408602151',
                                       '237.777777777778',
                                       '231.707317073171',
                                       '230.337078651685'
                                     ]
        };



__DATA__
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349
Percent_id=228.36676217765
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
Percent_id=241.818181818182
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
Percent_id=240
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130
Percent_id=233.076923076923
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105
Percent_id=241.904761904762
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193
Percent_id=227.461139896373
Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150
Percent_id=222.666666666667
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217
Percent_id=235.023041474654
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
Percent_id=247.663551401869
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174
Percent_id=224.137931034483
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98
Percent_id=236.734693877551
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93
Percent_id=237.634408602151
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90
Percent_id=237.777777777778
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82
Percent_id=231.707317073171
Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89
Percent_id=230.337078651685

On Sun, Sep 20, 2015 at 5:56 PM, Alaba, Oluwafemi (IITA) <o.al...@cgiar.org>
wrote:

> Dear ALL,
>
> I have a file that looks like this.
>
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=349
> Percent_id=228.36676217765
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=241.818181818182
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=110
> Percent_id=240
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=130
> Percent_id=233.076923076923
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=105
> Percent_id=241.904761904762
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=193
> Percent_id=227.461139896373
> Query=sp|P59287|CASS_RICCO Hit=scaffold293_size341291 Bit=152 Length=150
> Percent_id=222.666666666667
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=217
> Percent_id=235.023041474654
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=107
> Percent_id=247.663551401869
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=174
> Percent_id=224.137931034483
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=98
> Percent_id=236.734693877551
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=93
> Percent_id=237.634408602151
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=90
> Percent_id=237.777777777778
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=82
> Percent_id=231.707317073171
> Query=sp|P59287|CASS_RICCO Hit=scaffold4_size6989527 Bit=150 Length=89
> Percent_id=230.337078651685
>
> I need hints to write a script that will recognise the fragments of
> protein in the same scaffolds.
>
> Best wishes,
>
> Alaba
>

Reply via email to