On Mon, Sep 17, 2012 at 1:29 PM, Wang, Li <li.w...@ttu.edu> wrote:
> Dear List members
>
> I have three columns of a table.  The example is as follows:
>
> DBS     R^2     genename
> 801     0.27807486057281494     POPTR_0002s00200
> 1903    1.0     POPTR_0002s00200
> 1103    0.25852271914482117     POPTR_0002s00200
> 3215    0.03134157508611679     POPTR_0002s00200
> 2415    0.010018552653491497    POPTR_0002s00200
> 1313    0.03134157508611679     POPTR_0002s00200
> 3442    1.0     POPTR_0002s00200
> 2642    0.25852271914482117     POPTR_0002s00200
> 1540    1.0     POPTR_0002s00200
> 228     0.03134157508611679     POPTR_0002s00200
> 3099    0.026160990819334984    POPTR_0002s00210
> 7555    0.800000011920929       POPTR_0002s00210
> 4457    0.014814814552664757    POPTR_0002s00210
> 7564    5.232862313278019E-4    POPTR_0002s00210
> 4466    0.0018315018387511373   POPTR_0002s00210
> 10      0.0036630036775022745   POPTR_0002s00210
> 7565    5.232862313278019E-4    POPTR_0002s00210
> 4467    0.0018315018387511373   POPTR_0002s00210
> 11      0.0036630036775022745   POPTR_0002s00210
> 2       1.0     POPTR_0002s00210
>
> I would like to calculate the average of column 2 over all rows that share
> the same value in column three. In this case, I would like my output to be
> as follows:
> R^2     genename
> 0.3899163577    POPTR_0002s00200
> 0.2314956035    POPTR_0002s00210
>

Maybe something like this:

HTH,

Chris

#!/usr/bin/perl

## Group the whitespace-separated rows that follow __DATA__ by gene name
## (3rd column), add up the R^2 values (2nd column) of each gene, and print
## both the accumulated structure and the per-gene average.

use 5.010;
use strict;
use warnings;
use Data::Dumper;
use Scalar::Util qw(looks_like_number);

## add_line( \%totals, $line )
##
## Fold one input line into %totals, which maps a gene name to
##   [ DBS of the gene's first row, sum of R^2, gene name, number of rows ].
## Lines that are not data rows (fewer than three columns, or a second column
## that is not a number -- e.g. blank lines and the column header) are
## ignored.  Returns nothing.
sub add_line {
    my ( $totals, $line ) = @_;

    ## split ' ' (unlike /\s+/) skips leading whitespace, so an indented row
    ## still puts DBS / R^2 / genename into the first three fields
    my ( $dbs, $r2, $gene ) = split ' ', $line;

    return unless defined $gene && looks_like_number($r2);

    ## sum and count live in the gene's own entry, so the count stays
    ## correct even when rows of different genes are interleaved
    if ( my $entry = $totals->{$gene} ) {
        $entry->[1] += $r2;
        $entry->[3]++;
    }
    else {
        ## first occurrence of this gene: count starts at 1
        $totals->{$gene} = [ $dbs, $r2, $gene, 1 ];
    }

    return;
}

my %hash;

while ( my $line = <DATA> ) {
    add_line( \%hash, $line );
}

## sorted keys make the dump reproducible from run to run
$Data::Dumper::Sortkeys = 1;
print Dumper \%hash;

## the table that was asked for: average R^2 per gene
say join "\t", 'R^2', 'genename';
for my $gene ( sort keys %hash ) {
    my ( undef, $sum, undef, $count ) = @{ $hash{$gene} };
    say join "\t", $sum / $count, $gene;
}


__DATA__
DBS     R^2     genename
801     0.27807486057281494     POPTR_0002s00200
1903    1.0     POPTR_0002s00200
1103    0.25852271914482117     POPTR_0002s00200
3215    0.03134157508611679     POPTR_0002s00200
2415    0.010018552653491497    POPTR_0002s00200
1313    0.03134157508611679     POPTR_0002s00200
3442    1.0     POPTR_0002s00200
2642    0.25852271914482117     POPTR_0002s00200
1540    1.0     POPTR_0002s00200
228     0.03134157508611679     POPTR_0002s00200
3099    0.026160990819334984    POPTR_0002s00210
7555    0.800000011920929       POPTR_0002s00210
4457    0.014814814552664757    POPTR_0002s00210
7564    5.232862313278019E-4    POPTR_0002s00210
4466    0.0018315018387511373   POPTR_0002s00210
10      0.0036630036775022745   POPTR_0002s00210
7565    5.232862313278019E-4    POPTR_0002s00210
4467    0.0018315018387511373   POPTR_0002s00210
11      0.0036630036775022745   POPTR_0002s00210
2       1.0     POPTR_0002s00210

__END__
## Output ##
$VAR1 = {
          'genename' => [
                          'DBS',
                          'R^2',
                          'genename'
                        ],
          'POPTR_0002s00200' => [
                                  '801',
                                  '3.8991635767743',
                                  'POPTR_0002s00200',
                                  10
                                ],
          'POPTR_0002s00210' => [
                                  '3099',
                                  '1.85301140078809',
                                  'POPTR_0002s00210',
                                  10
                                ]
        };

-- 
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/


Reply via email to