On Mar 2, 2:44 pm, "Shawn Milo" <[EMAIL PROTECTED]> wrote: (snipped)
> I'm attaching both the Perl and Python versions, and I'm open to > comments on either. The script reads a file from standard input and > finds the best record for each unique ID (piid). The best is defined > as follows: The newest expiration date (field 5) for the record with > the state (field 1) which matches the desired state (field 6). If > there is no record matching the desired state, then just take the > newest expiration date. > > Thanks for taking the time to look at these. > My attempts: ### Perl ### #!/usr/bin/perl use strict; use warnings; use List::Util qw/reduce/; use constant { STATE => 1, DATE => 6, TARGET => 5, }; sub keep_best { my ($best, $current) = @_; if ($current->[STATE] eq $current->[TARGET]) { if ($best->[STATE] eq $best->[TARGET]) { if ($current->[DATE] gt $best->[DATE]) { return 0; } } else { return 0; } } elsif ( $best->[STATE] ne $best->[TARGET] and $current->[DATE] gt $best->[DATE]) { return 0; } return 1; } my %input; # while uses less memory than for: # the former is an iterator while (<>) { chomp; my @results = split(/\t/, $_); my $key = $results[0]; push @{$input{$key}}, [ @results, $_ ]; } # while uses less memory than for: # the former is an iterator while (my ($key, $aref ) = each %input) { my $best = reduce { keep_best( $a, $b ) ? $a : $b } @$aref; print $best->[-1], "\n"; } ### Python (re-working John's code) ### import sys def keep_best(best, current): ACTUAL_STATE = 1 # John had these swapped DESIRED_STATE = 5 EXPIRY_DATE = 6 keep = True if current[ACTUAL_STATE] == current[DESIRED_STATE]: if best[ACTUAL_STATE] == best[DESIRED_STATE]: if current[EXPIRY_DATE] > best[EXPIRY_DATE]: keep = False else: keep = False else: if (best[ACTUAL_STATE] != best[ACTUAL_STATE] and current[EXPIRY_DATE] > best[EXPIRY_DATE]): keep = False return keep def process_file(opened_file=sys.stdin): PIID = 0 recs = {} for line in opened_file: line = line.rstrip('\n') row = line.split('\t') row.append(line) piid = row[PIID] if piid not in recs: recs[piid] = [] recs[piid].append(row) for piid in recs: best = reduce(lambda b, c: keep_best(b, c) and b or c, recs[piid]) print best[-1] if __name__ == "__main__": process_file() # "reduce" seems to be Lispish, Pythonic, and Perlish! -- Hope this helps, Steve -- http://mail.python.org/mailman/listinfo/python-list