Maybe this will help? :)
#!/usr/bin/perl
# Keep only the chromosome positions that fall inside a reference range.
#
# Usage: <script> file1[ file2...]
#
# Input:
#   ref.txt        - reference ranges, one "<chromosome> <start> <end>" record
#                    per line, whitespace separated.
#   file1, file2.. - positions to test, one "<chromosome> <position>" record
#                    per line, whitespace separated (read via <>).
# Output:
#   new.txt        - every position lying inside a reference range of its
#                    chromosome (bounds inclusive), grouped by chromosome,
#                    each group followed by a line of 80 '=' characters.
use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

die 'Usage: ' . __FILE__ . " file1[ file2...]\n" unless @ARGV;

my $ref_file = 'ref.txt';
my $new_file = 'new.txt';

# --- Load the reference ranges -------------------------------------------
open my $ref_fh, '<', $ref_file
    or die "Failed to open reference file - $!\n";

my %limits_for;    # chromosome => [ { start => N, end => N }, ... ]
while ( my $line = <$ref_fh> ) {
    my ( $chromosome, $start, $end ) = split ' ', $line;

    # Skip blank lines, headers and any record without two numeric bounds.
    next
        unless defined $end
        && looks_like_number($start)
        && looks_like_number($end);

    # A chromosome may be listed more than once; keep every range instead
    # of letting a later line overwrite an earlier one.
    push @{ $limits_for{$chromosome} }, { start => $start, end => $end };
}
close $ref_fh;

# --- Load the positions to test ------------------------------------------
my %positions_for;    # chromosome => [ position, ... ] in input order
while ( my $line = <> ) {
    my ( $chromosome, $pos ) = split ' ', $line;

    # Skip blank lines and records whose position is missing or not a
    # number; comparing those numerically would only produce warnings.
    next unless defined $pos && looks_like_number($pos);

    push @{ $positions_for{$chromosome} }, $pos;
}

# --- Keep the positions that fall inside a reference range ---------------
my %in_limits_for;    # chromosome => [ position, ... ] that passed the test
for my $chromosome ( keys %positions_for ) {

    # Chromosomes absent from the reference file are discarded entirely.
    next unless exists $limits_for{$chromosome};

    my $ranges = $limits_for{$chromosome};
    $in_limits_for{$chromosome}
        = [ grep { in_any_range( $_, $ranges ) }
            @{ $positions_for{$chromosome} } ];
}

# --- Write the report ----------------------------------------------------
open my $new_fh, '>', $new_file
    or die "Failed to write out results - $!\n";

# Sorted so that the report is identical from one run to the next.
for my $chromosome ( sort keys %in_limits_for ) {
    for my $pos ( @{ $in_limits_for{$chromosome} } ) {
        printf {$new_fh} "%-7s %15s\n", $chromosome, $pos;
    }
    print {$new_fh} '=' x 80, "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close $new_fh
    or die "Failed to write out results - $!\n";

# Return 1 when $pos lies inside at least one range of @$ranges, else 0.
# Each range is a hash ref with inclusive 'start' and 'end' bounds.
sub in_any_range {
    my ( $pos, $ranges ) = @_;

    for my $range ( @{$ranges} ) {
        return 1 if $range->{start} <= $pos && $pos <= $range->{end};
    }
    return 0;
}

__END__

-- iD

2011/10/12 Nathalie Conte <n...@sanger.ac.uk>
> HI All,
> I have 2 sets of files I want to compare,and I don't know where to start to
> get what I want :(
> I have a reference file ( see ref for example) with a chromosome name, a
> start and a end position
> Chr7 115249090 115859515
> Chr8 25255496 29565459
> Chr13 198276698 298299815
> ChrX 109100951 109130998
>
>
> and I have a file (file_test) file I want to parse against this reference
> ref.txt
> Chr1 115249098 Chr1 1362705 Chr8 25255996 Chr8 1362714 Chr1
> 1362735 ChrX 109100997
> So if the position on the file_test is found in ref_file it is kept in a
> new file, if not discarded.
>
> I am looking for advises /modules I could use to compare those 2 files .
> many thanks in advance for any tips
> Nat
>
>
> --
> The Wellcome Trust Sanger Institute is operated by Genome Research Limited,
> a charity registered in England with number 1021457 and a company registered
> in England with number 2742969, whose registered office is 215 Euston Road,
> London, NW1 2BE.
> --
> To unsubscribe, e-mail: beginners-unsubscr...@perl.org
> For additional commands, e-mail: beginners-h...@perl.org
> http://learn.perl.org/
>
>
>