Hi,
I need your wisdom on this parsing script. I have a FASTQ file which
contains reads (from next-generation sequencing). In each record, the first
line starts with an @, the second contains the sequence in which I want to
count patterns, the third holds a + sign, and the fourth holds the sequence
quality (see the attached working example).

 I have created an array containing my patterns,
@sequences=qw{^TGGCAGTGGAGG ^TGTCTGGCAGTG ^TG....GCAGTG TCTGTCTG TCTGGCAG 
GCAGTGGA TGTCTGGC ^TGTCTGGC ^..TCTGGCAGTG ^TGTCTGGCAGTG ^TGCATGGC}. Some
patterns have to be at the beginning of the sequence, some do not.
I am trying to use the grep function to loop through the list in order to
test whether the sequence matches any of its elements; then I use a hash to
count. In the end I get an output file which contains the first line of each
FASTQ record as keys, and none of the elements from my @sequences (class
counted attached).
I would appreciate any pointers on this,
many thanks 
Nat


#!/usr/bin/perl
use strict;
use warnings;

 
my @sequences;
@sequences=qw{^TGGCAGTGGAGG ^TGTCTGGCAGTG ^TG....GCAGTG TCTGTCTG TCTGGCAG 
GCAGTGGA TGTCTGGC ^TGTCTGGC ^..TCTGGCAGTG ^TGTCTGGCAGTG ^TGCATGGC};
my %final_hash;

      while (<IN>) {
   if (/^\@/){
    my $seq=<IN>; get the sequences
    chomp;
    if (grep {$seq } @sequences){# I want to test if $seq contain anything
that will match with any of the element from @sequences, this is where it
goes wrong I think.
        if (!exists $final_hash{$_}) {
            $final_hash{$_}=1;
        } else {
        $final_hash {$_}++;
            }
    }
}
}
for my $key (sort {$final_hash {$b} <=> $final_hash {$a}}keys
%final_hash){
    my $value=$final_hash{$key};
     print OUT $key,"\t",$value, "\n";
}


-- 
 The Wellcome Trust Sanger Institute is operated by Genome Research 
 Limited, a charity registered in England with number 1021457 and a 
 company registered in England with number 2742969, whose registered 
 office is 215 Euston Road, London, NW1 2BE. 
@MF8V4:4:156
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTCTTATTTCATTTCTGTAACA
GTTGATATCTGGCTGTCCA
+
:<:<<>;<=====>>==<:893583368-9<;:;1::35::;;6;;7;;99+7<<-;<::136999499::4:9189;;<
1::9;8851.0+...+2/2

@MF8V4:4:162
CTCTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAAATTTTCCGTCTTATTTCATTTCTGT
AACAGTTGATACTTCGTAGGCTGTCCA
+
88888988885.21*205168888.833555+888.6.68-12+23*04444444&446,662/-4355606766136.4
3(+,,-*--+-(+&,+---),10++.1

@MF8V4:4:164
CTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAAATTTTCCGTCTTATTTCATTTCTAGTT
ACATATTGATATCTGTCTGTCA
+
8889888887886848865565-545888,88868188056155+14+..000&233(6-+(,,,1,1-..11+22.+-+
((+,(,&-..,,..,,131-0+

@MF8V4:4:170
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTTCTTAATTTCATTTCTGTAA
CAGTTGATATCTGGCTGTCA
+
>>>>;;21,22;=;;<<9998/391034*3;9<<5<=;>>=<;265+-//0),<<,27,,(,5/2.76/58990998785
::::<<;97==10.,85,,(

@MF8V4:4:171
CTCTAGGCTGAGATATGAGGAAGTACTCTTTAAGAAAATAGTTTAAACAATTAT
+
864/.+*++---,0,1,155,.,10/-+--',,,,,,&,/.,1(11+11.1211

@MF8V4:4:191
ATTCAAGATCTCTGCGAGAGTGGAAGATCTTCTTAAAGAAATAGTTTAACATTGTTAAATTTCAATTTCATTATTCTTTC
TTGATTTGTGATACATTGATA
+
668884886556/-+-,((+,0-0),(+,-*,,(0/),,,&(/--/&--/011-1(11+,0'-+'--&,,(/,1,0-/&(
,,(((,&+++/0/,+(-0,,,

@MF8V4:4:197
CAAGATCTCTGGCAGTGAGGAAGTCCTTATAAGTAAGATTAAGATTTGTAAAACATTTTTGTAAAATTTTCCTATCATTT
CTACTGAATTATATGTCTGCCA
+
88958898888888,,)3823/351*0,,((+,+,((-/+-*+---'+0000&,,(,,,&(,(,,&(//&0,,(,--,-(
cbi4a[nac]17: which perl
/software/bin/perl
cbi4a[nac]18: more test_33pbremoved.fatsq
@MF8V4:4:156
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTCTTATTTCATTTCTGTAACA
GTTGATATCTGGCTGTCCA
+
:<:<<>;<=====>>==<:893583368-9<;:;1::35::;;6;;7;;99+7<<-;<::136999499::4:9189;;<
1::9;8851.0+...+2/2

@MF8V4:4:162
CTCTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAAATTTTCCGTCTTATTTCATTTCTGT
AACAGTTGATACTTCGTAGGCTGTCCA
+
88888988885.21*205168888.833555+888.6.68-12+23*04444444&446,662/-4355606766136.4
3(+,,-*--+-(+&,+---),10++.1

@MF8V4:4:164
CTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAAATTTTCCGTCTTATTTCATTTCTAGTT
ACATATTGATATCTGTCTGTCA
+
8889888887886848865565-545888,88868188056155+14+..000&233(6-+(,,,1,1-..11+22.+-+
((+,(,&-..,,..,,131-0+

@MF8V4:4:170
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTTCTTAATTTCATTTCTGTAA
CAGTTGATATCTGGCTGTCA
+
>>>>;;21,22;=;;<<9998/391034*3;9<<5<=;>>=<;265+-//0),<<,27,,(,5/2.76/58990998785
::::<<;97==10.,85,,(

@MF8V4:4:171
CTCTAGGCTGAGATATGAGGAAGTACTCTTTAAGAAAATAGTTTAAACAATTAT
+
864/.+*++---,0,1,155,.,10/-+--',,,,,,&,/.,1(11+11.1211

@MF8V4:4:191
ATTCAAGATCTCTGCGAGAGTGGAAGATCTTCTTAAAGAAATAGTTTAACATTGTTAAATTTCAATTTCATTATTCTTTC
TTGATTTGTGATACATTGATA
+
668884886556/-+-,((+,0-0),(+,-*,,(0/),,,&(/--/&--/011-1(11+,0'-+'--&,,(/,1,0-/&(
,,(((,&+++/0/,+(-0,,,

@MF8V4:4:197
CAAGATCTCTGGCAGTGAGGAAGTCCTTATAAGTAAGATTAAGATTTGTAAAACATTTTTGTAAAATTTTCCTATCATTT
CTACTGAATTATATGTCTGCCA
+
88958898888888,,)3823/351*0,,((+,+,((-/+-*+---'+0000&,,(,,,&(,(,,&(//&0,,(,--,-(
(,(,((1+1(1,,22((,,(&(

@MF8V4:4:199
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAAATTTTCCGTCTTATTTCATTTCTAGTAA
CATGTTGATATCTGGTCGTCCA
+
======<;8988968996344,499;<<1<<<;;+11*,73783:9788889'999+9744.6*,(2-5552,30.0/+4
3265564561/++-(--+-+*/

@MF8V4:4:210
CAAGATCTCTGGCAGTGAGGAAGTCTCTTTAGGAAAATAGTTTAAACATTTGTTAAATTTCAATTTCATTATTTCTTGTT
ACATTTCATATACGTTGATA
+
8914399:::::98710,3,5/0444110&,(&,..&14444/66-21+0&-/./0),-&--+-0)/--.(,,&,-((,/
--++-&+(-(,(,(,,,((,

@MF8V4:4:211
CAAGATCTCTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCATCTTATTTCATT
TCTGTAACAGTTGATATCTGGCTGTCCA
+
:;99<<<<<<<<<<<::69866;<<<<<<<;<<<<<<176588(//'+6,22*.5+0122&-55(6/50144567499::
156054676073977675//)+00.0+3

@MF8V4:4:214
TCAAGATCTCTGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTCTTATTTCAT
TTCTGGTAACATGTTGATATCTGGCTGTCA
+
778746268888;898886889838666,,,&,4,+,,&,11-1/44/44544/4464444(665(500++10001-144
1-14,,1-/221113544400.0*+--(,(

@MF8V4:4:215
CAAGATCTCTGGCAGTGGAGGAAGTCTCCTTAAGAAAATAGTTTCAACAATTTGTTAAAAATTTTCCATCATTATTTCAT
TTCTGTTAATCAGTTGAGTATCTGCTGTCGA
+
;;576;::;;;77::9:6;:::::8735(+(,22369+877770463843771-4-....&+--'+/22+,(),22-432
4.441+&1--23,63---343/.1,3,.0+0

@MF8V4:4:558
TGGCAGTGAAGAAGTCTCTTTAGAAAATAGTTTAAACATTGTTAAATTCAATTTCATTATTCTTATTACATTCTATACGT
TGATA
+
>>>>::0,)(,(/1122355',(,77*89999065,1,(((/*//'+0//-/0(/+00.0/00&,(-//,(((++-/-,/
*/-,-

@MF8V4:4:580
TGGCAGCGGAGGAAGCTCTTAGAAAGAAATAGTTTAAACAGTTTTATAAATTTTCCGATCTTATTTCATTCTGTAACAGT
TGATATCTGTCGTCCAC
+
>;;;=,,(776276,((31*--..&,-3,262.2122-20+---&+++0+.22)2+-0/--+/--(/0,,,2-2286628
776400(++(-+((*--

@MF8V4:4:602
TGGCAGTGGAGGAAGTCTCTTTAAGAAAATAGTTTAAACAATTTGTTAAAAATTTTCCGTCTTATTTCATTTCTGTAACA
TGTTGATATCTGGCTGTCCA
+
>>>>>>>=:=>===9=;3+,.&,6669=08<<3;6;;79;7;;69::;<<<,<<</<:65080209518997:<195;91
/2;;;::40.++(+..+316

@MF8V4:4:605
TGGCAGCGGAGGAAGCCTCTTGAAGAAAATAGTTTAAACAGTTTTTTATAAGTTTTCCGTCTTATTTCACTTCTGTAACA
TGTTGATATCTGGCGTTCA
+
<=====<<:<:<<<<9;9<:;7;;;<<</<8817*33+05345778'57748977*4-110304281699:89908:;:5
25:7/0,712..(((,&,(

@MF8V4:4:607
TGGCAGTGGTGGAAGTCTCTTGAAGAAAATAGTTTAAACAATTTATTCAACATTTTCTGTCTTATTTCATTTCTGTAACA
GTTGATATCTGGCT
+
>>>=>>>>>>>>>>>>>>>=<<=;;;<<3=;9=>.6835===<7;8;54//5568-99997998990878;3:;;;<<<=
===<===>;===9<

@MF8V4:4:620
TGGCAGTGGAGGAAGTCTCCTTAAGAAAATAGTTTCAACTAATTTAGTTAAAAATTTTCCATCAATTATTTCATTTCTGA
TAATCAGTTGTATATCTGACTGTCCGA
+
9<::=============:<98-1--99;/:=:::6/8;657775055265222'555*6412,6267643*.+(,&(+-,
,(1221-1--00-/0.+,(----/&-/
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:197    2
@MF8V4:4:162    2
@MF8V4:4:156    2
@MF8V4:4:171    2
@MF8V4:4:191    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:558    1
@MF8V4:4:607    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:580    1
@MF8V4:4:605    1
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:211    1
@MF8V4:4:156    2
@MF8V4:4:197    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:162    2
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:211    1
@MF8V4:4:197    2
@MF8V4:4:162    2
@MF8V4:4:156    2
@MF8V4:4:171    2
@MF8V4:4:191    2
@MF8V4:4:170    2
@MF8V4:4:164    2
@MF8V4:4:558    1
@MF8V4:4:607    1
@MF8V4:4:210    1
@MF8V4:4:602    1
@MF8V4:4:215    1
@MF8V4:4:214    1
@MF8V4:4:580    1
@MF8V4:4:605    1
@MF8V4:4:620    1
@MF8V4:4:199    1
@MF8V4:4:211    1
-- 
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/

Reply via email to