This is an automated email from the git hooks/post-receive script. satta pushed a commit to branch master in repository roary.
commit 622661bfc8d75dd459351df08730a74ea63c24ed Author: Sascha Steinbiss <[email protected]> Date: Wed Oct 11 10:17:28 2017 +0200 New upstream version 3.11.0+dfsg --- .travis.yml | 11 ++- dist.ini | 4 +- install_dependencies.sh | 12 ++-- lib/Bio/Roary/CommandLine/Roary.pm | 2 +- lib/Bio/Roary/External/CheckTools.pm | 8 +-- lib/Bio/Roary/MergeMultifastaAlignments.pm | 2 +- lib/Bio/Roary/PrepareInputFiles.pm | 81 +++++++++++++++------- lib/Bio/Roary/ReformatInputGFFs.pm | 72 +++++++++++++++---- t/Bio/Roary/ReformatInputGFFs.t | 12 ++-- t/data/expected_core_gene_alignment_core0.66.aln | 2 +- t/data/overall_gene_presence_absence.csv | 42 +++++------ .../reformat_input_gffs/expected_fixed_query_2.gff | 10 +-- 12 files changed, 168 insertions(+), 90 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8896bae..5789ffe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,20 +8,19 @@ addons: cache: directories: - build/parallel-20160722 - - build/parallel-20130922 + - build/parallel-20170822 - build/bedtools2 - - build/cd-hit-v4.6.6-2016-0711 + - build/cd-hit-v4.6.8-2017-0621 - build/prank-msa-master - - build/ncbi-blast-2.4.0+ + - build/ncbi-blast-2.6.0+ - build/mcl-14-137 - build/fasttree perl: - "5.14" - - "5.20" - - "5.24" + - "5.26" env: - PARALLEL_VERSION=20160722 - - PARALLEL_VERSION=20130922 + - PARALLEL_VERSION=20170822 install: - "source ./install_dependencies.sh" script: "ROARY_FULL_TESTS=1 dzil test" diff --git a/dist.ini b/dist.ini index 4d8b414..e673cd7 100644 --- a/dist.ini +++ b/dist.ini @@ -1,9 +1,9 @@ name = Bio-Roary -version = 3.9.1 author = Andrew J. Page <[email protected]> license = GPL_3 copyright_holder = Wellcome Trust Sanger Institute copyright_year = 2013 +version = 3.11.0 main_module = lib/Bio/Roary.pm [MetaResources] @@ -12,7 +12,7 @@ repository.web = http://sanger-pathogens.github.io/Roary/ repository.url = https://github.com/sanger-pathogens/Roary.git repository.type = git -[@Basic] +[@Starter] [PruneCruft] [ExtraTests] [AutoPrereqs] diff --git a/install_dependencies.sh b/install_dependencies.sh index 41aecc2..253ed7b 100755 --- a/install_dependencies.sh +++ b/install_dependencies.sh @@ -15,16 +15,16 @@ BEDTOOLS_VERSION="2.26.0" BEDTOOLS_DOWNLOAD_FILENAME="bedtools-${BEDTOOLS_VERSION}.tar.gz" BEDTOOLS_URL="https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VERSION}/${BEDTOOLS_DOWNLOAD_FILENAME}" -CDHIT_SHORT_VERSION="4.6.6" -CDHIT_LONG_VERSION="4.6.6-2016-0711" +CDHIT_SHORT_VERSION="4.6.8" +CDHIT_LONG_VERSION="4.6.8-2017-0621" CDHIT_DOWNLOAD_FILENAME="cd-hit-${CDHIT_SHORT_VERSION}.tar.gz" -CDHIT_URL="https://github.com/weizhongli/cdhit/releases/download/V${CDHIT_SHORT_VERSION}/cd-hit-v${CDHIT_LONG_VERSION}.tar.gz" +CDHIT_URL="https://github.com/weizhongli/cdhit/releases/download/V${CDHIT_SHORT_VERSION}/cd-hit-v${CDHIT_LONG_VERSION}-source.tar.gz" PRANK_VERSION="0.140603" PRANK_DOWNLOAD_FILENAME="prank-msa-master.tar.gz" PRANK_URL="https://github.com/ariloytynoja/prank-msa/archive/master.tar.gz" -BLAST_VERSION="2.4.0" +BLAST_VERSION="2.6.0" BLAST_DOWNLOAD_FILENAME="ncbi-blast-${BLAST_VERSION}+-x64-linux.tar.gz" BLAST_URL="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VERSION}/${BLAST_DOWNLOAD_FILENAME}" @@ -32,11 +32,11 @@ MCL_VERSION="14-137" MCL_DOWNLOAD_FILENAME="mcl-${MCL_VERSION}.tar.gz" MCL_URL="http://micans.org/mcl/src/mcl-${MCL_VERSION}.tar.gz" -FASTTREE_VERSION="2.1.9" +FASTTREE_VERSION="2.1.10" FASTTREE_DOWNLOAD_FILENAME="FastTree-${FASTTREE_VERSION}.c" FASTTREE_URL="http://microbesonline.org/fasttree/FastTree-${FASTTREE_VERSION}.c" -MAFFT_VERSION="7.271" +MAFFT_VERSION="7.310" MAFFT_DOWNLOAD_FILENAME="mafft-${MAFFT_VERSION}-without-extensions-src.tgz" MAFFT_URL="http://mafft.cbrc.jp/alignment/software/${MAFFT_DOWNLOAD_FILENAME}" diff --git a/lib/Bio/Roary/CommandLine/Roary.pm b/lib/Bio/Roary/CommandLine/Roary.pm index 3052b6c..e2335a0 100644 --- a/lib/Bio/Roary/CommandLine/Roary.pm +++ b/lib/Bio/Roary/CommandLine/Roary.pm @@ -154,7 +154,7 @@ sub BUILD { $self->perc_identity($perc_identity); if ( $perc_identity < 50 ) { $self->logger->error( -"The percentage identity is too low. Either somethings wrong with your data, like contamination, or your doing something that the software isnt designed to support." +"The percentage identity is too low. Either something is wrong with your data, like contamination, or your doing something that the software isnt designed to support." ); } } diff --git a/lib/Bio/Roary/External/CheckTools.pm b/lib/Bio/Roary/External/CheckTools.pm index b05593a..dc01fd5 100644 --- a/lib/Bio/Roary/External/CheckTools.pm +++ b/lib/Bio/Roary/External/CheckTools.pm @@ -57,17 +57,17 @@ my %tools = ( }, 'mafft' => { GETVER => "mafft --version < /dev/null 2>&1", - REGEXP => qr/v($BIDEC) /, + REGEXP => qr/(\d+\.\d+) /, NEEDED => 1, }, 'kraken' => { GETVER => "kraken --version | head -n 1", - REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/, + REGEXP => qr/(\d+\.\d+\.\d+.*)/, NEEDED => 0, }, 'kraken-report' => { GETVER => "kraken-report --version | head -n 1", - REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/, + REGEXP => qr/(\d+\.\d+\.\d+.*)/, NEEDED => 0, }, 'Rscript' => { @@ -78,7 +78,7 @@ my %tools = ( }, # prank version also performs an update check so cant use it - 'prank' => { NEEDED => 1 }, + 'prank' => { NEEDED => 0 }, # now just the standard unix tools we need 'grep' => { NEEDED => 1 }, diff --git a/lib/Bio/Roary/MergeMultifastaAlignments.pm b/lib/Bio/Roary/MergeMultifastaAlignments.pm index f74d4ac..1355cf5 100644 --- a/lib/Bio/Roary/MergeMultifastaAlignments.pm +++ b/lib/Bio/Roary/MergeMultifastaAlignments.pm @@ -83,7 +83,7 @@ sub _sequence_for_sample_from_gene_file { sub _padded_string_for_gene_file { my ( $self, $gene_file ) = @_; return '' unless ( defined( $self->_gene_lengths->{$gene_file} ) ); - return 'N' x ( $self->_gene_lengths->{$gene_file} ); + return '-' x ( $self->_gene_lengths->{$gene_file} ); } sub _create_merged_sequence_for_sample { diff --git a/lib/Bio/Roary/PrepareInputFiles.pm b/lib/Bio/Roary/PrepareInputFiles.pm index bedb9bb..aaf2a19 100644 --- a/lib/Bio/Roary/PrepareInputFiles.pm +++ b/lib/Bio/Roary/PrepareInputFiles.pm @@ -18,32 +18,39 @@ use Moose; use Bio::Roary::Exceptions; use Bio::Roary::ExtractProteomeFromGFFs; use Bio::Roary::FilterUnknownsFromFasta; -use Cwd qw(getcwd); +use Cwd qw(getcwd); use File::Temp; - -has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); -has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' ); -has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 ); -has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_gff_files' ); -has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' ); -has '_input_fasta_files_filtered' => - ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' ); +use Log::Log4perl qw(:easy); + +has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); +has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' ); +has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 ); +has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_gff_files' ); +has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' ); +has '_input_fasta_files_filtered' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' ); has '_input_fasta_files_filtered_obj' => - ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' ); + ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' ); -has '_derived_fasta_files' => - ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' ); +has '_derived_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' ); has '_extract_proteome_obj' => ( is => 'ro', isa => 'Bio::Roary::ExtractProteomeFromGFFs', lazy => 1, builder => '_build__extract_proteome_obj' ); -has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 ); -has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 ); -has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); -has '_fasta_filter_obj' => ( is => 'ro', isa => 'Bio::Roary::FilterUnknowsFromFasta', lazy => 1, builder => '_fasta_filter_obj' ); -has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } ); +has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 ); +has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 ); +has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); +has '_fasta_filter_obj' => ( is => 'ro', isa => 'Bio::Roary::FilterUnknowsFromFasta', lazy => 1, builder => '_fasta_filter_obj' ); +has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } ); +has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' ); + +sub _build_logger { + my ($self) = @_; + Log::Log4perl->easy_init($ERROR); + my $logger = get_logger(); + return $logger; +} sub _build__input_gff_files { my ($self) = @_; @@ -54,6 +61,33 @@ sub _build__input_gff_files { sub _build__input_fasta_files { my ($self) = @_; my @fasta_files = grep( !/\.gff$/, @{ $self->input_files } ); + + my @validated_fasta_files; + + for my $fasta_file (@fasta_files) { + eval { + my $inseq = Bio::SeqIO->new( + -file => $fasta_file, + -format => 'fasta', + -alphabet => 'protein' + ); + while ( my $seq = $inseq->next_seq ) { + + # do something to force the reading. + $seq->seq; + } + }; + if ($@) { + $self->logger->warn( + "Input file doesnt have a .gff extension and isnt a protein FASTA file so excluding it from further analysis: $fasta_file" + ); + } + else { + push( @validated_fasta_files, $fasta_file ); + } + + } + return \@fasta_files; } @@ -62,11 +96,10 @@ sub _build__input_fasta_files_filtered_obj { return Bio::Roary::FilterUnknownsFromFasta->new( fasta_files => $self->_input_fasta_files ); } -sub _build__input_fasta_files_filtered -{ - my ($self) = @_; - return undef if ( !defined( $self->_input_fasta_files ) ); - return $self->_input_fasta_files_filtered_obj->filtered_fasta_files(); +sub _build__input_fasta_files_filtered { + my ($self) = @_; + return undef if ( !defined( $self->_input_fasta_files ) ); + return $self->_input_fasta_files_filtered_obj->filtered_fasta_files(); } sub _build__extract_proteome_obj { @@ -77,8 +110,8 @@ sub _build__extract_proteome_obj { apply_unknowns_filter => $self->apply_unknowns_filter, translation_table => $self->translation_table, cpus => $self->cpus, - verbose => $self->verbose, - working_directory => $self->working_directory, + verbose => $self->verbose, + working_directory => $self->working_directory, ); } diff --git a/lib/Bio/Roary/ReformatInputGFFs.pm b/lib/Bio/Roary/ReformatInputGFFs.pm index 0ae52ce..4d1dd03 100644 --- a/lib/Bio/Roary/ReformatInputGFFs.pm +++ b/lib/Bio/Roary/ReformatInputGFFs.pm @@ -18,10 +18,12 @@ Take in gff files and add suffix where a gene id is seen twice use Moose; use Bio::Roary::Exceptions; use Cwd; +use File::Copy; use Log::Log4perl qw(:easy); use Bio::Tools::GFF; use File::Path qw(make_path); use File::Basename; +use Digest::MD5::File qw(file_md5_hex); has 'gff_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' ); @@ -42,47 +44,93 @@ sub fix_duplicate_gene_ids { my ($self) = @_; my %gene_ids_seen_before; + + my %file_md5s; + for my $file ( @{ $self->gff_files } ) { - + my $digest = file_md5_hex($file); + + if(defined($file_md5s{$digest})) + { + $self->logger->warn( + "Input files have identical MD5 hashes, only using the first file: ".$file_md5s{$digest}." == ".$file + ); + next; + } + else + { + $file_md5s{$digest} = $file; + } + my $ids_seen = 0; my $ids_from_file = $self->_get_ids_for_gff_file($file); if ( @{$ids_from_file} < 1 ) { - $self->logger->warn( + $self->logger->error( "Input GFF file doesnt contain annotation we can use so excluding it from the analysis: $file" ); } else { for my $gene_id ( @{$ids_from_file} ) { if ( $gene_ids_seen_before{$gene_id} ) { - $self->logger->warn( - "Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix. New GFF in the fixed_input_files directory. $file " + $self->logger->error( + "Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix, new GFF in the fixed_input_files directory: $file " ); - my $updated_file = $self->_add_suffix_to_gene_ids_and_return_new_file($file); + my $updated_file = $self->_add_suffix_to_gene_ids_and_return_new_file($file, $digest); push( @{ $self->fixed_gff_files }, $updated_file ) if ( defined($updated_file) ); $ids_seen = 1; last; } $gene_ids_seen_before{$gene_id}++; } + + # We know its a valid GFF file since we could open it and extract IDs. + # We need to make sure the filenames end in .gff. If it contained duplicate IDs, then they are fixed so nothing to do, but + # if they didnt, then we have to double check and repair if necessary. if ( $ids_seen == 0 ) { - push( @{ $self->fixed_gff_files }, $file ); + + + push( @{ $self->fixed_gff_files }, $self->_fix_gff_file_extension($file) ); } } } return 1; } +sub _fix_gff_file_extension +{ + my ( $self, $input_file ) = @_; + + my ( $filename, $directories, $suffix ) = fileparse( $input_file, qr/\.[^.]*/ ); + return $input_file if($suffix eq '.gff'); + + + make_path( $self->output_directory ) if ( !( -d $self->output_directory ) ); + my $output_file = $self->output_directory . '/' . $filename . '.gff'; + copy($input_file, $output_file) or $self->logger->error("Couldnt copy file with invalid gff extention: $input_file -> $output_file"); + return $output_file; +} + + sub _add_suffix_to_gene_ids_and_return_new_file { - my ( $self, $input_file ) = @_; + my ( $self, $input_file, $digest ) = @_; my ( $filename, $directories, $suffix ) = fileparse( $input_file, qr/\.[^.]*/ ); make_path( $self->output_directory ) if ( !( -d $self->output_directory ) ); - my $output_file = $self->output_directory . '/' . $filename . $suffix; + my $output_file = $self->output_directory . '/' . $filename . '.gff'; open( my $input_gff_fh, $input_file ); open( my $out_gff_fh, '>', $output_file ); + + # There is a chance that there can be a collision here, but its remote. + my $random_locus_tag = "".$digest; + + $self->logger->warn( + "Renamed GFF file from: $input_file -> $output_file" ); + $self->logger->warn( + "Locus tag used is '$random_locus_tag' for file: $input_file" ); my $found_fasta = 0; + my $gene_counter = 1; while (<$input_gff_fh>) { my $line = $_; @@ -102,15 +150,15 @@ sub _add_suffix_to_gene_ids_and_return_new_file { if ( $tags[$i] =~ /^(ID=["']?)([^;"']+)(["']?)/ ) { my $current_id = $2; $current_id .= '___' . $self->suffix_counter; - $tags[$i] = $1 . $current_id . $3; - $self->suffix_counter( $self->suffix_counter + 1 ); + $tags[$i] = $1 .$random_locus_tag.'_'. $gene_counter . $3; + $gene_counter++; $found_id++; last; } } if ( $found_id == 0 ) { - unshift( @tags, 'ID=id___' . $self->suffix_counter ); - $self->suffix_counter( $self->suffix_counter + 1 ); + unshift( @tags, 'ID=' . $random_locus_tag.'_'. $gene_counter ); + $gene_counter++; } $cells[8] = join( ';', @tags ); print {$out_gff_fh} join( "\t", @cells ); diff --git a/t/Bio/Roary/ReformatInputGFFs.t b/t/Bio/Roary/ReformatInputGFFs.t index 16ad53b..dd68fc1 100755 --- a/t/Bio/Roary/ReformatInputGFFs.t +++ b/t/Bio/Roary/ReformatInputGFFs.t @@ -45,21 +45,19 @@ ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist'); compare_ok('fixed_input_files/query_2.gff', 't/data/reformat_input_gffs/expected_fixed_query_2.gff', 'fixed file should have expected changes'); remove_tree('fixed_input_files'); - -ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff', 't/data/reformat_input_gffs/query_3.gff']), 'initialise with 3 input gffs'); +ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff', 't/data/reformat_input_gffs/query_3.gff']), 'initialise with 3 input gffs, 2 identical duplicates'); ok(!( -d 'fixed_input_files'), 'Directory shouldnt exist before running'); ok($obj->fix_duplicate_gene_ids, 'fix duplicates with 3 input gffs'); -ok(( -d 'fixed_input_files'), 'Directory should exist because there is 2 gffs thats fixed'); -is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff','fixed_input_files/query_3.gff' ] ,'list of gff files 2 in the fixed directory'); +ok(( -d 'fixed_input_files'), 'Directory should exist because there are 2 gffs thats fixed'); +is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff' ] ,'list of gff files 2 in the fixed directory'); ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist'); -ok(( -e 'fixed_input_files/query_3.gff'), 'fixed file should exist'); +ok(!( -e 'fixed_input_files/query_3.gff'), 'fixed file should exist'); compare_ok('fixed_input_files/query_2.gff','t/data/reformat_input_gffs/expected_fixed_query_2.gff', 'fixed file should have expected changes'); -compare_ok('fixed_input_files/query_3.gff', 't/data/reformat_input_gffs/expected_fixed_query_3.gff', 'fixed file should have expected changes'); remove_tree('fixed_input_files'); ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/real_1.gff']), 'initialise with 1 gff that has shown to have a bug'); -ok(my $fixed_file = $obj->_add_suffix_to_gene_ids_and_return_new_file('t/data/reformat_input_gffs/real_1.gff'), 'fix duplicates'); +ok(my $fixed_file = $obj->_add_suffix_to_gene_ids_and_return_new_file('t/data/reformat_input_gffs/real_1.gff', 'id__'), 'fix duplicates'); ok(( -e 'fixed_input_files/real_1.gff'), 'fixed file should exist'); compare_ok('fixed_input_files/real_1.gff', 't/data/reformat_input_gffs/expected_real_1.gff', 'fixed file should have expected changes'); remove_tree('fixed_input_files'); diff --git a/t/data/expected_core_gene_alignment_core0.66.aln b/t/data/expected_core_gene_alignment_core0.66.aln index e542299..9309965 100644 --- a/t/data/expected_core_gene_alignment_core0.66.aln +++ b/t/data/expected_core_gene_alignment_core0.66.aln @@ -3,4 +3,4 @@ TTTTT >query_2 GGGGG >query_3 -NNNNN +----- diff --git a/t/data/overall_gene_presence_absence.csv b/t/data/overall_gene_presence_absence.csv index d828265..fd16e85 100644 --- a/t/data/overall_gene_presence_absence.csv +++ b/t/data/overall_gene_presence_absence.csv @@ -1,22 +1,22 @@ "Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2","query_5" -"hly","","Alpha-toxin","2","2","1","1","1","1","7","","959","959","959","1_1","2_1___1","" -"group_10","","hypothetical protein","2","2","1","1","10","1","6","","227","227","227","abc_00010","abc_00010___10","" -"group_11","","C4-dicarboxylate transporter/malic acid transport protein","2","2","1","1","11","1","5","","947","947","947","abc_00011","abc_00011___11","" -"group_12","","hypothetical protein","2","2","1","1","12","1","4","","188","188","188","abc_00012","abc_00012___12","" -"group_13","","Gonococcal growth inhibitor III","2","2","1","1","13","1","3","","134","134","134","abc_00014","abc_00014___14","" -"group_14","","Gonococcal growth inhibitor III","2","2","1","1","14","1","2","","134","134","134","1_6","2_7___15","" -"yfnB","","Putative HAD-hydrolase yfnB","2","2","1","1","15","1","1","","686","686","686","abc_00016","abc_00016___16","" -"group_2","","hypothetical protein","2","2","1","1","2","1","8","","146","146","146","abc_00002","abc_00002___2","" -"group_3","","hypothetical protein","2","2","1","1","3","1","9","","197","197","197","abc_00003","abc_00003___3","" -"group_4","","superantigen-like protein","2","2","1","1","4","1","10","","716","716","716","abc_00004","abc_00004___4","" -"speH","","hypothetical protein","2","2","1","1","5","1","11","","725","725","725","1_2","2_2___5","" -"group_6","","superantigen-like protein","2","2","1","1","6","1","12","","725","725","725","abc_00006","abc_00006___6","" -"argF","","Ornithine carbamoyltransferase","2","2","1","1","7","1","13","","1001","1001","1001","1_3","2_3___7","" -"arcC1","","Carbamate kinase 1","2","2","1","1","8","1","14","","935","935","935","abc_00008","abc_00008___8","" -"group_9","","16S ribosomal RNA","2","2","1","1","9","1","15","","1556","1556","1556","abc_01705","abc_01705___9","" -"group_16","","hypothetical protein","1","1","1","2","1","2","6","","146","146","146","","","abc_50002" -"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","6","2","5","","1001","1001","1001","","","3_3" -"group_18","","hypothetical protein","1","1","1","2","5","2","4","","227","227","227","","","abc_50010" -"group_19","","hypothetical protein","1","1","1","2","4","2","3","","188","188","188","","","abc_50012" -"group_20","","Gonococcal growth inhibitor III","1","1","1","2","3","2","2","","134","134","134","","","abc_50014" -"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","2","2","1","","686","686","686","","","3_5" +"hly","","Alpha-toxin","2","2","1","1","14","1","1","","959","959","959","1_1","05a85fcc1cbac7027ac3689992006154_1","" +"group_10","","hypothetical protein","2","2","1","1","6","1","11","","227","227","227","abc_00010","05a85fcc1cbac7027ac3689992006154_10","" +"group_11","","C4-dicarboxylate transporter/malic acid transport protein","2","2","1","1","5","1","10","","947","947","947","abc_00011","05a85fcc1cbac7027ac3689992006154_11","" +"group_12","","hypothetical protein","2","2","1","1","4","1","9","","188","188","188","abc_00012","05a85fcc1cbac7027ac3689992006154_12","" +"group_13","","Gonococcal growth inhibitor III","2","2","1","1","3","1","8","","134","134","134","abc_00014","05a85fcc1cbac7027ac3689992006154_14","" +"group_14","","Gonococcal growth inhibitor III","2","2","1","1","2","1","7","","134","134","134","1_6","05a85fcc1cbac7027ac3689992006154_15","" +"yfnB","","Putative HAD-hydrolase yfnB","2","2","1","1","1","1","6","","686","686","686","abc_00016","05a85fcc1cbac7027ac3689992006154_16","" +"group_2","","hypothetical protein","2","2","1","1","15","1","2","","146","146","146","abc_00002","05a85fcc1cbac7027ac3689992006154_2","" +"group_3","","hypothetical protein","2","2","1","1","13","1","3","","197","197","197","abc_00003","05a85fcc1cbac7027ac3689992006154_3","" +"group_4","","superantigen-like protein","2","2","1","1","12","1","4","","716","716","716","abc_00004","05a85fcc1cbac7027ac3689992006154_4","" +"speH","","hypothetical protein","2","2","1","1","11","1","5","","725","725","725","1_2","05a85fcc1cbac7027ac3689992006154_5","" +"group_6","","superantigen-like protein","2","2","1","1","10","1","15","","725","725","725","abc_00006","05a85fcc1cbac7027ac3689992006154_6","" +"argF","","Ornithine carbamoyltransferase","2","2","1","1","9","1","14","","1001","1001","1001","1_3","05a85fcc1cbac7027ac3689992006154_7","" +"arcC1","","Carbamate kinase 1","2","2","1","1","8","1","13","","935","935","935","abc_00008","05a85fcc1cbac7027ac3689992006154_8","" +"group_9","","16S ribosomal RNA","2","2","1","1","7","1","12","","1556","1556","1556","abc_01705","05a85fcc1cbac7027ac3689992006154_9","" +"group_16","","hypothetical protein","1","1","1","2","6","2","3","","146","146","146","","","abc_50002" +"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","5","2","4","","1001","1001","1001","","","3_3" +"group_18","","hypothetical protein","1","1","1","2","4","2","5","","227","227","227","","","abc_50010" +"group_19","","hypothetical protein","1","1","1","2","3","2","6","","188","188","188","","","abc_50012" +"group_20","","Gonococcal growth inhibitor III","1","1","1","2","2","2","2","","134","134","134","","","abc_50014" +"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","1","2","1","","686","686","686","","","3_5" diff --git a/t/data/reformat_input_gffs/expected_fixed_query_2.gff b/t/data/reformat_input_gffs/expected_fixed_query_2.gff index 1cabef6..a425bb4 100644 --- a/t/data/reformat_input_gffs/expected_fixed_query_2.gff +++ b/t/data/reformat_input_gffs/expected_fixed_query_2.gff @@ -1,10 +1,10 @@ ##gff-version 3 ##sequence-region abc|SC|contig000001 1 15000 -abc|SC|contig000001 Prodigal:2.60 CDS 172 1131 . - 0 ID=1_1___1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001 -abc|SC|contig000001 Prodigal:2.60 CDS 1804 1950 . + 0 ID=abc_00002___2;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002 -abc|SC|contig000001 Prodigal:2.60 CDS 1934 2131 . + 0 inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=abc_00003___3;product=hypothetical protein;protein_id=gnl|SC|abc_00003 -abc|SC|contig000001 Prodigal:2.60 CDS 2621 3337 . - 0 ID=abc_00004___4;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004 -abc|SC|contig000001 Prodigal:2.60 CDS 3445 4170 . - 0 gene=speH;ID=1_2___5;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005 +abc|SC|contig000001 Prodigal:2.60 CDS 172 1131 . - 0 ID=5d3897f59edf296200f1c7de895509e1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001 +abc|SC|contig000001 Prodigal:2.60 CDS 1804 1950 . + 0 ID=5d3897f59edf296200f1c7de895509e1_2;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002 +abc|SC|contig000001 Prodigal:2.60 CDS 1934 2131 . + 0 inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=5d3897f59edf296200f1c7de895509e1_3;product=hypothetical protein;protein_id=gnl|SC|abc_00003 +abc|SC|contig000001 Prodigal:2.60 CDS 2621 3337 . - 0 ID=5d3897f59edf296200f1c7de895509e1_4;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004 +abc|SC|contig000001 Prodigal:2.60 CDS 3445 4170 . - 0 gene=speH;ID=5d3897f59edf296200f1c7de895509e1_5;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005 ##FASTA >abc|SC|contig000001 ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/roary.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
