commit: cb4121a23d10b5a0879682b261c0e55e483bc218 Author: Martin Mokrejš <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> AuthorDate: Thu Apr 30 22:46:00 2015 +0000 Commit: Martin Mokrejs <mmokrejs <AT> fold <DOT> natur <DOT> cuni <DOT> cz> CommitDate: Thu Apr 30 22:46:00 2015 +0000 URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=cb4121a2
sci-biology/TransDecoder: renamed package to keep original letter-casing Package-Manager: portage-2.2.18 sci-biology/TransDecoder/ChangeLog | 54 ++++++++ sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild | 69 +++++++++++ sci-biology/TransDecoder/files/TransDecoder.patch | 136 +++++++++++++++++++++ .../TransDecoder/files/pfam_runner.pl.patch | 20 +++ sci-biology/TransDecoder/metadata.xml | 9 ++ 5 files changed, 288 insertions(+) diff --git a/sci-biology/TransDecoder/ChangeLog b/sci-biology/TransDecoder/ChangeLog new file mode 100644 index 0000000..6fd8f5c --- /dev/null +++ b/sci-biology/TransDecoder/ChangeLog @@ -0,0 +1,54 @@ +# ChangeLog for sci-biology/transdecoder +# Copyright 1999-2015 Gentoo Foundation; Distributed under the GPL v2 +# $Header: $ + + 27 Apr 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-2.0.1.ebuild: + sci-biology/transdecoder: more ebuild cleanup + + 27 Apr 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-2.0.1.ebuild: + sci-biology/transdecoder: ebuild cleanup + + 27 Apr 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + -transdecoder-20140704.ebuild, transdecoder-2.0.1.ebuild: + sci-biology/transdecoder: drop old + + 17 Apr 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild: + sci-biology/transdecoder: fixed installation of perl-related files + + 19 Mar 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-2.0.1.ebuild: + dropped hmmer dependency altogether, added pkg_postinst() with + usage/dependency info + +*transdecoder-2.0.1 (19 Mar 2015) + + 19 Mar 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + +transdecoder-2.0.1.ebuild, transdecoder-20140704.ebuild: + removing KEYWORDS until the perl files are installed to some other place. At + the moment I get: 'perl-module.eclass: Suspicious environment values found. 
+ PERL5LIB="/usr/lib64/perl5/vendor_perl/5.18.2/TransDecoder"' + + 15 Feb 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-20140704.ebuild: + drop hmmer-3 dependency, it is an optional dependency only + + 10 Jan 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + transdecoder-20140704.ebuild: + install *.pm into PERL5LIB/TransDecoder and pass that via env.d + + 09 Jan 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> + +files/TransDecoder.patch, +files/pfam_runner.pl.patch, + transdecoder-20140704.ebuild: + sci-biology/transdecoder: added patches so that we use PATH to locate + binaries and not in a local subdirectory named 'util', drop sys- + cluster/openmpi requirement, it does not link against it at all, this is a + bunch of perl and shell scripts + +*transdecoder-20140704 (08 Jan 2015) + + 08 Jan 2015; Martin Mokrejs <mmokr...@fold.natur.cuni.cz> +metadata.xml, + +transdecoder-20140704.ebuild: + sci-biology/transdecoder: new package diff --git a/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild new file mode 100644 index 0000000..e8f5134 --- /dev/null +++ b/sci-biology/TransDecoder/TransDecoder-2.0.1.ebuild @@ -0,0 +1,69 @@ +# Copyright 1999-2015 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 +# $Header: $ + +EAPI=5 + +PERL_EXPORT_PHASE_FUNCTIONS=no +inherit perl-module eutils toolchain-funcs + +DESCRIPTION="Extract ORF/CDS regions from FASTA sequences" +HOMEPAGE="http://sourceforge.net/projects/transdecoder/" +SRC_URI="https://github.com/TransDecoder/TransDecoder/archive/"${PV}".tar.gz -> ${P}.tar.gz" + +LICENSE="BSD-BroadInstitute" +SLOT="0" +KEYWORDS="~amd64" +IUSE="" + +DEPEND="" +RDEPEND="${DEPEND} + sci-biology/cd-hit + sci-biology/hmmer + sci-biology/parafly + sci-biology/ffindex" +# cdhit-4.6.1 is a real dependency, at least hmmer is optional (also ncbi-tools++ is now used for ORF searches) + +S="${WORKDIR}"/TransDecoder-2.0.1 + 
+##src_prepare(){ +# #mv Makefile Makefile.old +# #epatch "${FILESDIR}"/TransDecoder.patch +# #epatch "${FILESDIR}"/pfam_runner.pl.patch +#} + +# avoid fetching 1.5TB "${S}"/pfam/Pfam-AB.hmm.bin, see +# "Re: [Transdecoder-users] Announcement: Transdecoder release r20140704" thread in archives +# +# you can get it from http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin + +src_install(){ + dobin TransDecoder.Predict TransDecoder.LongOrfs + insinto /usr/share/${PN}/util # NOTE(review): no doins follows before the next insinto, so this line appears to have no effect + dobin util/*.pl + # zap the bundled cdhit binaries copied from transdecoder_plugins/cdhit/ to util/bin + rm -rf util/bin + # + # * sci-biology/trinityrnaseq-20140413:0::science + # * /usr/bin/Fasta_reader.pm + # * /usr/bin/GFF3_utils.pm + # * /usr/bin/Gene_obj.pm + # * /usr/bin/Gene_obj_indexer.pm + # * /usr/bin/Longest_orf.pm + # * /usr/bin/Nuc_translator.pm + # * /usr/bin/TiedHash.pm + # + perl_set_version + insinto ${VENDOR_LIB}/${PN} + dobin PerlLib/*.pm # BUG: dobin ignores insinto and installs into /usr/bin, but the modules were meant to be readable and executable in ${VENDOR_LIB}/${PN} instead + einfo "Fetch on your own:" + einfo "wget --mirror -nH -nd http://downloads.sourceforge.net/project/transdecoder/Pfam-AB.hmm.bin" + einfo "hmmpress Pfam-AB.hmm.bin" +} + +pkg_postinst(){ + einfo "It is recommended to use TransDecoder with hmmer-3 or at least NCBI blast" + einfo "from either sci-biology/ncbi-blast+ (released more often) or" + einfo "from sci-biology/ncbi-toolkit++ (huge bundle with releases and less frequent bugfixes)" + einfo "Author says the minimum requirement is sci-biology/cd-hit" +} diff --git a/sci-biology/TransDecoder/files/TransDecoder.patch b/sci-biology/TransDecoder/files/TransDecoder.patch new file mode 100644 index 0000000..c0cff94 --- /dev/null +++ b/sci-biology/TransDecoder/files/TransDecoder.patch @@ -0,0 +1,136 @@ +--- /usr/bin/TransDecoder 2015-01-09 11:22:55.000000000 +0100 ++++ TransDecoder 2015-01-09 14:31:44.095839522 +0100 +@@ -48,7 +48,7 @@ + --prepare_pfam Prepare data for PFAM 
search and then quit (for running PFAM on HPC/computing cluster + with or without MPI ) + +- --CPU <int> number of threads to use; (default: 2) ++ --CPU <int> number of threads to use; (default: 1) + + --MPI use MPI w/ execution of hmmscan + +@@ -76,7 +76,7 @@ + + =head1 PFAM + +-You will need hmmer installed. Use hmmpress to prepare the database for hmmer. ++You will need hmmer installed. Use hmmpress from >=hmmer-3.0 to prepare the database for hmmer. + L<See|https://sourceforge.net/projects/transdecoder/files/Pfam-AB.hmm.bin> for downloading the database. + + =head1 CD-HIT +@@ -105,7 +105,6 @@ + use Longest_orf; + + my $UTIL_DIR = "$FindBin::RealBin/util"; +-$ENV{PATH} = "$UTIL_DIR/bin:$ENV{PATH}"; + $ENV{LD_LIBRARY_PATH} .= ":$FindBin::RealBin/util/lib64"; + + my ($cd_hit_est_exec) = &check_program('cd-hit-est'); +@@ -124,7 +123,7 @@ + my $verbose; + my $search_pfam = ""; + my ($reuse,$pfam_out); +-my $CPU = 2; ++my $CPU = 1; + my $RETAIN_LONG_ORFS = 900; + my $MPI = 0; + +@@ -330,15 +329,15 @@ + my $top_cds_file = $train_file && -s $train_file ? 
$train_file : "$cds_file.top_${top_ORFs_train}_longest"; + if (!-s $top_cds_file) { + # get longest entries +- my $cmd = "$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file"; ++ my $cmd = "get_top_longest_fasta_entries.pl $cds_file $top_ORFs_train > $top_cds_file"; + + unless ($reuse && -s $top_cds_file){ + if ($cd_hit_est_exec){ + # to speed things up only check for redundancy up to 4x the number of entries we want + my $red_num = $top_ORFs_train * 4 ; +- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top"); ++ &process_cmd("get_top_longest_fasta_entries.pl $cds_file $red_num > $workdir/redundant_top"); + &process_cmd("$cd_hit_est_exec -r 1 -i $workdir/redundant_top -o $workdir/redundant_top.nr90 -M 0 -T $CPU >/dev/null 2>/dev/null"); +- &process_cmd("$UTIL_DIR/get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file"); ++ &process_cmd("get_top_longest_fasta_entries.pl $workdir/redundant_top.nr90 $top_ORFs_train > $top_cds_file"); + unlink("$workdir/redundant_top"); + unlink("$workdir/redundant_top.nr90"); + unlink("$workdir/redundant_top.nr90.bak.clstr"); +@@ -349,20 +348,20 @@ + } + } + +-$cmd = "$UTIL_DIR/compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat"; ++$cmd = "compute_base_probs.pl $transcripts_file $TOP_STRAND_ONLY > $workdir/base_freqs.dat"; + &process_cmd($cmd) unless $reuse && -s "$workdir/base_freqs.dat"; + + + # get hexamer scores +-#$cmd = "$UTIL_DIR/seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores"; ++#$cmd = "seq_n_background_to_logliklihood_vals.pl $top_cds_file $transcripts_file.random > hexamer.scores"; + #&process_cmd($cmd) unless ($reuse && -s "hexamer.scores"); + +-$cmd = "$UTIL_DIR/seq_n_baseprobs_to_logliklihood_vals.pl $top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores"; ++$cmd = "seq_n_baseprobs_to_logliklihood_vals.pl 
$top_cds_file $workdir/base_freqs.dat > $workdir/hexamer.scores"; + &process_cmd($cmd) unless $reuse && -s "$workdir/hexamer.scores"; + + + # score all cds entries +-$cmd = "$UTIL_DIR/score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores"; ++$cmd = "score_CDS_liklihood_all_6_frames.pl $cds_file $workdir/hexamer.scores > $cds_file.scores"; + &process_cmd($cmd) unless ($reuse && -s "$cds_file.scores"); + + +@@ -440,18 +439,18 @@ + } + + # index the current gff file: +-$cmd = "$UTIL_DIR/index_gff3_files_by_isoform.pl $gff3_file"; ++$cmd = "index_gff3_files_by_isoform.pl $gff3_file"; + &process_cmd($cmd); + + # retrieve the best entries: +-$cmd = "$UTIL_DIR/gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3"; ++$cmd = "gene_list_to_gff.pl $acc_file $gff3_file.inx > $cds_file.best_candidates.gff3"; + &process_cmd($cmd); + + { + my $final_output_prefix = basename($transcripts_file) . ".transdecoder"; + + # exclude shadow orfs (smaller orfs in different reading frame that are eclipsed by longer orfs) +- $cmd = "$UTIL_DIR/remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3"; ++ $cmd = "remove_eclipsed_ORFs.pl $cds_file.best_candidates.gff3 > $final_output_prefix.gff3"; + &process_cmd($cmd); + + +@@ -462,14 +461,14 @@ + my $gff3_file = "$final_output_prefix.gff3"; + my $bed_file = $gff3_file; + $bed_file =~ s/\.gff3$/\.bed/; +- $cmd = "$UTIL_DIR/gff3_file_to_bed.pl $gff3_file > $bed_file"; ++ $cmd = "gff3_file_to_bed.pl $gff3_file > $bed_file"; + &process_cmd($cmd); + + + # make a peptide file: + my $best_pep_file = $gff3_file; + $best_pep_file =~ s/\.gff3$/\.pep/; +- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file"; ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file > $best_pep_file"; + &process_cmd($cmd); + + +@@ -477,13 +476,13 @@ + # make a CDS file: + my $best_cds_file = $best_pep_file; + $best_cds_file =~ s/\.pep$/\.cds/; 
+- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file"; ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file CDS > $best_cds_file"; + &process_cmd($cmd); + + # make a CDS file: + my $best_cdna_file = $best_pep_file; + $best_cdna_file =~ s/\.pep$/\.mRNA/; +- $cmd = "$UTIL_DIR/gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file"; ++ $cmd = "gff3_file_to_proteins.pl $gff3_file $transcripts_file cDNA > $best_cdna_file"; + &process_cmd($cmd); + + } diff --git a/sci-biology/TransDecoder/files/pfam_runner.pl.patch b/sci-biology/TransDecoder/files/pfam_runner.pl.patch new file mode 100644 index 0000000..7809b1a --- /dev/null +++ b/sci-biology/TransDecoder/files/pfam_runner.pl.patch @@ -0,0 +1,20 @@ +--- /usr/bin/pfam_runner.pl 2015-01-09 11:22:55.000000000 +0100 ++++ pfam_runner.pl 2015-01-09 14:25:43.385838579 +0100 +@@ -24,7 +24,7 @@ + my $workdir; + my $verbose; + my ($reuse,$pfam_out); +-my $CPU = 2; ++my $CPU = 1; + + my $usage = <<_EOH_; + +@@ -59,7 +59,7 @@ + # -h print this option menu and quit + # -v verbose + # +-# --CPU <int> number of threads to use; (default: 2) ++# --CPU <int> number of threads to use; (default: 1) + # + # --MPI use MPI (via ffindex_apply_mpi) + # diff --git a/sci-biology/TransDecoder/metadata.xml b/sci-biology/TransDecoder/metadata.xml new file mode 100644 index 0000000..2bc8930 --- /dev/null +++ b/sci-biology/TransDecoder/metadata.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> +<pkgmetadata> + <herd>sci-biology</herd> + <maintainer> + <email>mmokr...@fold.natur.cuni.cz</email> + <name>Martin Mokrejs</name> + </maintainer> +</pkgmetadata>