This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository bedtools.
commit c6cc55014c5ec363e1f71a510009a4268a0b950c Author: Andreas Tille <[email protected]> Date: Sat Feb 10 07:47:15 2018 +0100 New upstream version 2.27.1+dfsg --- .travis.yml | 5 + Makefile | 11 +- docs/conf.py | 6 +- docs/content/advanced-usage.rst | 10 +- docs/content/faq.rst | 7 + docs/content/general-usage.rst | 2 + docs/content/history.rst | 52 +- docs/content/installation.rst | 2 +- docs/content/tools/bamtobed.rst | 1 + docs/content/tools/bed12tobed6.rst | 2 + docs/content/tools/bedtobam.rst | 3 + docs/content/tools/closest.rst | 2 +- docs/content/tools/coverage.rst | 2 +- docs/content/tools/genomecov.rst | 2 +- docs/content/tools/getfasta.rst | 2 +- docs/content/tools/intersect.rst | 4 +- docs/content/tools/links.rst | 4 + docs/content/tools/overlap.rst | 3 + docs/content/tools/pairtopair.rst | 4 + docs/content/tools/sort.rst | 39 +- docs/content/tools/subtract.rst | 5 +- docs/content/tools/unionbedg.rst | 1 + docs/content/tools/window.rst | 4 +- docs/index.rst | 11 + docs/templates/sidebar-intro.html | 2 +- src/annotateBed/annotateBed.cpp | 4 +- src/bed12ToBed6/bed12ToBed6.cpp | 3 +- src/bedtools.cpp | 35 +- src/closestFile/closestHelp.cpp | 2 - src/clusterBed/clusterBed.h | 1 + src/clusterBed/clusterMain.cpp | 18 +- src/complementFile/complementFile.cpp | 18 +- src/complementFile/complementFile.h | 6 +- src/complementFile/complementHelp.cpp | 2 - src/coverageFile/coverageFile.cpp | 183 +- src/coverageFile/coverageFile.h | 9 +- src/coverageFile/coverageHelp.cpp | 2 - src/fastaFromBed/fastaFromBed.cpp | 94 +- src/fastaFromBed/fastaFromBed.h | 9 +- src/fastaFromBed/fastaFromBedMain.cpp | 18 +- src/fisher/fisher.cpp | 17 +- src/fisher/fisherHelp.cpp | 4 - src/genomeCoverageBed/genomeCoverageBed.cpp | 2 +- src/genomeCoverageBed/genomeCoverageMain.cpp | 4 + src/groupBy/Makefile | 1 + src/groupBy/groupBy.cpp | 74 +- src/groupBy/groupBy.h | 10 +- src/intersectFile/intersectFile.cpp | 15 +- src/intersectFile/intersectFile.h | 10 +- 
src/intersectFile/intersectHelp.cpp | 5 +- src/jaccard/jaccard.cpp | 6 +- src/jaccard/jaccard.h | 8 +- src/jaccard/jaccardHelp.cpp | 4 - src/mapFile/mapHelp.cpp | 4 - src/maskFastaFromBed/Makefile | 3 +- src/maskFastaFromBed/maskFastaFromBed.cpp | 26 +- src/maskFastaFromBed/maskFastaFromBed.h | 7 +- src/maskFastaFromBed/maskFastaFromBedMain.cpp | 33 +- src/mergeFile/mergeHelp.cpp | 4 - src/multiBamCov/multiBamCov.cpp | 4 +- src/nekSandbox1/nekSandboxMain.cpp | 14 +- src/regressTest/regressTestMain.cpp | 4 +- src/reldist/reldist.cpp | 12 +- src/sampleFile/sampleHelp.cpp | 5 - src/shiftBed/shiftBed.cpp | 6 +- src/shuffleBed/shuffleBed.cpp | 102 +- src/shuffleBed/shuffleBed.h | 6 + src/slopBed/slopBed.cpp | 65 +- src/sortBed/sortBed.h | 1 + src/sortBed/sortMain.cpp | 27 +- src/spacingFile/spacingFile.cpp | 9 +- src/spacingFile/spacingFile.h | 8 +- src/spacingFile/spacingHelp.cpp | 12 +- src/subtractFile/subtractFile.cpp | 14 +- src/subtractFile/subtractHelp.cpp | 4 - src/utils/BamTools/src/api/BamConstants.h | 6 +- .../src/api/internal/bam/BamMultiReader_p.cpp | 4 +- .../BamTools/src/api/internal/io/HostAddress_p.cpp | 2 +- src/utils/BinTree/BinTree.cpp | 20 +- src/utils/BinTree/BinTree.h | 10 +- src/utils/Contexts/ContextBase.cpp | 45 +- src/utils/Contexts/ContextBase.h | 44 +- src/utils/Contexts/ContextClosest.cpp | 6 +- src/utils/Contexts/ContextGroupBy.cpp | 10 +- src/utils/Contexts/ContextGroupBy.h | 8 +- src/utils/Contexts/ContextIntersect.cpp | 9 - src/utils/Contexts/ContextIntersect.h | 8 +- src/utils/Contexts/ContextMerge.cpp | 8 +- src/utils/Contexts/ContextMerge.h | 2 +- src/utils/Contexts/ContextSubtract.cpp | 19 +- src/utils/Contexts/ContextSubtract.h | 2 +- src/utils/Contexts/Makefile | 2 +- .../FileRecordTools/FileReaders/BamFileReader.cpp | 8 +- .../FileRecordTools/FileReaders/BamFileReader.h | 14 +- .../FileReaders/BufferedStreamMgr.cpp | 6 +- .../FileReaders/BufferedStreamMgr.h | 10 +- src/utils/FileRecordTools/FileReaders/FileReader.h | 4 +- 
.../FileRecordTools/FileReaders/InputStreamMgr.cpp | 10 +- .../FileRecordTools/FileReaders/InputStreamMgr.h | 12 +- src/utils/FileRecordTools/FileReaders/Makefile | 8 +- src/utils/FileRecordTools/FileReaders/Makefile~ | 2 +- .../FileReaders/SingleLineDelimTextFileReader.cpp | 22 +- .../FileReaders/SingleLineDelimTextFileReader.h | 18 +- src/utils/FileRecordTools/FileRecordMergeMgr.cpp | 8 +- src/utils/FileRecordTools/FileRecordMergeMgr.h | 2 +- src/utils/FileRecordTools/FileRecordMgr.cpp | 34 +- src/utils/FileRecordTools/FileRecordMgr.h | 18 +- src/utils/FileRecordTools/Makefile | 18 +- src/utils/FileRecordTools/Records/BamRecord.cpp | 112 +- src/utils/FileRecordTools/Records/BamRecord.h | 30 +- .../FileRecordTools/Records/Bed12Interval.cpp | 48 +- src/utils/FileRecordTools/Records/Bed12Interval.h | 31 +- src/utils/FileRecordTools/Records/Bed3Interval.cpp | 38 +- src/utils/FileRecordTools/Records/Bed3Interval.h | 12 +- src/utils/FileRecordTools/Records/Bed4Interval.cpp | 18 +- src/utils/FileRecordTools/Records/Bed4Interval.h | 10 +- src/utils/FileRecordTools/Records/Bed5Interval.cpp | 22 +- src/utils/FileRecordTools/Records/Bed5Interval.h | 10 +- src/utils/FileRecordTools/Records/Bed6Interval.cpp | 28 +- src/utils/FileRecordTools/Records/Bed6Interval.h | 10 +- .../FileRecordTools/Records/BedGraphInterval.cpp | 16 +- .../FileRecordTools/Records/BedGraphInterval.h | 10 +- .../FileRecordTools/Records/BedPlusInterval.cpp | 24 +- .../FileRecordTools/Records/BedPlusInterval.h | 14 +- src/utils/FileRecordTools/Records/BlockMgr.cpp | 36 +- src/utils/FileRecordTools/Records/BlockMgr.h | 9 +- .../FileRecordTools/Records/GffPlusRecord.cpp | 10 +- src/utils/FileRecordTools/Records/GffPlusRecord.h | 10 +- src/utils/FileRecordTools/Records/GffRecord.cpp | 74 +- src/utils/FileRecordTools/Records/GffRecord.h | 24 +- src/utils/FileRecordTools/Records/Makefile | 18 +- .../FileRecordTools/Records/NoPosPlusRecord.cpp | 4 +- .../FileRecordTools/Records/NoPosPlusRecord.h | 4 +- 
src/utils/FileRecordTools/Records/PlusFields.cpp | 7 +- src/utils/FileRecordTools/Records/PlusFields.h | 9 +- src/utils/FileRecordTools/Records/Record.cpp | 28 +- src/utils/FileRecordTools/Records/Record.h | 65 +- .../FileRecordTools/Records/RecordKeyList.cpp | 8 +- src/utils/FileRecordTools/Records/RecordKeyList.h | 6 +- .../FileRecordTools/Records/RecordKeyVector.cpp | 28 +- .../FileRecordTools/Records/RecordKeyVector.h | 15 +- src/utils/FileRecordTools/Records/RecordList.cpp | 5 +- src/utils/FileRecordTools/Records/RecordList.h | 12 +- src/utils/FileRecordTools/Records/StrandQueue.cpp | 18 +- src/utils/FileRecordTools/Records/StrandQueue.h | 14 +- src/utils/FileRecordTools/Records/VcfRecord.cpp | 33 +- src/utils/FileRecordTools/Records/VcfRecord.h | 16 +- src/utils/GenomeFile/NewGenomeFile.cpp | 16 +- src/utils/GenomeFile/NewGenomeFile.h | 24 +- src/utils/KeyListOps/KeyListOps.cpp | 100 +- src/utils/KeyListOps/KeyListOps.h | 38 +- src/utils/KeyListOps/KeyListOpsMethods.cpp | 77 +- src/utils/KeyListOps/KeyListOpsMethods.h | 54 +- src/utils/NewChromsweep/CloseSweep.cpp | 177 +- src/utils/NewChromsweep/CloseSweep.h | 35 +- src/utils/NewChromsweep/NewChromsweep.cpp | 61 +- src/utils/NewChromsweep/NewChromsweep.h | 18 +- src/utils/RecordOutputMgr/RecordOutputMgr.cpp | 81 +- src/utils/RecordOutputMgr/RecordOutputMgr.h | 14 +- src/utils/bedFile/bedFile.cpp | 50 +- src/utils/bedFile/bedFile.h | 23 +- src/utils/driver/BedtoolsDriver.cpp | 6 +- src/utils/driver/BedtoolsDriver.h | 9 +- src/utils/fileType/FileRecordTypeChecker.cpp | 22 +- src/utils/fileType/FileRecordTypeChecker.h | 6 +- src/utils/fileType/Makefile | 2 +- src/utils/general/BTlist.h | 16 +- src/utils/general/BedtoolsTypes.h | 2 +- src/utils/general/Makefile | 6 +- src/utils/general/ParseTools.cpp | 42 +- src/utils/general/ParseTools.h | 15 +- src/utils/general/QuickString.cpp | 96 +- src/utils/general/QuickString.h | 78 +- src/utils/general/Tokenizer.cpp | 9 +- src/utils/general/Tokenizer.h | 9 +- 
src/utils/stringUtilities/stringUtilities.h | 18 +- src/utils/version/version_release.txt | 6 +- src/windowBed/windowMain.cpp | 2 +- test/bamtobed/test-bamtobed.sh | 35 +- test/bamtofastq/test-bamtofastq.sh | 7 +- test/bed12tobed6/test-bed12tobed6.sh | 19 +- test/closest/kclosest/bug471_a.bed | 5 + test/closest/kclosest/bug471_b.bed | 9 + test/closest/kclosest/test-kclosest.sh | 262 +- test/closest/sortAndNaming/test-sort-and-naming.sh | 71 +- test/closest/test-closest.sh | 168 +- test/cluster/test-cluster.sh | 17 +- test/complement/test-complement.sh | 29 +- test/coverage/chr_0-100.bed | 2 + test/coverage/test-coverage.sh | 89 +- test/expand/test-expand.sh | 17 +- test/fisher/test-fisher.sh | 19 +- test/flank/test-flank.sh | 35 +- test/general/a.chromheader.bed | 2 + test/general/a.trackheader.bed | 2 + test/general/hg19.fa.fai | 1 + test/general/t.bed | 5 + test/general/test-general.sh | 171 +- test/genomecov/test-genomecov.sh | 37 +- test/getfasta/test-getfasta.sh | 78 +- test/{groupBy => groupby}/a_vcfSVtest.vcf | 0 test/groupby/bug569_problem.txt | 4 + test/{groupBy => groupby}/gdc.bam | Bin test/{groupBy => groupby}/noPosvalues.header.bed | 0 test/{groupBy => groupby}/test-groupby.sh | 89 +- test/groupby/test.bed | 3 + .../values3.7fields.header.bed | 0 test/{groupBy => groupby}/values3.header.bed | 0 test/{groupBy => groupby}/values3.no_header.bed | 0 .../values3.unmarked_header.bed | 2 +- .../values3.unmarked_header.bed.2} | 2 +- test/{groupBy => groupby}/values3_case.header.bed | 0 test/intersect/jim.bed | 1 + test/intersect/jim.vcf | 3 + .../multi_intersect/test-multi_intersect.sh | 47 +- test/intersect/new_test-intersect.sh | 185 +- test/intersect/new_test-intersect.sh~ | 6 +- .../sortAndNaming/test-sort-and-naming.sh | 73 +- test/intersect/test-intersect.sh | 231 +- test/jaccard/test-jaccard.sh | 31 +- test/makewindows/test-makewindows.sh | 27 +- test/map/test-map.sh | 121 +- test/merge/test-merge.sh | 201 +- test/multicov/test-multicov.sh | 27 +- 
test/negativecontrol/test-negativecontrol.sh | 2 + test/reldist/test-reldist.sh | 13 +- test/sample/test-sample.sh | 41 +- test/shift/b.bed | 2 + test/shift/huge.genome | 1 + test/shift/test-shift.sh | 51 +- test/shuffle/test-shuffle.sh | 145 +- test/slop/test-slop.sh | 106 +- test/sort/b.bed | 4 + test/sort/test-sort.sh | 38 +- test/spacing/a.bed | 3 +- test/spacing/test-spacing.sh | 18 +- test/split/randData.bed | 10000 +++++++++++++++++++ test/split/test-split.sh | 32 +- test/subtract/test-subtract.sh | 51 +- test/test.sh | 99 +- 240 files changed, 13581 insertions(+), 2675 deletions(-) diff --git a/.travis.yml b/.travis.yml index 23ffa85..00e87ef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,4 +2,9 @@ language: cpp compiler: - gcc - clang +addons: + apt: + packages: + - samtools + - tabix script: make test diff --git a/Makefile b/Makefile index 91cbbd5..15b7b75 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,18 @@ export SRC_DIR = src export UTIL_DIR = src/utils export CXX = g++ ifeq ($(DEBUG),1) -export CXXFLAGS = -Wall -O0 -g -fno-inline -fkeep-inline-functions -D_FILE_OFFSET_BITS=64 -fPIC -DDEBUG -D_DEBUG +export CXXFLAGS = -Wall -Wextra -DDEBUG -D_DEBUG -g -O0 -D_FILE_OFFSET_BITS=64 -fPIC $(INCLUDES) else export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC $(INCLUDES) endif + +# If the user has specified to do so, tell the compiler to use rand() (instead of mt19937).
+ifeq ($(USE_RAND),1) +export CXXFLAGS += -DUSE_RAND +else +export CXXFLAGS += -std=c++11 +endif + export LIBS = -lz export BT_ROOT = src/utils/BamTools/ @@ -156,6 +164,7 @@ install: all print_banner: @echo "Building BEDTools:" @echo "=========================================================" + $(info $$CXXFLAGS is [${CXXFLAGS}]) .PHONY: print_banner # make the "obj/" and "bin/" directories, if they don't exist diff --git a/docs/conf.py b/docs/conf.py index 5312924..c44fa35 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,16 +44,16 @@ master_doc = 'index' # General information about the project. project = u'bedtools' -copyright = u'2009 - 2016, Aaron R. Quinlan and Neil Kindlon' +copyright = u'2009 - 2017, Aaron R. Quinlan and Neil Kindlon' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '2.26.0' +version = '2.27.0' # The full version, including alpha/beta/rc tags. -release = '2.26.0' +release = '2.27.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/content/advanced-usage.rst b/docs/content/advanced-usage.rst index 476f3a9..22acfb3 100755 --- a/docs/content/advanced-usage.rst +++ b/docs/content/advanced-usage.rst @@ -11,10 +11,13 @@ Step 1. Add 500 bp up and downstream of each probe .. code-block:: bash - bedtools slop -i probes.bed -b 500 > probes.500bp.bed + bedtools slop -i probes.bed -g hg18.genome -b 500 > probes.500bp.bed +NB genome is two column chromosome size list - i.e. https://genome.ucsc.edu/goldenpath/help/hg18.chrom.sizes + Step 2. Get a BED file of all regions not covered by the probes (+500 bp up/down) + .. code-block:: bash bedtools complement -i probes.500bp.bed -g hg18.genome > probes.500bp.complement.bed @@ -24,8 +27,9 @@ Step 3. 
Create a masked genome where all bases are masked except for the probes .. code-block:: bash - bedtools maskfasta -in hg18.fa -bed probes.500bp.complement.bed -fo \ - > hg18.probecomplement.masked.fa + bedtools maskfasta -fi hg18.fa -bed probes.500bp.complement.bed \ + -fo hg18.probecomplement.masked.fa + ========================================================================== diff --git a/docs/content/faq.rst b/docs/content/faq.rst index 8999d81..035a11b 100644 --- a/docs/content/faq.rst +++ b/docs/content/faq.rst @@ -39,7 +39,14 @@ and on Fedora/Centos this would be: yum install zlib yum install zlib1g-dev +-------------------------------------------------- +Compiling with a specific zlib library +-------------------------------------------------- +If you need to override the location of the zlib library used to compile bedtools, you can run `make` and specify the `LIBS` argument. For example: + +.. code-block:: bash + make LIBS='/PATH/TO/ZLIB/lib/libz.a' ==================== General questions diff --git a/docs/content/general-usage.rst b/docs/content/general-usage.rst index 910f3ea..cb9e434 100755 --- a/docs/content/general-usage.rst +++ b/docs/content/general-usage.rst @@ -232,6 +232,7 @@ The GFF format is described on the Sanger Institute's website (http://www.sanger An entry from an example GFF file : :: + seq1 BLASTX similarity 101 235 87.1 + 0 Target "HBA_HUMAN" 11 55 ; E_value 0.0003 dJ102G20 GD_mRNA coding_exon 7105 7201 . - 2 Sequence "dJ102G20.C1.1" @@ -251,6 +252,7 @@ the chromosomes (or scaffolds, etc.) and their size (in basepairs). Genome files must be **tab-delimited** and are structured as follows (this is an example for *C. elegans*): :: + chrI 15072421 chrII 15279323 ... diff --git a/docs/content/history.rst b/docs/content/history.rst index c59e84e..bf9fd43 100644 --- a/docs/content/history.rst +++ b/docs/content/history.rst @@ -2,25 +2,45 @@ Release History ############### +Version 2.27.0 (6-Dec-2017) +============================ +1.
Fixed a big memory leak and algorithmic flaw in the `split` option. Thanks to Neil Kindlon! +2. Resolved compilation errors on OSX High Sierra. Many thanks to @jonchang! +3. Fixed a bug in the `shift` tool that caused some intervals to exceed the end of the chromosome. Thanks to @wlholtz +4. Fixed major bug in `groupby` that prevented proper functionality. +5. Speed improvements to the `shuffle` tool. +6. Bug fixes to the p-value calculation in the `fisher` tool. Thanks to Brent Pedersen. +7. Allow BED headers to start with chrom or chr +8. Fixes to the "k-closest" functionality in the `closest` tool. Thanks to Neil Kindlon. +9. Fixes to the output of the freqasc, freqdesc, distinct_sort_num and distinct_sort, and num_desc operations in the `groupby` tool. Thanks to @ghuls. +10. Many minor bug fixes and compilation improvements from Luke Goodsell. +11. Added the `-fullHeader` option to the `maskfasta` tool. Thanks to @ghuls. +12. Many bug fixes and performance improvements from John Marshall. +13. Fixed bug in the `-N/-f` behavior in `subtract`. +14. Full support for .fai files as genome (`-g`) files. +15. Many other minor bug fixes and functionality improvements. + + Version 2.26.0 (7-July-2016) ============================ -1. Fixed a major memory leak when using ``-sorted``. Thanks to Emily Tsang and Steohen Montgomery. +1. Fixed a major memory leak when using ``-sorted``. Thanks to Emily Tsang and Stephen Montgomery. 2. Fixed a bug for BED files containing a single record with no newline. Thanks to @jmarshall. -3. The ``getfasta`` tool includes name, chromosome and position in fasta headers when the ``-name`` option is used. Thanks to @rishavray. -4. Fixed a bug that now forces the ``coverage`` tool to process every record in the ``-a`` file. -5. Fixed a bug preventing proper processing of BED files with consecutive tabs. -6. VCF files containing structural variants now infer SV length from either the SVLEN or END INFO fields. Thanks to Zev Kronenberg. -7. 
Resolve off by one bugs when intersecting GFF or VCF files with BED files. -8. The ``shuffle`` tool now uses roulette wheel sampling to shuffle to ``-incl`` regions based upon the size of the interval. Thanks to Zev Kronenberg and Michael Imbeault. -9. Fixed a bug in ``coverage`` that prevented correct calculation of depth when using the ``-split`` option. -10. The ``shuffle`` tool warns when an interval exceeds the maximum chromosome length. -11. The ``complement`` tool better checks intervals against the chromosome lengths. -12. Fixes for ``stddev``, ``min``, and ``max`` operations. Thanks to @jmarshall. -13. Enabled ``stdev``, ``sstdev``, ``freqasc``, and ``freqdesc`` options for ``groupby``. -14. Allow ``-s`` and ``-w`` to be used in any order for ``makewindows``. -15. Added new ``-bedOut`` option to ``getfasta``. -16. The ``-r`` option forces the ``-F`` value for ``intersect``. -17. Add ``-pc`` option to the ``genomecov`` tool, allowing coverage to be calculated based upon paired-end fragments. +3. Fixed a bug in the contingency table values for the ``fisher`` tool. +4. The ``getfasta`` tool includes name, chromosome and position in fasta headers when the ``-name`` option is used. Thanks to @rishavray. +5. Fixed a bug that now forces the ``coverage`` tool to process every record in the ``-a`` file. +6. Fixed a bug preventing proper processing of BED files with consecutive tabs. +7. VCF files containing structural variants now infer SV length from either the SVLEN or END INFO fields. Thanks to Zev Kronenberg. +8. Resolve off by one bugs when intersecting GFF or VCF files with BED files. +9. The ``shuffle`` tool now uses roulette wheel sampling to shuffle to ``-incl`` regions based upon the size of the interval. Thanks to Zev Kronenberg and Michael Imbeault. +10. Fixed a bug in ``coverage`` that prevented correct calculation of depth when using the ``-split`` option. +11. The ``shuffle`` tool warns when an interval exceeds the maximum chromosome length. +12.
The ``complement`` tool better checks intervals against the chromosome lengths. +13. Fixes for ``stddev``, ``min``, and ``max`` operations. Thanks to @jmarshall. +14. Enabled ``stdev``, ``sstdev``, ``freqasc``, and ``freqdesc`` options for ``groupby``. +15. Allow ``-s`` and ``-w`` to be used in any order for ``makewindows``. +16. Added new ``-bedOut`` option to ``getfasta``. +17. The ``-r`` option forces the ``-F`` value for ``intersect``. +18. Add ``-pc`` option to the ``genomecov`` tool, allowing coverage to be calculated based upon paired-end fragments. Version 2.25.0 (3-Sept-2015) diff --git a/docs/content/installation.rst b/docs/content/installation.rst index ab3b037..55bb1a6 100755 --- a/docs/content/installation.rst +++ b/docs/content/installation.rst @@ -70,12 +70,12 @@ Charles for doing this. apt-get install bedtools - **Homebrew**. Carlos Borroto has made BEDTools available on the bedtools package manager for OSX. .. code-block:: bash + brew tap homebrew/science brew install bedtools **MacPorts**. Alternatively, the MacPorts ports system can be used to install BEDTools on OSX. diff --git a/docs/content/tools/bamtobed.rst b/docs/content/tools/bamtobed.rst index 2cb22f8..d3ab00c 100755 --- a/docs/content/tools/bamtobed.rst +++ b/docs/content/tools/bamtobed.rst @@ -149,6 +149,7 @@ the entire span of a spliced/split BAM alignment. However, when using the created for each aligned portion of the sequencing read. 
:: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Exons *************** ********** diff --git a/docs/content/tools/bed12tobed6.rst b/docs/content/tools/bed12tobed6.rst index e29105b..559d9db 100755 --- a/docs/content/tools/bed12tobed6.rst +++ b/docs/content/tools/bed12tobed6.rst @@ -14,6 +14,7 @@ Usage and option summary Usage: :: + bed12ToBed6 [OPTIONS] -i <BED12> =========================== =============================================================================================================================================================================================================== @@ -31,6 +32,7 @@ Default behavior Figure: :: + head data/knownGene.hg18.chr21.bed | tail -n 3 chr21 10079666 10120808 uc002yiv.1 0 - 10081686 1 0 1 2 0 6 0 8 0 4 528,91,101,215, 0,1930,39750,40927, chr21 10080031 10081687 uc002yiw.1 0 - 10080031 1 0 0 8 0 0 3 1 0 2 200,91, 0,1565, diff --git a/docs/content/tools/bedtobam.rst b/docs/content/tools/bedtobam.rst index 3177dd0..e3bac41 100755 --- a/docs/content/tools/bedtobam.rst +++ b/docs/content/tools/bedtobam.rst @@ -12,6 +12,7 @@ Usage and option summary Usage: :: + bedToBam [OPTIONS] -i <BED/GFF/VCF> -g <GENOME> > <BAM> =========================== =============================================================================================================================================================================================================== @@ -31,6 +32,7 @@ Default behavior The default behavior is to assume that the input file is in unblocked format. For example: :: + head -5 rmsk.hg18.chr21.bed chr21 9719768 9721892 ALR/Alpha 1004 + chr21 9721905 9725582 ALR/Alpha 1010 + @@ -59,6 +61,7 @@ bedToBam) of a BED file of UCSC genes. 
For example: :: + bedToBam -i knownGene.hg18.chr21.bed -g human.hg18.genome -bed12 > knownGene.bam samtools view knownGene.bam | head -2 diff --git a/docs/content/tools/closest.rst b/docs/content/tools/closest.rst index 0bf8d28..cc0ba14 100755 --- a/docs/content/tools/closest.rst +++ b/docs/content/tools/closest.rst @@ -50,7 +50,7 @@ Usage and option summary **(or)**: :: - + closestBed [OPTIONS] -a <FILE> \ -b <FILE1, FILE2, ..., FILEN> diff --git a/docs/content/tools/coverage.rst b/docs/content/tools/coverage.rst index 5fc9a8e..b8c50d8 100755 --- a/docs/content/tools/coverage.rst +++ b/docs/content/tools/coverage.rst @@ -44,7 +44,7 @@ Usage and option summary **(or)**: :: - + coverageBed [OPTIONS] -a <FILE> \ -b <FILE1, FILE2, ..., FILEN> diff --git a/docs/content/tools/genomecov.rst b/docs/content/tools/genomecov.rst index 2cad65e..7a6b409 100755 --- a/docs/content/tools/genomecov.rst +++ b/docs/content/tools/genomecov.rst @@ -36,7 +36,7 @@ Usage and option summary **(or)**: :: - + genomeCoverageBed [OPTIONS] [-i|-ibam] -g (iff. -i) diff --git a/docs/content/tools/getfasta.rst b/docs/content/tools/getfasta.rst index cbfe6e4..850b320 100755 --- a/docs/content/tools/getfasta.rst +++ b/docs/content/tools/getfasta.rst @@ -14,7 +14,7 @@ intervals defined in a BED/GFF/VCF file. .. tip:: - + 1. The headers in the input FASTA file must *exactly* match the chromosome column in the BED file. diff --git a/docs/content/tools/intersect.rst b/docs/content/tools/intersect.rst index 6a4d209..df11c4d 100755 --- a/docs/content/tools/intersect.rst +++ b/docs/content/tools/intersect.rst @@ -51,7 +51,7 @@ Usage and option summary **(or)**: :: - + intersectBed [OPTIONS] -a <FILE> \ -b <FILE1, FILE2, ..., FILEN> @@ -712,6 +712,7 @@ are reported as overlapping with the "split" BAM alignment, but in addition, a t overlaps the "split" portion of the alignment is also reported. 
:: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Exons --------------- ---------- @@ -726,6 +727,7 @@ overlaps the "split" portion of the alignment is also reported. In contrast, when using the **-split** option, only the exon overlaps are reported. :: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Exons --------------- ---------- diff --git a/docs/content/tools/links.rst b/docs/content/tools/links.rst index cbdbee6..43f6be1 100755 --- a/docs/content/tools/links.rst +++ b/docs/content/tools/links.rst @@ -13,6 +13,7 @@ Usage and option summary Usage: :: + linksBed [OPTIONS] -i <BED/GFF/VCF> > <HTML file> =========================== =============================================================================================================================================================================================================== @@ -34,6 +35,7 @@ By default, **linksBed** creates links to the public UCSC Genome Browser. 
For example: :: + head genes.bed chr21 9928613 10012791 uc002yip.1 0 - chr21 9928613 10012791 uc002yiq.1 0 - @@ -63,6 +65,7 @@ Optionally, **linksBed** will create links to a local copy of the UCSC Genome Br For example: :: + head -3 genes.bed chr21 9928613 10012791 uc002yip.1 0 - chr21 9928613 10012791 uc002yiq.1 0 - @@ -72,6 +75,7 @@ For example: One can point the links to the appropriate organism and genome build as well: :: + head -3 genes.bed chr21 9928613 10012791 uc002yip.1 0 - chr21 9928613 10012791 uc002yiq.1 0 - diff --git a/docs/content/tools/overlap.rst b/docs/content/tools/overlap.rst index f365ead..c2b0fd9 100755 --- a/docs/content/tools/overlap.rst +++ b/docs/content/tools/overlap.rst @@ -14,6 +14,7 @@ Usage and option summary Usage: :: + overlap [OPTIONS] -i <input> -cols s1,e1,s2,e2 =========================== =============================================================================================================================================================================================================== @@ -32,6 +33,7 @@ The default behavior is to compute the amount of overlap between the features yo start and end coordinates. For example: :: + windowBed -a A.bed -b B.bed -w 10 chr1 10 20 A chr1 15 25 B chr1 10 20 C chr1 25 35 D @@ -40,6 +42,7 @@ start and end coordinates. For example: # between the overlapping features from the output of windowBed. :: + windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7 chr1 10 20 A chr1 15 25 B 5 chr1 10 20 C chr1 25 35 D -5 diff --git a/docs/content/tools/pairtopair.rst b/docs/content/tools/pairtopair.rst index c4645a8..f47c32b 100755 --- a/docs/content/tools/pairtopair.rst +++ b/docs/content/tools/pairtopair.rst @@ -15,6 +15,7 @@ Usage and option summary **Usage:** :: + pairToPair [OPTIONS] -a <BEDPE> -b <BEDPE> @@ -50,6 +51,7 @@ locations) F/R alignment will not be matched with a R/R alignment. Default: Report A if *both* ends overlaps B. 
:: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEDPE/BAM A *****.................................***** @@ -63,6 +65,7 @@ Default when strand information is present in both BEDPE files: Report A if *bot the same strands*. :: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEDPE A >>>>>.................................>>>>> @@ -90,6 +93,7 @@ feature in B. **-type neither**: Report A only if *neither* end overlaps B. :: + Chromosome ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEDPE/BAM A *****.................................***** diff --git a/docs/content/tools/sort.rst b/docs/content/tools/sort.rst index 05a4e3a..76c4ce0 100755 --- a/docs/content/tools/sort.rst +++ b/docs/content/tools/sort.rst @@ -3,25 +3,32 @@ ############### *sort* ############### -**sortBed** sorts a feature file by chromosome and other criteria. +The ``bedtools sort`` tool sorts a feature file by chromosome and other criteria. ========================================================================== Usage and option summary ========================================================================== -Usage: +**Usage**: +:: + + bedtools sort [OPTIONS] -i <BED/GFF/VCF> +**(or)**: :: + sortBed [OPTIONS] -i <BED/GFF/VCF> =========================== =============================================================================================================================================================================================================== Option Description =========================== =============================================================================================================================================================================================================== -**-sizeA** Sort by feature size in ascending order. -**-sizeD** Sort by feature size in descending order. -**-chrThenSizeA** Sort by chromosome, then by feature size (asc). 
-**-chrThenSizeD** Sort by chromosome, then by feature size (desc). -**-chrThenScoreA** Sort by chromosome, then by score (asc). -**-chrThenScoreD** Sort by chromosome, then by score (desc). +**-sizeA** Sort by feature size in ascending order. +**-sizeD** Sort by feature size in descending order. +**-chrThenSizeA** Sort by chromosome (asc), then by feature size (asc). +**-chrThenSizeD** Sort by chromosome (asc), then by feature size (desc). +**-chrThenScoreA** Sort by chromosome (asc), then by score (asc). +**-chrThenScoreD** Sort by chromosome (asc), then by score (desc). +**-g** Define sort order by order of tab-delimited file with chromosome names in the first column. +**-faidx** Define sort order by order of tab-delimited file with chromosome names in the first column. Sort by specified chromosome order. =========================== =============================================================================================================================================================================================================== @@ -29,11 +36,12 @@ Usage: ========================================================================== Default behavior ========================================================================== -By default, **sortBed** sorts a BED file by chromosome and then by start position in ascending order. +By default, ``bedtools sort`` sorts a BED file by chromosome and then by start position in ascending order. For example: :: + cat A.bed chr1 800 1000 chr1 80 180 @@ -47,16 +55,17 @@ For example: chr1 800 1000 - - + + ========================================================================== Optional sorting behavior ========================================================================== -**sortBed** will also sorts a BED file by chromosome and then by other criteria. +``bedtools sort`` will also sort a BED file by chromosome and then by other criteria. 
For example, to sort by chromosome and then by feature size (in descending order): :: + cat A.bed chr1 800 1000 chr1 80 180 @@ -68,16 +77,16 @@ For example, to sort by chromosome and then by feature size (in descending order chr1 800 1000 chr1 80 180 chr1 1 10 - -**Disclaimer:** it should be noted that **sortBed** is merely a convenience utility, as the UNIX sort utility + +**Disclaimer:** it should be noted that ``bedtools sort`` is merely a convenience utility, as the UNIX sort utility will sort BED files more quickly while using less memory. For example, UNIX sort will sort a BED file by chromosome then by start position in the following manner: :: + sort -k 1,1 -k2,2n a.bed chr1 1 10 chr1 80 180 chr1 750 10000 chr1 800 1000 - diff --git a/docs/content/tools/subtract.rst b/docs/content/tools/subtract.rst index 37c527e..cc84177 100755 --- a/docs/content/tools/subtract.rst +++ b/docs/content/tools/subtract.rst @@ -27,7 +27,7 @@ Usage and option summary **(or)**: :: - + subtractBed [OPTIONS] -a <BED/GFF/VCF> -b <BED/GFF/VCF> @@ -38,7 +38,8 @@ Option Description **-f** Minimum overlap required as a fraction of A. Default is 1E-9 (i.e. 1bp). **-F** Minimum overlap required as a fraction of B. Default is 1E-9 (i.e., 1bp). **-r** Require that the fraction of overlap be reciprocal for A and B. In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B. -**-e** Require that the minimum fraction be satisfied for A _OR_ B. In other words, if -e is used with -f 0.90 and -F 0.10 this requires that either 90% of A is covered OR 10% of B is covered. Without -e, both fractions would have to be satisfied.**-s** Force "strandedness". That is, only report hits in B that overlap A on the same strand. By default, overlaps are reported without respect to strand. +**-e** Require that the minimum fraction be satisfied for A _OR_ B. 
In other words, if -e is used with -f 0.90 and -F 0.10 this requires that either 90% of A is covered OR 10% of B is covered. Without -e, both fractions would have to be satisfied. +**-s** Force "strandedness". That is, only report hits in B that overlap A on the same strand. By default, overlaps are reported without respect to strand. **-S** Require different strandedness. That is, only report hits in B that overlap A on the _opposite_ strand. By default, overlaps are reported without respect to strand. **-A** Remove entire feature if any overlap. That is, by default, only subtract the portion of A that overlaps B. Here, if any overlap is found (or ``-f`` amount), the entire feature is removed. **-N** Same as -A except when used with -f, the amount is the sum of all features (not any single feature). diff --git a/docs/content/tools/unionbedg.rst b/docs/content/tools/unionbedg.rst index e76c886..b6108a1 100755 --- a/docs/content/tools/unionbedg.rst +++ b/docs/content/tools/unionbedg.rst @@ -13,6 +13,7 @@ Usage and option summary Usage: :: + bedtools unionbedg [OPTIONS] -i FILE1 FILE2 FILE3 ... FILEn =========================== =============================================================================================================================================================================================================== diff --git a/docs/content/tools/window.rst b/docs/content/tools/window.rst index 850180f..208a94c 100755 --- a/docs/content/tools/window.rst +++ b/docs/content/tools/window.rst @@ -26,7 +26,7 @@ Usage and option summary **(or)**: :: - + bedtools window [OPTIONS] [-a|-abam] -b <BED/GFF/VCF> @@ -44,7 +44,7 @@ Option Description **-sm** Only report hits in B that overlap A on the same strand. *By default, overlaps are reported without respect to strand*. **-Sm** Only report hits in B that overlap A on the opposite strand. *By default, overlaps are reported without respect to strand*. 
**-u** Write original A entry once if any overlaps found in B. In other words, just report the fact at least one overlap was found in B. -**-c** For each entry in A, report the number of hits in B while restricting to -f. Reports 0 for A entries that have no overlap with B. +**-c** For each entry in A, report the number of hits in B while restricting to -w, -l, and -r. Reports 0 for A entries that have no overlap with B. **-v** Only report those entries in A that have *no overlaps* with B. **-header** Print the header from the A file prior to results. =========================== ========================================================================================================================================================= diff --git a/docs/index.rst b/docs/index.rst index 2d67581..2397ac3 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,6 +11,7 @@ genomic file formats such as BAM, BED, GFF/GTF, VCF. While each individual tool *intersect* two interval files), quite sophisticated analyses can be conducted by combining multiple bedtools operations on the UNIX command line. +**bedtools** is developed in the `Quinlan laboratory <http://quinlanlab.org>`_ at the `University of Utah <http://www.utah.edu/>`_ and benefits from fantastic contributions made by scientists worldwide. ========================== Tutorial @@ -18,6 +19,16 @@ Tutorial We have developed a fairly comprehensive `tutorial <http://quinlanlab.org/tutorials/bedtools/bedtools.html>`_ that demonstrates both the basics, as well as some more advanced examples of how bedtools can help you in your research. Please have a look. ========================== +Important notes +========================== +- With the exception of BAM files, `bedtools` assumes all input files are TAB delimited. +- `bedtools` also assumes that all input files use UNIX line endings. 
+- Unless you use the `-sorted` option, `bedtools` currently does not support chromosomes larger than 512Mb. +- When using the `-sorted` option with files whose chromosomes are not in lexicographic order (the order produced by, e.g., `sort -k1,1 -k2,2n` for BED files), one must provide a genome file (`-g`) defining the expected chromosome order. +- `bedtools` requires that chromosome naming schemes are identical in files that you are comparing (e.g., 'chr1' in one file and '1' in another won't work). +- .fai files may be used as genome (`-g`) files. + +========================== Interesting Usage Examples ========================== diff --git a/docs/templates/sidebar-intro.html b/docs/templates/sidebar-intro.html index 262da46..dc430e7 100644 --- a/docs/templates/sidebar-intro.html +++ b/docs/templates/sidebar-intro.html @@ -8,7 +8,7 @@ <li><a target="_blank" href="https://bedtools.googlecode.com">Old Releases @ Google Code</a></li> <li><a target="_blank" href="http://groups.google.com/group/bedtools-discuss">Mailing list @ Google Groups</a></li> <li><a target="_blank" href="http://www.biostars.org/show/tag/bedtools/">Queries @ Biostar</a></li> - <li><a target="_blank" href="http://quinlanlab.org">Quinlan lab @ UVa</a></li> + <li><a target="_blank" href="http://quinlanlab.org">Quinlan lab @ UU</a></li> </ul> diff --git a/src/annotateBed/annotateBed.cpp b/src/annotateBed/annotateBed.cpp index 23c775a..d462944 100644 --- a/src/annotateBed/annotateBed.cpp +++ b/src/annotateBed/annotateBed.cpp @@ -57,13 +57,13 @@ void BedAnnotate::PrintHeader() { // print a hash to indicate header and then write a tab // for each field in the main file. printf("#"); - for (size_t i = 0; i < _bed->bedType; ++i) + for (size_t i = 0; i < _bed->bedType -1; ++i) printf("\t"); // now print the label for each file. 
if (_reportBoth == false) { for (size_t i = 0; i < _annoTitles.size(); ++i) - printf("%s\t", _annoTitles[i].c_str()); + printf("\t%s", _annoTitles[i].c_str()); printf("\n"); } else { diff --git a/src/bed12ToBed6/bed12ToBed6.cpp b/src/bed12ToBed6/bed12ToBed6.cpp index d9ea0b7..85010f8 100644 --- a/src/bed12ToBed6/bed12ToBed6.cpp +++ b/src/bed12ToBed6/bed12ToBed6.cpp @@ -56,7 +56,7 @@ int bed12tobed6_main(int argc, char* argv[]) { if(showHelp) bed12tobed6_help(); - // do some parsing (all of these parameters require 2 strings) + // do some parsing (-i requires 2 strings, -n requires 1) for(int i = 1; i < argc; i++) { int parameterLength = (int)strlen(argv[i]); @@ -69,7 +69,6 @@ int bed12tobed6_main(int argc, char* argv[]) { } else if(PARAMETER_CHECK("-n", 2, parameterLength)) { addBlockNums = true; - i++; } else { cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; diff --git a/src/bedtools.cpp b/src/bedtools.cpp index 088ea70..d31d640 100644 --- a/src/bedtools.cpp +++ b/src/bedtools.cpp @@ -34,8 +34,9 @@ using namespace std; // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) -bool sub_main(const QuickString &subCmd); -void showHelp(const QuickString &subCmd); +bool sub_main(const string &subCmd); +void showHelp(const string &subCmd); +void showErrors(const string &errors); int annotate_main(int argc, char* argv[]);// int bamtobed_main(int argc, char* argv[]);// @@ -92,14 +93,18 @@ int main(int argc, char *argv[]) // make sure the user at least entered a sub_command if (argc < 2) return bedtools_help(); - QuickString subCmd(argv[1]); + string subCmd(argv[1]); BedtoolsDriver btDriver; if (btDriver.supports(subCmd)) { - if (btDriver.subMain(argc, argv)) { - return 0; - } else if (btDriver.hadError()) { + if (btDriver.subMain(argc, argv)) + { + return 0; + } + else if (btDriver.hadError()) + { 
showHelp(subCmd); + showErrors(btDriver.getErrors()); return 1; } } @@ -190,8 +195,13 @@ int main(int argc, char *argv[]) int bedtools_help(void) { - cout << PROGRAM_NAME << ": flexible tools for genome arithmetic and DNA sequence analysis.\n"; - cout << "usage: bedtools <subcommand> [options]" << endl << endl; + cout << PROGRAM_NAME << " is a powerful toolset for genome arithmetic." << endl << endl; + cout << "Version: " << VERSION << endl; ... 26981 lines suppressed ... -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bedtools.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
