Changeset: 68db6864de04 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=68db6864de04 Added Files: sql/backends/monet5/bamloader/85_bam.mal sql/backends/monet5/bamloader/85_bam.sql sql/backends/monet5/bamloader/Makefile.ag sql/backends/monet5/bamloader/README sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_0/query2.9.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_0_minimaloutput/query2.9.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_1/query2.9.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_1_minimaloutput/query2.9.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_2/query2.9.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/benchmark1.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/benchmark2.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query1.1.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query1.2.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query1.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query1.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query1.5.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.10.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.11.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.12.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.3.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.4.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.6.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.7.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.8.sql sql/backends/monet5/bamloader/Tests/benchmarks_2_minimaloutput/query2.9.sql sql/backends/monet5/bamloader/bam.mal sql/backends/monet5/bamloader/bam_globals.h sql/backends/monet5/bamloader/bam_lib.c sql/backends/monet5/bamloader/bam_lib.h sql/backends/monet5/bamloader/bam_loader.c sql/backends/monet5/bamloader/bam_loader.h sql/backends/monet5/bamloader/bam_sql.c sql/backends/monet5/bamloader/bam_sql.h sql/backends/monet5/bamloader/sql/bam_clear.sql sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_0.sql sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_1.sql sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_0.sql sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_1.sql sql/backends/monet5/bamloader/sql/bam_schema.sql Modified Files: sql/backends/monet5/Makefile.ag Branch: bamloader Log Message:
Added bam loader to default branch, stripped the files from my own files wherever possible, since the bam loader is the only piece of software of interest at the moment. diffs (truncated from 7620 to 300 lines): diff --git a/sql/backends/monet5/Makefile.ag b/sql/backends/monet5/Makefile.ag --- a/sql/backends/monet5/Makefile.ag +++ b/sql/backends/monet5/Makefile.ag @@ -15,7 +15,7 @@ # Copyright August 2008-2014 MonetDB B.V. # All Rights Reserved. -SUBDIRS = NOT_WIN32?vaults UDF LSST ENABLE_DATACELL?datacell HAVE_JSONSTORE?rest HAVE_GSL?gsl +SUBDIRS = NOT_WIN32?vaults UDF HAVE_SAMTOOLS?bamloader LSST ENABLE_DATACELL?datacell HAVE_JSONSTORE?rest HAVE_GSL?gsl INCLUDES = ../../include ../../common ../../storage ../../server \ ../../../monetdb5/modules/atoms \ diff --git a/sql/backends/monet5/bamloader/85_bam.mal b/sql/backends/monet5/bamloader/85_bam.mal new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/85_bam.mal @@ -0,0 +1,1 @@ +include bam; \ No newline at end of file diff --git a/sql/backends/monet5/bamloader/85_bam.sql b/sql/backends/monet5/bamloader/85_bam.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/85_bam.sql @@ -0,0 +1,21 @@ +CREATE PROCEDURE bam_loader_repos(bam_repos STRING, dbschema INT, storage_mask STRING, nr_threads INT) +EXTERNAL NAME bam.bam_loader_repos; + +CREATE PROCEDURE bam_loader_file(bam_file STRING, dbschema INT, storage_mask STRING) +EXTERNAL NAME bam.bam_loader_file; + +CREATE PROCEDURE bam_drop_file(file_id SMALLINT, dbschema SMALLINT) +EXTERNAL NAME bam.bam_drop_file; + + +CREATE FUNCTION bam_flag(flag SMALLINT, name STRING) +RETURNS BOOLEAN EXTERNAL NAME bam.bam_flag; + +CREATE FUNCTION reverse_seq(seq STRING) +RETURNS STRING EXTERNAL NAME bam.reverse_seq; + +CREATE FUNCTION reverse_qual(qual STRING) +RETURNS STRING EXTERNAL NAME bam.reverse_qual; + +CREATE FUNCTION seq_length(cigar STRING) +RETURNS INT EXTERNAL NAME bam.seq_length; diff --git a/sql/backends/monet5/bamloader/Makefile.ag b/sql/backends/monet5/bamloader/Makefile.ag new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/Makefile.ag @@ -0,0 +1,61 @@ +# The contents of this file are subject to the MonetDB Public License +# Version 1.1 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http://www.monetdb.org/Legal/MonetDBLicense +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +# License for the specific language governing rights and limitations +# under the License. +# +# The Original Code is the MonetDB Database System. +# +# The Initial Developer of the Original Code is CWI. +# Portions created by CWI are Copyright (C) 1997-July 2008 CWI. +# Copyright August 2008-2013 MonetDB B.V. +# All Rights Reserved. + +INCLUDES = .. \ + ../../../include \ + ../../../common \ + ../../../storage \ + ../../../server \ + ../../../../monetdb5/modules/atoms \ + ../../../../monetdb5/modules/kernel \ + ../../../../monetdb5/mal \ + ../../../../monetdb5/modules/mal \ + ../../../../monetdb5/optimizer \ + ../../../../clients/mapilib \ + ../../../../common/options \ + ../../../../common/stream \ + ../../../../gdk \ + $(SAMTOOLS_CFLAGS) + +lib__bam = { + MODULE + DIR = libdir/monetdb5 + SOURCES = bam_loader.c bam_loader.h bam_lib.h bam_lib.c bam_sql.h bam_sql.c + LIBS = ../../../../monetdb5/tools/libmonetdb5 \ + ../../../../gdk/libbat \ + $(SAMTOOLS_LIBS) +} + +headers_mal = { + HEADERS = mal + DIR = libdir/monetdb5 + SOURCES = bam.mal +} + +headers_sql = { + HEADERS = sql + DIR = libdir/monetdb5/createdb + SOURCES = 85_bam.sql +} + +headers_autoload = { + HEADERS = mal + DIR = libdir/monetdb5/autoload + SOURCES = 85_bam.mal +} + +EXTRA_DIST_DIR = Tests diff --git a/sql/backends/monet5/bamloader/README b/sql/backends/monet5/bamloader/README new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/README @@ -0,0 +1,48 @@ +This document describes how to use the code that can be found in sql/backends/monet5/bam/ + + +*************************************************************************************************** +Installation +*************************************************************************************************** +In order to run the code, samtools [1][2] has to be installed. To do so, create a directory named +samtools in some location <samtools_location> and unpack the contents of samtools into +<samtools_location>/samtools. Then, create symbolic links <samtools_location>/lib and +<samtools_location>/include that both link to <samtools_location>/samtools. Now install +samtools with the compiler flag -fPIC (in my case, I had to alter the by samtools provided +Makefile by setting the constant CFLAGS to -g -Wall -O2 -fPIC. + +The bamloader writes messages to a log file. For now, this logfile is specified in sql/ +backends/monet5/bam/bamloader.h in the constant LOG_FILE. Change this to a filepath of +choice. The given directory must exist, while the file does not. If the file doesn't +exist it is automatically created. + +Then you can compile and install MonetDB with BAM file support using the usual sequence +as described at http://www.monetdb.org/wiki/MonetDB:Building_from_sources , +but telling configure where to find your samtools installation, i.e., + bootstrap + configure --with-samtools=<samtools_location> + make + make install + + +[1] http://samtools.sourceforge.net/ +[2] http://en.wikipedia.org/wiki/SAMtools + + +*************************************************************************************************** +Loading BAM files in the database +*************************************************************************************************** +- First of all, the BAM schema needs to be loaded in the database. This can be done by executing + the SQL code found in sql/backends/monet5/bam/bam_schema.sql (note that cleaning of the database + is done by executing sql/backends/monet5/bam/bam_clear.sql). +- From the SQL interpreter, you can now call the UDF bamloader(str filepath, int, int). The first + string needs to contain the absolute or relative path to a bam file. The last two arguments are + not used yet. +- If all went well, the data from the specified bam file is now loaded into the tables of the bam + schema. Details of the loading process can be found in the aforementioned logfile. Note that you + can load multiple BAM files into the database my doing multiple calls to bamloader. + + + + +If you have any questions, please contact me at c.p.cij...@cwi.nl diff --git a/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark1.sql b/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark1.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark1.sql @@ -0,0 +1,82 @@ +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 1.1 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +SELECT qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual +FROM bam.alignments_i +WHERE bam_flag(flag, 'seco_alig') = False +ORDER BY qname; + + + + + + + + + + +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 1.2 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +SELECT qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual +FROM bam.alignments_i +WHERE bam_flag(flag, 'seco_alig') = False +ORDER BY rname, pos; + + + + + + + + + + +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 1.3 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +SELECT qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual +FROM bam.alignments_i +WHERE rname = rname_1_3 + AND pos >= pos_1_3_1 + AND pos <= pos_1_3_2 +ORDER BY pos; + + + + + + + + + + +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 1.4 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +SELECT qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual +FROM bam.alignments_i +WHERE qname = qname_1_4 +ORDER BY rname, pos; + + + + + + + + + + +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 1.5 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +SELECT qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, qual +FROM bam.alignments_i +WHERE mapq > mapq_1_5 +ORDER BY mapq DESC; diff --git a/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark2.sql b/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark2.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/Tests/benchmarks_0/benchmark2.sql @@ -0,0 +1,566 @@ +-------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------- Query 2.1 -------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------------------------------------- + +WITH alig AS ( + SELECT qname, flag, seq, qual + FROM bam.alignments_i + WHERE bam_flag(flag, 'firs_segm') <> bam_flag(flag, 'last_segm') + AND bam_flag(flag, 'seco_alig') = False + AND mapq < mapq_2_1 +), alig_proj AS ( + SELECT qname, flag, + CASE WHEN bam_flag(flag, 'segm_reve') THEN reverse_seq(seq) ELSE seq END AS seq, + CASE WHEN bam_flag(flag, 'segm_reve') THEN reverse_qual(qual) ELSE qual END AS qual + FROM alig + WHERE qname IN ( + SELECT qname + FROM alig + GROUP BY qname + HAVING COUNT(*) = 2 + AND SUM(bam_flag(flag, 'firs_segm')) = 1 + AND SUM(bam_flag(flag, 'last_segm')) = 1 + ) +) +SELECT l.qname AS qname, l.seq AS l_seq, l.qual AS qual1, r.seq AS r_seq, r.qual AS qual2 +FROM ( + SELECT * + FROM alig_proj + WHERE bam_flag(flag, 'firs_segm') = True +) AS l JOIN ( + SELECT * + FROM alig_proj + WHERE bam_flag(flag, 'last_segm') = True +) AS r + ON l.qname = r.qname +ORDER BY qname; + +-- Description: +-- This query selects fields required by the FASTQ file format (qname, seq/seq-reverse, qual/qual-reverse). +-- It performs a join resulting in a wide table starting with a qname, followed by the seq/seq-reverse +-- and the qual/qual-reverse for both reads of this qname. I.e., every tuple in the result contains a read pair. +-- The outer query joins two subresults together. Both subresults only contain primary alignments. +-- Also, alignments that have not stored their seq or qual value are filtered out in the +-- subresults and alignments with a mapping quality >= 100 are also filtered out. _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list