Changeset: a02498cf33c0 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a02498cf33c0 Added Files: sql/backends/monet5/bamloader/bam_db_interface.c sql/backends/monet5/bamloader/bam_db_interface.h sql/backends/monet5/bamloader/bam_wrapper.c sql/backends/monet5/bamloader/bam_wrapper.h Removed Files: sql/backends/monet5/bamloader/bam_sql.c sql/backends/monet5/bamloader/bam_sql.h sql/backends/monet5/bamloader/sql/bam_clear.sql sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_0.sql sql/backends/monet5/bamloader/sql/bam_create_alignments_storage_1.sql sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_0.sql sql/backends/monet5/bamloader/sql/bam_drop_alignments_storage_1.sql sql/backends/monet5/bamloader/sql/bam_schema.sql Modified Files: sql/backends/monet5/bamloader/85_bam.sql sql/backends/monet5/bamloader/Makefile.ag sql/backends/monet5/bamloader/bam.mal sql/backends/monet5/bamloader/bam_globals.h sql/backends/monet5/bamloader/bam_loader.c sql/backends/monet5/bamloader/bam_loader.h Branch: bamloader Log Message:
Finished basic bam loader functionality. Divided code nicely over multiple files so it will be relatively easy to separate the code between client-side code and server-side code at some later point. diffs (truncated from 4985 to 300 lines): diff --git a/sql/backends/monet5/bamloader/85_bam.sql b/sql/backends/monet5/bamloader/85_bam.sql --- a/sql/backends/monet5/bamloader/85_bam.sql +++ b/sql/backends/monet5/bamloader/85_bam.sql @@ -1,10 +1,13 @@ -CREATE PROCEDURE bam_loader_repos(bam_repos STRING, dbschema INT, storage_mask STRING, nr_threads INT) +CREATE PROCEDURE bam_loader_repos(bam_repos STRING, dbschema SMALLINT, nr_threads SMALLINT) EXTERNAL NAME bam.bam_loader_repos; -CREATE PROCEDURE bam_loader_file(bam_file STRING, dbschema INT, storage_mask STRING) +CREATE PROCEDURE bam_loader_files(bam_files STRING, dbschema SMALLINT, nr_threads SMALLINT) +EXTERNAL NAME bam.bam_loader_files; + +CREATE PROCEDURE bam_loader_file(bam_file STRING, dbschema SMALLINT) EXTERNAL NAME bam.bam_loader_file; -CREATE PROCEDURE bam_drop_file(file_id SMALLINT, dbschema SMALLINT) +CREATE PROCEDURE bam_drop_file(file_id BIGINT, dbschema SMALLINT) EXTERNAL NAME bam.bam_drop_file; diff --git a/sql/backends/monet5/bamloader/Makefile.ag b/sql/backends/monet5/bamloader/Makefile.ag --- a/sql/backends/monet5/bamloader/Makefile.ag +++ b/sql/backends/monet5/bamloader/Makefile.ag @@ -29,12 +29,14 @@ INCLUDES = .. 
\ ../../../../common/options \ ../../../../common/stream \ ../../../../gdk \ + ../../../../tools/merovingian \ + ../../../../tools/merovingian/daemon \ $(SAMTOOLS_CFLAGS) lib__bam = { MODULE DIR = libdir/monetdb5 - SOURCES = bam_loader.c bam_loader.h bam_lib.h bam_lib.c bam_sql.h bam_sql.c + SOURCES = bam_loader.c bam_loader.h bam_wrapper.c bam_wrapper.h bam_db_interface.c bam_db_interface.h bam_globals.h bam_lib.h bam_lib.c LIBS = ../../../../monetdb5/tools/libmonetdb5 \ ../../../../gdk/libbat \ $(SAMTOOLS_LIBS) diff --git a/sql/backends/monet5/bamloader/bam.mal b/sql/backends/monet5/bamloader/bam.mal --- a/sql/backends/monet5/bamloader/bam.mal +++ b/sql/backends/monet5/bamloader/bam.mal @@ -1,19 +1,23 @@ module bam; -# Bam_loader related signatures +# Bam loader related signatures -pattern bam_loader_repos(bam_repos_list:str, dbschema:int, storage_mask:str, nr_threads:int):void +pattern bam_loader_repos(bam_repos:str, dbschema:sht, nr_threads:sht):void address bam_loader_repos -comment "Read the files in the given list of bam files and store them in the database"; +comment "Read all bam files in the given bam_repos directory (non-recursive) and store them in the given dbschema"; -pattern bam_loader_file(bam_file:str, dbschema:int, storage_mask:str):void +pattern bam_loader_files(bam_files:str, dbschema:sht, nr_threads:sht):void +address bam_loader_files +comment "Read all bam files in the file list stored in the file bam_files (separated by a newline) and store them in the given dbschema"; + +pattern bam_loader_file(bam_file:str, dbschema:sht):void address bam_loader_file -comment "Read the bam file given as the first parameter and store it in the database"; +comment "Read bam_file and store it in the given dbschema"; -pattern bam_drop_file(file_id:sht, dbschema:sht):void +pattern bam_drop_file(file_id:lng, dbschema:sht):void address bam_drop_file -comment "Drop alignment tables and header data for the given file" +comment "Drop alignment tables and header data 
for the bam file with the given file_id" # Scalar signatures for bam_lib diff --git a/sql/backends/monet5/bamloader/bam_db_interface.c b/sql/backends/monet5/bamloader/bam_db_interface.c new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bamloader/bam_db_interface.c @@ -0,0 +1,482 @@ +#include "monetdb_config.h" +#include "bam_globals.h" +#include "bam_db_interface.h" + +#define SQL_CREATE_STORAGE_0 \ + "CREATE TABLE bam.alignments_"LLFMT" ( \n\ + virtual_offset BIGINT NOT NULL, \n\ + qname STRING NOT NULL, \n\ + flag SMALLINT NOT NULL, \n\ + rname STRING NOT NULL, \n\ + pos INT NOT NULL, \n\ + mapq SMALLINT NOT NULL, \n\ + cigar STRING NOT NULL, \n\ + rnext STRING NOT NULL, \n\ + pnext INT NOT NULL, \n\ + tlen INT NOT NULL, \n\ + seq STRING NOT NULL, \n\ + qual STRING NOT NULL, \n\ + CONSTRAINT alignments_"LLFMT"_pkey_virtual_offset PRIMARY KEY (virtual_offset) \n\ + ); \n\ + \n\ + CREATE TABLE bam.alignments_extra_"LLFMT" ( \n\ + tag CHAR(2) NOT NULL, \n\ + virtual_offset BIGINT NOT NULL, \n\ + type CHAR(1) NOT NULL, \n\ + value STRING, \n\ + CONSTRAINT alignments_extra_"LLFMT"_pkey_tag_virtual_offset PRIMARY KEY (tag, virtual_offset), \n\ + CONSTRAINT alignments_extra_"LLFMT"_fkey_virtual_offset FOREIGN KEY (virtual_offset) \n\ + REFERENCES bam.alignments_"LLFMT" (virtual_offset) \n\ + );" + +#define SQL_CREATE_STORAGE_1 \ + "CREATE TABLE bam.paired_primary_alignments_"LLFMT" ( \n\ + l_virtual_offset BIGINT NOT NULL, \n\ + r_virtual_offset BIGINT NOT NULL, \n\ + qname STRING NOT NULL, \n\ + l_flag SMALLINT NOT NULL, \n\ + l_rname STRING NOT NULL, \n\ + l_pos INT NOT NULL, \n\ + l_mapq SMALLINT NOT NULL, \n\ + l_cigar STRING NOT NULL, \n\ + l_rnext STRING NOT NULL, \n\ + l_pnext INT NOT NULL, \n\ + l_tlen INT NOT NULL, \n\ + l_seq STRING NOT NULL, \n\ + l_qual STRING NOT NULL, \n\ + r_flag SMALLINT NOT NULL, \n\ + r_rname STRING NOT NULL, \n\ + r_pos INT NOT NULL, \n\ + r_mapq SMALLINT NOT NULL, \n\ + r_cigar STRING NOT NULL, \n\ + r_rnext STRING NOT 
NULL, \n\ + r_pnext INT NOT NULL, \n\ + r_tlen INT NOT NULL, \n\ + r_seq STRING NOT NULL, \n\ + r_qual STRING NOT NULL, \n\ + CONSTRAINT paired_primary_alignments_"LLFMT"_pkey_l_virtual_offset_r_virtual_offset \n\ + PRIMARY KEY (l_virtual_offset, r_virtual_offset) \n\ + ); \n\ + \n\ + CREATE TABLE bam.paired_secondary_alignments_"LLFMT" ( \n\ + l_virtual_offset BIGINT NOT NULL, \n\ + r_virtual_offset BIGINT NOT NULL, \n\ + qname STRING NOT NULL, \n\ + l_flag SMALLINT NOT NULL, \n\ + l_rname STRING NOT NULL, \n\ + l_pos INT NOT NULL, \n\ + l_mapq SMALLINT NOT NULL, \n\ + l_cigar STRING NOT NULL, \n\ + l_rnext STRING NOT NULL, \n\ + l_pnext INT NOT NULL, \n\ + l_tlen INT NOT NULL, \n\ + l_seq STRING NOT NULL, \n\ + l_qual STRING NOT NULL, \n\ + r_flag SMALLINT NOT NULL, \n\ + r_rname STRING NOT NULL, \n\ + r_pos INT NOT NULL, \n\ + r_mapq SMALLINT NOT NULL, \n\ + r_cigar STRING NOT NULL, \n\ + r_rnext STRING NOT NULL, \n\ + r_pnext INT NOT NULL, \n\ + r_tlen INT NOT NULL, \n\ + r_seq STRING NOT NULL, \n\ + r_qual STRING NOT NULL, \n\ + CONSTRAINT paired_secondary_alignments_"LLFMT"_pkey_l_virtual_offset_r_virtual_offset \n\ + PRIMARY KEY (l_virtual_offset, r_virtual_offset) \n\ + ); \n\ + \n\ + CREATE TABLE bam.unpaired_alignments_"LLFMT" ( \n\ + virtual_offset BIGINT NOT NULL, \n\ + qname STRING NOT NULL, \n\ + flag SMALLINT NOT NULL, \n\ + rname STRING NOT NULL, \n\ + pos INT NOT NULL, \n\ + mapq SMALLINT NOT NULL, \n\ + cigar STRING NOT NULL, \n\ + rnext STRING NOT NULL, \n\ + pnext INT NOT NULL, \n\ + tlen INT NOT NULL, \n\ + seq STRING NOT NULL, \n\ + qual STRING NOT NULL, \n\ + CONSTRAINT unpaired_alignments_"LLFMT"_pkey_virtual_offset PRIMARY KEY (virtual_offset) \n\ + ); \n\ + \n\ + CREATE TABLE bam.alignments_extra_"LLFMT" ( \n\ + tag CHAR(2) NOT NULL, \n\ + virtual_offset BIGINT NOT NULL, \n\ + type CHAR(1) NOT NULL, \n\ + value STRING, \n\ + CONSTRAINT alignments_extra_"LLFMT"_pkey_tag_virtual_offset PRIMARY KEY (tag, virtual_offset) \n\ + ); \n\ + \n\ + 
CREATE VIEW bam.unpaired_primary_alignments_"LLFMT" AS \n\ + SELECT l_virtual_offset AS virtual_offset, qname, l_flag AS flag, l_rname AS rname, l_pos AS pos, l_mapq AS mapq, \n\ + l_cigar AS cigar, l_rnext AS rnext, l_pnext AS pnext, l_tlen AS tlen, l_seq AS seq, l_qual AS qual \n\ + FROM bam.paired_primary_alignments_"LLFMT" \n\ + UNION ALL \n\ + SELECT r_virtual_offset AS virtual_offset, qname, r_flag AS flag, r_rname AS rname, r_pos AS pos, r_mapq AS mapq, \n\ + r_cigar AS cigar, r_rnext AS rnext, r_pnext AS pnext, r_tlen AS tlen, r_seq AS seq, r_qual AS qual \n\ + FROM bam.paired_primary_alignments_"LLFMT"; \n\ + \n\ + CREATE VIEW bam.unpaired_secondary_alignments_"LLFMT" AS \n\ + SELECT l_virtual_offset AS virtual_offset, qname, l_flag AS flag, l_rname AS rname, l_pos AS pos, l_mapq AS mapq, \n\ + l_cigar AS cigar, l_rnext AS rnext, l_pnext AS pnext, l_tlen AS tlen, l_seq AS seq, l_qual AS qual \n\ + FROM bam.paired_secondary_alignments_"LLFMT" \n\ + UNION ALL \n\ + SELECT r_virtual_offset AS virtual_offset, qname, r_flag AS flag, r_rname AS rname, r_pos AS pos, r_mapq AS mapq, \n\ + r_cigar AS cigar, r_rnext AS rnext, r_pnext AS pnext, r_tlen AS tlen, r_seq AS seq, r_qual AS qual \n\ + FROM bam.paired_secondary_alignments_"LLFMT"; \n\ + \n\ + CREATE VIEW bam.unpaired_all_alignments_"LLFMT" AS \n\ + SELECT * \n\ + FROM bam.unpaired_primary_alignments_"LLFMT" \n\ + UNION ALL \n\ + SELECT * \n\ + FROM bam.unpaired_secondary_alignments_"LLFMT" \n\ + UNION ALL \n\ + SELECT * \n\ + FROM bam.unpaired_alignments_"LLFMT";" + +#define SQL_DROP_HEADER \ + "DELETE FROM bam.pg WHERE file_id = "LLFMT";\n" \ + "DELETE FROM bam.rg WHERE file_id = "LLFMT";\n" \ + "DELETE FROM bam.sq WHERE file_id = "LLFMT";\n" \ + "DELETE FROM bam.files WHERE file_id = "LLFMT";\n" + +#define SQL_DROP_STORAGE_0 \ + "DROP TABLE bam.alignments_"LLFMT";\n" \ + "DROP TABLE bam.alignments_extra_"LLFMT";\n" + +#define SQL_DROP_STORAGE_1 \ + "DROP VIEW bam.unpaired_all_alignments_"LLFMT";\n"\ + 
"DROP VIEW bam.unpaired_secondary_alignments_"LLFMT";\n"\ + "DROP VIEW bam.unpaired_primary_alignments_"LLFMT";\n"\ + "DROP TABLE bam.paired_primary_alignments_"LLFMT";\n" \ + "DROP TABLE bam.paired_secondary_alignments_"LLFMT";\n" \ + "DROP TABLE bam.unpaired_alignments_"LLFMT";\n" \ + "DROP TABLE bam.alignments_extra_"LLFMT";\n" + + + +#define SQL_COPY_INTO_FILES "COPY BINARY INTO bam.files FROM ('%s', '%s', '%s', '%s', '%s', '%s');\n" +#define SQL_COPY_INTO_SQ "COPY BINARY INTO bam.sq FROM ('%s', '%s', '%s', '%s', '%s', '%s', '%s');\n" +#define SQL_COPY_INTO_RG "COPY BINARY INTO bam.rg FROM ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');\n" +#define SQL_COPY_INTO_PG "COPY BINARY INTO bam.pg FROM ('%s', '%s', '%s', '%s', '%s', '%s');\n" + +#define SQL_COPY_INTO_ALIGNMENTS "COPY BINARY INTO bam.%salignments_"LLFMT" FROM \ + ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');\n" +#define SQL_COPY_INTO_PAIRED_ALIGNMENTS "COPY BINARY INTO bam.paired_%s_alignments_"LLFMT" FROM \ + ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', \ + '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');\n" + +#define SQL_COPY_INTO_ALIGNMENTS_EXTRA "COPY BINARY INTO bam.alignments_extra_"LLFMT" FROM ('%s', '%s', '%s', '%s');\n" + +#define BUF_SIZE_CREATE_STORAGE_0 2048 +#define BUF_SIZE_CREATE_STORAGE_1 8192 +#define BUF_SIZE_DROP_FILE 1024 +#define BUF_SIZE_COPY_INTO 8192 + + + + +/* Some buffers */ +char buf_sql_create_storage_0[BUF_SIZE_CREATE_STORAGE_0]; +char buf_sql_create_storage_1[BUF_SIZE_CREATE_STORAGE_1]; +char buf_sql_drop_file[BUF_SIZE_DROP_FILE]; +char buf_sql_copy_into[BUF_SIZE_COPY_INTO]; + + + + +/* TODO Find out if executed SQL queries get logged somewhere else already, since in that case we shouldn't log it again */ + +str +create_schema_if_not_exists(Client cntxt, mvc *m, str schemaname, str descr, sql_schema **ret) { + sql_schema *result; + if((result = mvc_bind_schema(m, 
schemaname)) == NULL) { + char buf_sql_create_schema[64]; + str sql_create_schema = buf_sql_create_schema; + str msg; + + snprintf(sql_create_schema, 64, "CREATE SCHEMA %s;", schemaname); + + TO_LOG("<bam_loader> Creating schema '%s'...", schemaname); + RUN_SQL(cntxt, &sql_create_schema, descr, msg); + if(msg != MAL_SUCCEED) { + REUSE_EXCEPTION(msg, MAL, "create_schema_if_not_exists", "Could not create bam schema: %s", msg); + return msg; + } + if((result = mvc_bind_schema(m, schemaname)) == NULL) { + throw(MAL, "create_schema_if_not_exists", "Could not create bam schema"); + } + } + if(ret) *ret = result; + return MAL_SUCCEED; +} + + +/** + * Function tries to bind to a table with the given name. If it fails (== NULL), it attempts to create the table. + * The function fails if a binding to the table is impossible, even after creation. + * If the function succeeds, it adjusts the optionally given pointer to point to the bound sql_table. _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list