Changeset: 6a45c22ca31d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6a45c22ca31d Modified Files: sql/backends/monet5/bam/Tests/bam_lib.stable.err sql/backends/monet5/bam/Tests/bam_lib.stable.out sql/backends/monet5/bam/bam_lib.c Branch: bamloader Log Message:
Replaced linear time case-statement by constant time dictionary lookup for reverse_seq udf, also improved its output diffs (296 lines): diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.err b/sql/backends/monet5/bam/Tests/bam_lib.stable.err --- a/sql/backends/monet5/bam/Tests/bam_lib.stable.err +++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.err @@ -1,9 +1,9 @@ stderr of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself: -# 09:28:35 > -# 09:28:35 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=33399" "--set" "mapi_usock=/var/tmp/mtest-22883/.s.monetdb.33399" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam" "--set" "mal_listing=0" "--set" "embedded_r=yes" -# 09:28:35 > +# 14:48:31 > +# 14:48:31 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=35990" "--set" "mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam" "--set" "mal_listing=0" "--set" "embedded_r=yes" +# 14:48:31 > # builtin opt gdk_dbpath = /home/robin/MonetDB/PREFIX/var/monetdb5/dbfarm/demo # builtin opt gdk_debug = 0 @@ -17,71 +17,23 @@ stderr of test 'bam_lib` in directory 's # builtin opt sql_debug = 0 # cmdline opt gdk_nr_threads = 0 # cmdline opt mapi_open = true -# cmdline opt mapi_port = 33399 -# cmdline opt mapi_usock = /var/tmp/mtest-22883/.s.monetdb.33399 +# cmdline opt mapi_port = 35990 +# cmdline opt mapi_usock = /var/tmp/mtest-20228/.s.monetdb.35990 # cmdline opt monet_prompt = # cmdline opt mal_listing = 2 # cmdline opt gdk_dbpath = /home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam # cmdline opt mal_listing = 0 # cmdline opt embedded_r = yes # cmdline opt gdk_debug = 536870922 -# <bam_loader>: Loader started for 1 BAM file... -# # <bam_loader> Retrieving next file id... -# # <bam_loader> Initializing BAM wrapper for file '/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'... -# # <bam_loader> Parsing header for file '/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'... -# # <bam_loader> Creating alignment tables for file '/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'... -# # CREATE TABLE bam.alignments_1 ( -# virtual_offset BIGINT NOT NULL, -# qname STRING NOT NULL, -# flag SMALLINT NOT NULL, -# rname STRING NOT NULL, -# pos INT NOT NULL, -# mapq SMALLINT NOT NULL, -# cigar STRING NOT NULL, -# rnext STRING NOT NULL, -# pnext INT NOT NULL, -# tlen INT NOT NULL, -# seq STRING NOT NULL, -# qual STRING NOT NULL, -# CONSTRAINT alignments_1_pkey_virtual_offset PRIMARY KEY (virtual_offset) -# ); -# -# CREATE TABLE bam.alignments_extra_1 ( -# tag CHAR(2) NOT NULL, -# virtual_offset BIGINT NOT NULL, -# type CHAR(1) NOT NULL, -# value STRING, -# CONSTRAINT alignments_extra_1_pkey_tag_virtual_offset PRIMARY KEY (tag, virtual_offset), -# CONSTRAINT alignments_extra_1_fkey_virtual_offset FOREIGN KEY (virtual_offset) -# REFERENCES bam.alignments_1 (virtual_offset) -# ); -# # <bam_loader> Creating reader threads... -# # <bam_loader> Waiting for reader threads to finish... -# # <Thread 0> Starting on next file... -# # <Thread 0> Processing alignments of file '/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam' (file id 1)... -# # <Thread 0> All alignments in file '/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam' (file id 1) processed! -# # <Thread 0> Starting on next file... -# # <Thread 0> No files left to work on; thread done -# # <bam_loader> Copying data into DB... -# # COPY BINARY INTO bam.files FROM ('bam_binaries/1/files_0', 'bam_binaries/1/files_1', 'bam_binaries/1/files_2', 'bam_binaries/1/files_3', 'bam_binaries/1/files_4', 'bam_binaries/1/files_5'); -# COPY BINARY INTO bam.sq FROM ('bam_binaries/1/sq_0', 'bam_binaries/1/sq_1', 'bam_binaries/1/sq_2', 'bam_binaries/1/sq_3', 'bam_binaries/1/sq_4', 'bam_binaries/1/sq_5', 'bam_binaries/1/sq_6'); -# COPY BINARY INTO bam.pg FROM ('bam_binaries/1/pg_0', 'bam_binaries/1/pg_1', 'bam_binaries/1/pg_2', 'bam_binaries/1/pg_3', 'bam_binaries/1/pg_4', 'bam_binaries/1/pg_5'); -# COPY BINARY INTO bam.alignments_1 FROM ('bam_binaries/1/alignments_0', 'bam_binaries/1/alignments_1', 'bam_binaries/1/alignments_2', 'bam_binaries/1/alignments_3', 'bam_binaries/1/alignments_4', 'bam_binaries/1/alignments_5', 'bam_binaries/1/alignments_6', 'bam_binaries/1/alignments_7', 'bam_binaries/1/alignments_8', 'bam_binaries/1/alignments_9', 'bam_binaries/1/alignments_10', 'bam_binaries/1/alignments_11'); -# COPY BINARY INTO bam.alignments_extra_1 FROM ('bam_binaries/1/alignments_extra_0', 'bam_binaries/1/alignments_extra_1', 'bam_binaries/1/alignments_extra_2', 'bam_binaries/1/alignments_extra_3'); -# -# # <bam_loader>: Loader finished processing 1 BAM file... -# -# 09:28:35 > -# 09:28:35 > "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib" -# 09:28:35 > -MAPI = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399 +# 14:48:31 > +# 14:48:31 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-20228" "--port=35990" +# 14:48:31 > + +MAPI = (monetdb) /var/tmp/mtest-20228/.s.monetdb.35990 QUERY = SELECT bam_flag(111, 'Fail-hard'); ERROR = !Unknown flag name given: Fail-hard -MAPI = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399 -QUERY = SELECT reverse_seq('invalidchars'); -ERROR = !Invalid character found in sequence: 'i' -# 09:28:36 > -# 09:28:36 > "Done." -# 09:28:36 > +# 14:48:32 > +# 14:48:32 > "Done." +# 14:48:32 > diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.out b/sql/backends/monet5/bam/Tests/bam_lib.stable.out --- a/sql/backends/monet5/bam/Tests/bam_lib.stable.out +++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.out @@ -1,9 +1,9 @@ stdout of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself: -# 08:43:53 > -# 08:43:53 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=35352" "--set" "mapi_usock=/var/tmp/mtest-9342/.s.monetdb.35352" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam" "--set" "mal_listing=0" "--set" "embedded_r=yes" -# 08:43:53 > +# 14:48:31 > +# 14:48:31 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=35990" "--set" "mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam" "--set" "mal_listing=0" "--set" "embedded_r=yes" +# 14:48:31 > # MonetDB 5 server v11.20.0 # This is an unreleased version @@ -13,47 +13,16 @@ stdout of test 'bam_lib` in directory 's # Copyright (c) 1993-July 2008 CWI. # Copyright (c) August 2008-2014 MonetDB B.V., all rights reserved # Visit http://www.monetdb.org/ for further information -# Listening for connection requests on mapi:monetdb://robin-xps13:35352/ -# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-9342/.s.monetdb.35352 +# Listening for connection requests on mapi:monetdb://robin-xps13:35990/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-20228/.s.monetdb.35990 # MonetDB/SQL module loaded # MonetDB/R module loaded Ready. -# SQL catalog created, loading sql scripts once -# loading sql script: 09_like.sql -# loading sql script: 10_math.sql -# loading sql script: 11_times.sql -# loading sql script: 12_url.sql -# loading sql script: 13_date.sql -# loading sql script: 14_inet.sql -# loading sql script: 15_querylog.sql -# loading sql script: 16_tracelog.sql -# loading sql script: 19_cluster.sql -# loading sql script: 20_vacuum.sql -# loading sql script: 21_dependency_functions.sql -# loading sql script: 22_clients.sql -# loading sql script: 23_skyserver.sql -# loading sql script: 24_zorder.sql -# loading sql script: 25_debug.sql -# loading sql script: 26_sysmon.sql -# loading sql script: 39_analytics.sql -# loading sql script: 39_analytics_hge.sql -# loading sql script: 40_json.sql -# loading sql script: 40_json_hge.sql -# loading sql script: 41_jsonstore.sql -# loading sql script: 45_uuid.sql -# loading sql script: 75_storagemodel.sql -# loading sql script: 80_statistics.sql -# loading sql script: 80_udf.sql -# loading sql script: 80_udf_hge.sql -# loading sql script: 85_bam.sql -# loading sql script: 89_generator_hge.sql -# loading sql script: 90_generator.sql -# loading sql script: 99_system.sql -# 08:43:54 > -# 08:43:54 > "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib" -# 08:43:54 > +# 14:48:31 > +# 14:48:31 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-20228" "--port=35990" +# 14:48:31 > #SET SCHEMA bam; #SELECT bam_flag(1, 'mult_segm'); @@ -456,6 +425,12 @@ Ready. % clob # type % 15 # length [ "NVBHDKMWSRYCGAT" ] +#SELECT reverse_seq('invalidchars'); +% .L # table_name +% reverse_seq_single_value # name +% clob # type +% 12 # length +[ "????????????" ] #SELECT seq, reverse_seq(seq) AS reverse_seq #FROM bam.alignments_1; % bam.alignments_1, bam.L # table_name @@ -780,6 +755,6 @@ Ready. % 8, 100, 8, 21, 1 # length [ 17922987, "TATACTTATAGAACAAATGAACCCAAAACCACATAAGGTAAACAACAAAGCTACTGGTTCAAAATTAAGCCTAACTTCAACAGTACCAGGCAAAAACCAT", 17922987, "3=1X1=1X43=1X16=1X33=", "T" ] -# 08:43:54 > -# 08:43:54 > "Done." -# 08:43:54 > +# 14:48:32 > +# 14:48:32 > "Done." +# 14:48:32 > diff --git a/sql/backends/monet5/bam/bam_lib.c b/sql/backends/monet5/bam/bam_lib.c --- a/sql/backends/monet5/bam/bam_lib.c +++ b/sql/backends/monet5/bam/bam_lib.c @@ -54,68 +54,51 @@ bam_flag(bit * ret, sht * flag, str * na return MAL_SUCCEED; } +char reverse_seq_map[] = { + 'T', //A + 'V', //B + 'G', //C + 'H', //D + 0 , //E + 0 , //F + 'C', //G + 'D', //H + 0 , //I + 0 , //J + 'M', //K + 0 , //L + 'K', //M + 'N', //N + 0 , //O + 0 , //P + 0 , //Q + 'Y', //R + 'S', //S + 'A', //T + 0 , //U + 'B', //V + 'W', //W + 0 , //X + 'R' //Y +}; + str reverse_seq(str * ret, str * seq) { str result; unsigned int i; unsigned int len = strlen(*seq); + sht map_index; result = GDKmalloc((len + 1) * sizeof(char)); if (result == NULL) throw(MAL, "reverse_seq", MAL_MALLOC_FAIL); + for (i = 0; i < len; ++i) { - switch ((*seq)[i]) { - case 'A': - result[len - i - 1] = 'T'; - break; - case 'T': - result[len - i - 1] = 'A'; - break; - case 'C': - result[len - i - 1] = 'G'; - break; - case 'G': - result[len - i - 1] = 'C'; - break; - case 'R': - result[len - i - 1] = 'Y'; - break; - case 'Y': - result[len - i - 1] = 'R'; - break; - case 'S': - result[len - i - 1] = 'S'; - break; - case 'W': - result[len - i - 1] = 'W'; - break; - case 'K': - result[len - i - 1] = 'M'; - break; - case 'M': - result[len - i - 1] = 'K'; - break; - case 'H': - result[len - i - 1] = 'D'; - break; - case 'D': - result[len - i - 1] = 'H'; - break; - case 'V': - result[len - i - 1] = 'B'; - break; - case 'B': - result[len - i - 1] = 'V'; - break; - case 'N': - result[len - i - 1] = 'N'; - break; - default: - GDKfree(result); - throw(MAL, "reverse_seq", - "Invalid character found in sequence: '%c'\n", - (*seq)[i]); + map_index = (sht)((*seq)[i] - 'A'); + if(map_index < 0 || map_index > 24 || + (result[len - i - 1] = reverse_seq_map[map_index]) == 0) { + result[len - i - 1] = '?'; } } result[len] = '\0'; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list