Changeset: 6a45c22ca31d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6a45c22ca31d
Modified Files:
        sql/backends/monet5/bam/Tests/bam_lib.stable.err
        sql/backends/monet5/bam/Tests/bam_lib.stable.out
        sql/backends/monet5/bam/bam_lib.c
Branch: bamloader
Log Message:

Replaced the linear-time switch statement with a constant-time lookup table
for the reverse_seq UDF; invalid characters in the input are now returned as
'?' in the output instead of raising an error


diffs (296 lines):

diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.err 
b/sql/backends/monet5/bam/Tests/bam_lib.stable.err
--- a/sql/backends/monet5/bam/Tests/bam_lib.stable.err
+++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.err
@@ -1,9 +1,9 @@
 stderr of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself:
 
 
-# 09:28:35 >  
-# 09:28:35 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=33399" "--set" 
"mapi_usock=/var/tmp/mtest-22883/.s.monetdb.33399" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
-# 09:28:35 >  
+# 14:48:31 >  
+# 14:48:31 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=35990" "--set" 
"mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 14:48:31 >  
 
 # builtin opt  gdk_dbpath = /home/robin/MonetDB/PREFIX/var/monetdb5/dbfarm/demo
 # builtin opt  gdk_debug = 0
@@ -17,71 +17,23 @@ stderr of test 'bam_lib` in directory 's
 # builtin opt  sql_debug = 0
 # cmdline opt  gdk_nr_threads = 0
 # cmdline opt  mapi_open = true
-# cmdline opt  mapi_port = 33399
-# cmdline opt  mapi_usock = /var/tmp/mtest-22883/.s.monetdb.33399
+# cmdline opt  mapi_port = 35990
+# cmdline opt  mapi_usock = /var/tmp/mtest-20228/.s.monetdb.35990
 # cmdline opt  monet_prompt = 
 # cmdline opt  mal_listing = 2
 # cmdline opt  gdk_dbpath = 
/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam
 # cmdline opt  mal_listing = 0
 # cmdline opt  embedded_r = yes
 # cmdline opt  gdk_debug = 536870922
-# <bam_loader>: Loader started for 1 BAM file...
-# # <bam_loader> Retrieving next file id...
-# # <bam_loader> Initializing BAM wrapper for file 
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # <bam_loader> Parsing header for file 
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # <bam_loader> Creating alignment tables for file 
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam'...
-# # CREATE TABLE bam.alignments_1 ( 
-#              virtual_offset                          BIGINT    NOT NULL, 
-#              qname                                            STRING   NOT 
NULL, 
-#              flag                                              SMALLINT      
NOT NULL, 
-#              rname                                            STRING   NOT 
NULL, 
-#              pos                                                INT          
 NOT NULL, 
-#              mapq                                              SMALLINT      
NOT NULL, 
-#              cigar                                            STRING   NOT 
NULL, 
-#              rnext                                            STRING   NOT 
NULL, 
-#              pnext                                            INT            
 NOT NULL, 
-#              tlen                                              INT           
 NOT NULL, 
-#              seq                                                STRING       
  NOT NULL, 
-#              qual                                              STRING        
  NOT NULL, 
-#              CONSTRAINT alignments_1_pkey_virtual_offset PRIMARY KEY 
(virtual_offset) 
-#      ); 
-#      
-#      CREATE TABLE bam.alignments_extra_1 ( 
-#              tag                                                CHAR(2)      
 NOT NULL, 
-#              virtual_offset                          BIGINT    NOT NULL, 
-#              type                                              CHAR(1)       
 NOT NULL, 
-#              value                                            STRING, 
-#              CONSTRAINT alignments_extra_1_pkey_tag_virtual_offset PRIMARY 
KEY (tag, virtual_offset), 
-#              CONSTRAINT alignments_extra_1_fkey_virtual_offset FOREIGN KEY 
(virtual_offset) 
-#                      REFERENCES bam.alignments_1 (virtual_offset) 
-#      );
-# # <bam_loader> Creating reader threads...
-# # <bam_loader> Waiting for reader threads to finish...
-# # <Thread 0> Starting on next file...
-# # <Thread 0> Processing alignments of file 
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam' 
(file id 1)...
-# # <Thread 0> All alignments in file 
'/home/robin/MonetDB/SOURCE/sql/backends/monet5/bam/Tests/files/file1.bam' 
(file id 1) processed!
-# # <Thread 0> Starting on next file...
-# # <Thread 0> No files left to work on; thread done
-# # <bam_loader> Copying data into DB...
-# # COPY BINARY INTO bam.files FROM ('bam_binaries/1/files_0', 
'bam_binaries/1/files_1', 'bam_binaries/1/files_2', 'bam_binaries/1/files_3', 
'bam_binaries/1/files_4', 'bam_binaries/1/files_5');
-# COPY BINARY INTO bam.sq      FROM ('bam_binaries/1/sq_0', 
'bam_binaries/1/sq_1', 'bam_binaries/1/sq_2', 'bam_binaries/1/sq_3', 
'bam_binaries/1/sq_4', 'bam_binaries/1/sq_5', 'bam_binaries/1/sq_6');
-# COPY BINARY INTO bam.pg      FROM ('bam_binaries/1/pg_0', 
'bam_binaries/1/pg_1', 'bam_binaries/1/pg_2', 'bam_binaries/1/pg_3', 
'bam_binaries/1/pg_4', 'bam_binaries/1/pg_5');
-# COPY BINARY INTO bam.alignments_1 FROM       ('bam_binaries/1/alignments_0', 
'bam_binaries/1/alignments_1', 'bam_binaries/1/alignments_2', 
'bam_binaries/1/alignments_3', 'bam_binaries/1/alignments_4', 
'bam_binaries/1/alignments_5', 'bam_binaries/1/alignments_6', 
'bam_binaries/1/alignments_7', 'bam_binaries/1/alignments_8', 
'bam_binaries/1/alignments_9', 'bam_binaries/1/alignments_10', 
'bam_binaries/1/alignments_11');
-# COPY BINARY INTO bam.alignments_extra_1 FROM 
('bam_binaries/1/alignments_extra_0', 'bam_binaries/1/alignments_extra_1', 
'bam_binaries/1/alignments_extra_2', 'bam_binaries/1/alignments_extra_3');
-# 
-# # <bam_loader>: Loader finished processing 1 BAM file...
-# 
-# 09:28:35 >  
-# 09:28:35 >  "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib"
-# 09:28:35 >  
 
-MAPI  = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399
+# 14:48:31 >  
+# 14:48:31 >  "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" 
"--host=/var/tmp/mtest-20228" "--port=35990"
+# 14:48:31 >  
+
+MAPI  = (monetdb) /var/tmp/mtest-20228/.s.monetdb.35990
 QUERY = SELECT bam_flag(111, 'Fail-hard');
 ERROR = !Unknown flag name given: Fail-hard
-MAPI  = (monetdb) /var/tmp/mtest-22883/.s.monetdb.33399
-QUERY = SELECT reverse_seq('invalidchars');
-ERROR = !Invalid character found in sequence: 'i'
 
-# 09:28:36 >  
-# 09:28:36 >  "Done."
-# 09:28:36 >  
+# 14:48:32 >  
+# 14:48:32 >  "Done."
+# 14:48:32 >  
diff --git a/sql/backends/monet5/bam/Tests/bam_lib.stable.out 
b/sql/backends/monet5/bam/Tests/bam_lib.stable.out
--- a/sql/backends/monet5/bam/Tests/bam_lib.stable.out
+++ b/sql/backends/monet5/bam/Tests/bam_lib.stable.out
@@ -1,9 +1,9 @@
 stdout of test 'bam_lib` in directory 'sql/backends/monet5/bam` itself:
 
 
-# 08:43:53 >  
-# 08:43:53 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=35352" "--set" 
"mapi_usock=/var/tmp/mtest-9342/.s.monetdb.35352" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
-# 08:43:53 >  
+# 14:48:31 >  
+# 14:48:31 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=35990" "--set" 
"mapi_usock=/var/tmp/mtest-20228/.s.monetdb.35990" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/home/robin/MonetDB/PREFIX/var/MonetDB/mTests_sql_backends_monet5_bam"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 14:48:31 >  
 
 # MonetDB 5 server v11.20.0
 # This is an unreleased version
@@ -13,47 +13,16 @@ stdout of test 'bam_lib` in directory 's
 # Copyright (c) 1993-July 2008 CWI.
 # Copyright (c) August 2008-2014 MonetDB B.V., all rights reserved
 # Visit http://www.monetdb.org/ for further information
-# Listening for connection requests on mapi:monetdb://robin-xps13:35352/
-# Listening for UNIX domain connection requests on 
mapi:monetdb:///var/tmp/mtest-9342/.s.monetdb.35352
+# Listening for connection requests on mapi:monetdb://robin-xps13:35990/
+# Listening for UNIX domain connection requests on 
mapi:monetdb:///var/tmp/mtest-20228/.s.monetdb.35990
 # MonetDB/SQL module loaded
 # MonetDB/R   module loaded
 
 Ready.
-# SQL catalog created, loading sql scripts once
-# loading sql script: 09_like.sql
-# loading sql script: 10_math.sql
-# loading sql script: 11_times.sql
-# loading sql script: 12_url.sql
-# loading sql script: 13_date.sql
-# loading sql script: 14_inet.sql
-# loading sql script: 15_querylog.sql
-# loading sql script: 16_tracelog.sql
-# loading sql script: 19_cluster.sql
-# loading sql script: 20_vacuum.sql
-# loading sql script: 21_dependency_functions.sql
-# loading sql script: 22_clients.sql
-# loading sql script: 23_skyserver.sql
-# loading sql script: 24_zorder.sql
-# loading sql script: 25_debug.sql
-# loading sql script: 26_sysmon.sql
-# loading sql script: 39_analytics.sql
-# loading sql script: 39_analytics_hge.sql
-# loading sql script: 40_json.sql
-# loading sql script: 40_json_hge.sql
-# loading sql script: 41_jsonstore.sql
-# loading sql script: 45_uuid.sql
-# loading sql script: 75_storagemodel.sql
-# loading sql script: 80_statistics.sql
-# loading sql script: 80_udf.sql
-# loading sql script: 80_udf_hge.sql
-# loading sql script: 85_bam.sql
-# loading sql script: 89_generator_hge.sql
-# loading sql script: 90_generator.sql
-# loading sql script: 99_system.sql
 
-# 08:43:54 >  
-# 08:43:54 >  "/usr/bin/python2" "bam_lib.SQL.py" "bam_lib"
-# 08:43:54 >  
+# 14:48:31 >  
+# 14:48:31 >  "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" 
"--host=/var/tmp/mtest-20228" "--port=35990"
+# 14:48:31 >  
 
 #SET SCHEMA bam;
 #SELECT bam_flag(1, 'mult_segm');
@@ -456,6 +425,12 @@ Ready.
 % clob # type
 % 15 # length
 [ "NVBHDKMWSRYCGAT"    ]
+#SELECT reverse_seq('invalidchars');
+% .L # table_name
+% reverse_seq_single_value # name
+% clob # type
+% 12 # length
+[ "????????????"       ]
 #SELECT seq, reverse_seq(seq) AS reverse_seq
 #FROM bam.alignments_1;
 % bam.alignments_1,    bam.L # table_name
@@ -780,6 +755,6 @@ Ready.
 % 8,   100,    8,      21,     1 # length
 [ 17922987,    
"TATACTTATAGAACAAATGAACCCAAAACCACATAAGGTAAACAACAAAGCTACTGGTTCAAAATTAAGCCTAACTTCAACAGTACCAGGCAAAAACCAT",
 17922987,       "3=1X1=1X43=1X16=1X33=",        "T"     ]
 
-# 08:43:54 >  
-# 08:43:54 >  "Done."
-# 08:43:54 >  
+# 14:48:32 >  
+# 14:48:32 >  "Done."
+# 14:48:32 >  
diff --git a/sql/backends/monet5/bam/bam_lib.c 
b/sql/backends/monet5/bam/bam_lib.c
--- a/sql/backends/monet5/bam/bam_lib.c
+++ b/sql/backends/monet5/bam/bam_lib.c
@@ -54,68 +54,51 @@ bam_flag(bit * ret, sht * flag, str * na
        return MAL_SUCCEED;
 }
 
+char reverse_seq_map[] = {
+       'T', //A
+       'V', //B
+       'G', //C
+       'H', //D
+        0 , //E
+        0 , //F
+       'C', //G
+       'D', //H
+        0 , //I
+        0 , //J
+       'M', //K
+        0 , //L
+       'K', //M
+       'N', //N
+        0 , //O
+        0 , //P
+        0 , //Q
+       'Y', //R
+       'S', //S
+       'A', //T
+        0 , //U
+       'B', //V
+       'W', //W
+        0 , //X
+       'R'  //Y
+};
+
 str
 reverse_seq(str * ret, str * seq)
 {
        str result;
        unsigned int i;
        unsigned int len = strlen(*seq);
+       sht map_index;
 
        result = GDKmalloc((len + 1) * sizeof(char));
        if (result == NULL)
                throw(MAL, "reverse_seq", MAL_MALLOC_FAIL);
+
        for (i = 0; i < len; ++i) {
-               switch ((*seq)[i]) {
-               case 'A':
-                       result[len - i - 1] = 'T';
-                       break;
-               case 'T':
-                       result[len - i - 1] = 'A';
-                       break;
-               case 'C':
-                       result[len - i - 1] = 'G';
-                       break;
-               case 'G':
-                       result[len - i - 1] = 'C';
-                       break;
-               case 'R':
-                       result[len - i - 1] = 'Y';
-                       break;
-               case 'Y':
-                       result[len - i - 1] = 'R';
-                       break;
-               case 'S':
-                       result[len - i - 1] = 'S';
-                       break;
-               case 'W':
-                       result[len - i - 1] = 'W';
-                       break;
-               case 'K':
-                       result[len - i - 1] = 'M';
-                       break;
-               case 'M':
-                       result[len - i - 1] = 'K';
-                       break;
-               case 'H':
-                       result[len - i - 1] = 'D';
-                       break;
-               case 'D':
-                       result[len - i - 1] = 'H';
-                       break;
-               case 'V':
-                       result[len - i - 1] = 'B';
-                       break;
-               case 'B':
-                       result[len - i - 1] = 'V';
-                       break;
-               case 'N':
-                       result[len - i - 1] = 'N';
-                       break;
-               default:
-                       GDKfree(result);
-                       throw(MAL, "reverse_seq",
-                                 "Invalid character found in sequence: '%c'\n",
-                                 (*seq)[i]);
+               map_index = (sht)((*seq)[i] - 'A');
+               if(map_index < 0 || map_index > 24 ||
+                               (result[len - i - 1] = 
reverse_seq_map[map_index]) == 0) {
+                       result[len - i - 1] = '?';
                }
        }
        result[len] = '\0';
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to