Changeset: 3f7305d007e1 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3f7305d007e1 Added Files: sql/backends/monet5/bam/Tests/bam_lib.reqtests sql/backends/monet5/bam/Tests/bam_lib.sql sql/backends/monet5/bam/Tests/coverage_usecase.reqtests sql/backends/monet5/bam/Tests/coverage_usecase.sql sql/backends/monet5/bam/Tests/mergetable.reqtests sql/backends/monet5/bam/Tests/mergetable.sql Removed Files: sql/backends/monet5/bam/Tests/bam_lib.SQL.py sql/backends/monet5/bam/Tests/mergetable.SQL.py sql/backends/monet5/bam/Tests/sql/bam_lib.sql sql/backends/monet5/bam/Tests/sql/mergetable.sql Modified Files: sql/backends/monet5/bam/Tests/All Branch: bamloader Log Message:
Simplified and improved some tests, added coverage test based on BTW-paper usecase; however does not work yet diffs (truncated from 443 to 300 lines): diff --git a/sql/backends/monet5/bam/Tests/All b/sql/backends/monet5/bam/Tests/All --- a/sql/backends/monet5/bam/Tests/All +++ b/sql/backends/monet5/bam/Tests/All @@ -26,3 +26,4 @@ HAVE_SAMTOOLS?query2.11 HAVE_SAMTOOLS?query2.12 HAVE_SAMTOOLS?mergetable HAVE_SAMTOOLS?bam_lib +#HAVE_SAMTOOLS?coverage_usecase diff --git a/sql/backends/monet5/bam/Tests/bam_lib.SQL.py b/sql/backends/monet5/bam/Tests/bam_lib.SQL.py deleted file mode 100644 --- a/sql/backends/monet5/bam/Tests/bam_lib.SQL.py +++ /dev/null @@ -1,3 +0,0 @@ -import bam - -bam.exec_sql_file("bam_lib.sql", {'PWD': bam.SRCDIR}) \ No newline at end of file diff --git a/sql/backends/monet5/bam/Tests/bam_lib.reqtests b/sql/backends/monet5/bam/Tests/bam_lib.reqtests new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/bam_lib.reqtests @@ -0,0 +1,1 @@ +bam_loader_file \ No newline at end of file diff --git a/sql/backends/monet5/bam/Tests/bam_lib.sql b/sql/backends/monet5/bam/Tests/bam_lib.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/bam_lib.sql @@ -0,0 +1,133 @@ +SET SCHEMA bam; + +# BAM_FLAG + +# Should all give 'true' +SELECT bam_flag(1, 'mult_segm'); +SELECT bam_flag(2, 'prop_alig'); +SELECT bam_flag(4, 'segm_unma'); +SELECT bam_flag(8, 'next_unma'); +SELECT bam_flag(16, 'segm_reve'); +SELECT bam_flag(32, 'next_reve'); +SELECT bam_flag(64, 'firs_segm'); +SELECT bam_flag(128, 'last_segm'); +SELECT bam_flag(256, 'seco_alig'); +SELECT bam_flag(512, 'qual_cont'); +SELECT bam_flag(1024, 'opti_dupl'); +SELECT bam_flag(2048, 'supp_alig'); + +# Should all give 'false' +SELECT bam_flag(2, 'mult_segm'); +SELECT bam_flag(4, 'prop_alig'); +SELECT bam_flag(8, 'segm_unma'); +SELECT bam_flag(16, 'next_unma'); +SELECT bam_flag(4, 'segm_reve'); +SELECT bam_flag(2, 'next_reve'); +SELECT bam_flag(4, 'firs_segm'); +SELECT bam_flag(8, 'last_segm'); +SELECT bam_flag(16, 'seco_alig'); +SELECT bam_flag(32, 'qual_cont'); +SELECT bam_flag(64, 'opti_dupl'); +SELECT bam_flag(128, 'supp_alig'); + +# Should all give 'true' +SELECT bam_flag(2047, 'mult_segm'); +SELECT bam_flag(2047, 'prop_alig'); +SELECT bam_flag(2047, 'segm_unma'); +SELECT bam_flag(2047, 'next_unma'); +SELECT bam_flag(2047, 'segm_reve'); +SELECT bam_flag(2047, 'next_reve'); +SELECT bam_flag(2047, 'firs_segm'); +SELECT bam_flag(2047, 'last_segm'); +SELECT bam_flag(2047, 'seco_alig'); +SELECT bam_flag(2047, 'qual_cont'); +SELECT bam_flag(2047, 'opti_dupl'); +SELECT bam_flag(4095, 'supp_alig'); + +# Should fail +SELECT bam_flag(111, 'Fail-hard'); + +# Bulk +SELECT flag, + bam_flag(flag, 'mult_segm') AS mult_segm, + bam_flag(flag, 'prop_alig') AS prop_alig, + bam_flag(flag, 'segm_unma') AS segm_unma, + bam_flag(flag, 'next_unma') AS next_unma, + bam_flag(flag, 'segm_reve') AS segm_reve, + bam_flag(flag, 'next_reve') AS next_reve, + bam_flag(flag, 'firs_segm') AS firs_segm, + bam_flag(flag, 'last_segm') AS last_segm, + bam_flag(flag, 'seco_alig') AS seco_alig, + bam_flag(flag, 'qual_cont') AS qual_cont, + bam_flag(flag, 'opti_dupl') AS opti_dupl, + bam_flag(flag, 'supp_alig') AS supp_alig +FROM bam.alignments_1; + + + + +# REVERSE_SEQ +SELECT reverse_seq('A'); +SELECT reverse_seq('T'); +SELECT reverse_seq('C'); +SELECT reverse_seq('G'); +SELECT reverse_seq('R'); +SELECT reverse_seq('Y'); +SELECT reverse_seq('S'); +SELECT reverse_seq('W'); +SELECT reverse_seq('K'); +SELECT reverse_seq('M'); +SELECT reverse_seq('H'); +SELECT reverse_seq('D'); +SELECT reverse_seq('V'); +SELECT reverse_seq('B'); +SELECT reverse_seq('N'); +SELECT reverse_seq('ATCGRYSWKMHDVBN'); +SELECT reverse_seq('invalidchars'); + +# Bulk +SELECT seq, reverse_seq(seq) AS reverse_seq +FROM bam.alignments_1; + + +# REVERSE_QUAL +SELECT reverse_qual('Should be reversed'); + +# Bulk +SELECT qual, reverse_qual(qual) AS reverse_qual +FROM bam.alignments_1; + + + + +# SEQ_LENGTH +SELECT seq_length('18M'); +SELECT seq_length('3I5D6N'); +SELECT seq_length('3=1X1=1X43=1X16=1X33='); + +# Bulk +SELECT cigar, seq_length(cigar) AS seq_length +FROM bam.alignments_1; + + + +# SEQ_CHAR + +# Some simple cases +SELECT seq_char(5000, 'ACTGAG', 0, '6M'); +SELECT seq_char(5000, 'ACTGAG', 4995, '6M'); +SELECT seq_char(5000, 'ACTGAG', 4994, '6M'); +SELECT seq_char(5000, 'ACTGAG', 5000, '6M'); +SELECT seq_char(5000, 'ACTGAG', 5001, '6M'); + +# Cases inspired by http://genome.sph.umich.edu/wiki/SAM#What_is_a_CIGAR.3F +SELECT seq_char(7, 'ACTAGAATGGCT', 5, '3M1I3M1D5M'); +SELECT seq_char(8, 'ACTAGAATGGCT', 5, '3M1I3M1D5M'); +SELECT seq_char(11, 'ACTAGAATGGCT', 5, '3M1I3M1D5M'); +SELECT seq_char(16, 'ACTAGAATGGCT', 5, '3M1I3M1D5M'); +SELECT seq_char(17, 'ACTAGAATGGCT', 5, '3M1I3M1D5M'); + +# Bulk +SELECT 17922987 AS ref_pos, seq, pos, cigar, seq_char(17922987, seq, pos, cigar) AS seq_char +FROM bam.alignments_1 +WHERE seq_char(17922987, seq, pos, cigar) IS NOT NULL; diff --git a/sql/backends/monet5/bam/Tests/coverage_usecase.reqtests b/sql/backends/monet5/bam/Tests/coverage_usecase.reqtests new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/coverage_usecase.reqtests @@ -0,0 +1,1 @@ +mergetable diff --git a/sql/backends/monet5/bam/Tests/coverage_usecase.sql b/sql/backends/monet5/bam/Tests/coverage_usecase.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/coverage_usecase.sql @@ -0,0 +1,45 @@ +# Creation of base table containing positional data +CREATE TABLE base ( + refpos INT, seq_char CHAR, cnt INT +); + +INSERT INTO base ( + SELECT s.value AS refpos, + bam.seq_char(s.value, al.seq, al.pos, al.cigar) AS seq_char, + COUNT(*) AS cnt + FROM + generate_series(CAST(0 AS INT), CAST(18960 AS INT)) as s + JOIN ( + SELECT pos, seq, cigar + FROM bam.alignments_1 + WHERE pos > 0 + ) AS al + ON s.value BETWEEN al.pos AND al.pos + bam.seq_length(al.cigar) + GROUP BY refpos, seq_char +); + +# Coverage, taken from the base table +CREATE VIEW coverage AS ( + SELECT refpos, SUM(cnt) AS cnt + FROM base + WHERE seq_char IS NOT NULL + GROUP BY refpos +); + +SELECT * +FROM coverage +ORDER BY cnt DESC; + + +# Coverage groups +SELECT refpos - refpos % 1000 AS grp_start, + refpos - refpos % 1000 + 1000 AS grp_end, + AVG(cnt) AS average +FROM coverage +GROUP BY grp_start, grp_end +ORDER BY average DESC; + + +# Clean up +DROP VIEW coverage; +DROP TABLE base; diff --git a/sql/backends/monet5/bam/Tests/mergetable.SQL.py b/sql/backends/monet5/bam/Tests/mergetable.SQL.py deleted file mode 100644 --- a/sql/backends/monet5/bam/Tests/mergetable.SQL.py +++ /dev/null @@ -1,3 +0,0 @@ -import bam - -bam.exec_sql_file("mergetable.sql", {'PWD': bam.SRCDIR}) diff --git a/sql/backends/monet5/bam/Tests/mergetable.reqtests b/sql/backends/monet5/bam/Tests/mergetable.reqtests new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/mergetable.reqtests @@ -0,0 +1,1 @@ +bam_loader_file diff --git a/sql/backends/monet5/bam/Tests/mergetable.sql b/sql/backends/monet5/bam/Tests/mergetable.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/bam/Tests/mergetable.sql @@ -0,0 +1,28 @@ +SET SCHEMA bam; + +# Add a merge table over these files +CREATE MERGE TABLE alignments ( + "virtual_offset" BIGINT NOT NULL, + "qname" STRING NOT NULL, + "flag" SMALLINT NOT NULL, + "rname" STRING NOT NULL, + "pos" INT NOT NULL, + "mapq" SMALLINT NOT NULL, + "cigar" STRING NOT NULL, + "rnext" STRING NOT NULL, + "pnext" INT NOT NULL, + "tlen" INT NOT NULL, + "seq" STRING NOT NULL, + "qual" STRING NOT NULL, + CONSTRAINT "alignments_pkey_virtual_offset" PRIMARY KEY ("virtual_offset") +); + +SELECT COUNT(*) FROM alignments; + +ALTER TABLE alignments ADD TABLE alignments_1; + +SELECT COUNT(*) FROM alignments; + +ALTER TABLE alignments ADD TABLE alignments_2; + +SELECT COUNT(*) FROM alignments; diff --git a/sql/backends/monet5/bam/Tests/sql/bam_lib.sql b/sql/backends/monet5/bam/Tests/sql/bam_lib.sql deleted file mode 100644 --- a/sql/backends/monet5/bam/Tests/sql/bam_lib.sql +++ /dev/null @@ -1,137 +0,0 @@ -SET SCHEMA bam; - -# Load a file to test our bam_lib functions on -CALL bam_loader_file('PWD/files/file1.bam', 0); - - -# BAM_FLAG - -# Should all give 'true' -SELECT bam_flag(1, 'mult_segm'); -SELECT bam_flag(2, 'prop_alig'); -SELECT bam_flag(4, 'segm_unma'); -SELECT bam_flag(8, 'next_unma'); -SELECT bam_flag(16, 'segm_reve'); -SELECT bam_flag(32, 'next_reve'); -SELECT bam_flag(64, 'firs_segm'); -SELECT bam_flag(128, 'last_segm'); -SELECT bam_flag(256, 'seco_alig'); -SELECT bam_flag(512, 'qual_cont'); -SELECT bam_flag(1024, 'opti_dupl'); -SELECT bam_flag(2048, 'supp_alig'); - -# Should all give 'false' -SELECT bam_flag(2, 'mult_segm'); -SELECT bam_flag(4, 'prop_alig'); -SELECT bam_flag(8, 'segm_unma'); -SELECT bam_flag(16, 'next_unma'); -SELECT bam_flag(4, 'segm_reve'); -SELECT bam_flag(2, 'next_reve'); -SELECT bam_flag(4, 'firs_segm'); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list